Runfiles working (for now) + tests

This commit is contained in:
Stuart Axelbrooke 2025-04-18 11:01:11 -07:00
parent 68608e412f
commit daba2e2b12
No known key found for this signature in database
GPG key ID: 1B0A848C29D46A35
18 changed files with 208 additions and 74 deletions

View file

@ -1,6 +1,8 @@
build --java_runtime_version=21
build --tool_java_runtime_version=21
test --test_output=errors
# Default to quiet mode for run commands
run --ui_event_filters=-info,-stdout,-stderr
run --noshow_progress

View file

@ -8,6 +8,7 @@ databuild_graph(
":sum_job",
],
plan = ":basic_graph_plan",
visibility = ["//visibility:public"],
)
py_binary(
@ -20,12 +21,13 @@ databuild_job(
name = "generate_number_job",
configure = ":generate_number_configure",
execute = ":generate_number_execute",
visibility = ["//visibility:public"],
)
java_binary(
name = "generate_number_configure",
srcs = glob(["*.java"]),
data = ["@databuild//:json_schema"],
create_executable = True,
main_class = "com.databuild.examples.basic_graph.GenerateConfigure",
deps = [
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
@ -45,12 +47,12 @@ databuild_job(
name = "sum_job",
configure = ":sum_configure",
execute = ":sum_execute",
visibility = ["//visibility:public"],
)
java_binary(
name = "sum_configure",
srcs = glob(["*.java"]),
data = ["@databuild//:json_schema"],
main_class = "com.databuild.examples.basic_graph.SumConfigure",
deps = [
"@maven//:com_fasterxml_jackson_core_jackson_annotations",

View file

@ -1,11 +1,7 @@
package com.databuild.examples.basic_graph;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import java.io.File;
import java.util.Arrays;
@ -32,12 +28,6 @@ public class GenerateConfigure {
try {
ObjectMapper mapper = new ObjectMapper();
// Load the schema
JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
// Create JSON Schema validator
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);

View file

@ -1,11 +1,7 @@
package com.databuild.examples.basic_graph;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import java.io.File;
import java.util.Arrays;
@ -29,16 +25,12 @@ public class SumConfigure {
JobConfig config = new JobConfig();
config.outputs = Collections.singletonList(partitionRef);
config.args = Arrays.asList(upstreams);
// Create a hashmap for env with {"OUTPUT_REF": "foo"}
config.env = Collections.singletonMap("OUTPUT_REF", args[0]);
// inputs and env are already initialized as empty collections in the constructor
try {
ObjectMapper mapper = new ObjectMapper();
// Load the schema
JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
// Create JSON Schema validator
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);

View file

@ -9,6 +9,9 @@ public class SumExecute {
System.exit(1);
}
// Get output ref from env var OUTPUT_REF
String outputRef = System.getenv("OUTPUT_REF");
// For each arg, load it from the file system and add it to the sum
int sum = 0;
for (String partitionRef : args) {
@ -24,10 +27,12 @@ public class SumExecute {
}
System.out.println("Sum of " + args.length + " partitions: " + sum);
// Write the sum to the output file
try (java.io.FileWriter writer = new java.io.FileWriter(BASE_PATH + args[0])) {
writer.write(sum);
String outPath = BASE_PATH + outputRef;
System.out.println("Writing sum " + sum + " to " + outPath);
try (java.io.FileWriter writer = new java.io.FileWriter(outPath)) {
writer.write(String.valueOf(sum));
} catch (Exception e) {
System.err.println("Error: Failed to write sum to " + args[0] + ": " + e.getMessage());
System.err.println("Error: Failed to write sum to " + outputRef + ": " + e.getMessage());
}
}
}

View file

@ -0,0 +1,17 @@
# Smoke tests for the basic_graph example jobs. The job entry points
# (the `.cfg` configure launcher and `.exec` execute wrapper produced by
# the databuild_job macro) are declared as data deps so they are staged
# into each test's runfiles for the driver script to invoke.
sh_test(
    name = "generate_number_test",
    srcs = ["generate_number_test.sh"],
    data = [
        "//:generate_number_job.cfg",
        "//:generate_number_job.exec",
    ],
)

sh_test(
    name = "sum_test",
    srcs = ["sum_test.sh"],
    data = [
        "//:sum_job.cfg",
        "//:sum_job.exec",
    ],
)

View file

@ -0,0 +1,14 @@
#!/bin/bash
# Smoke test for the generate_number job: run configure for several
# partitions, pipe one configuration into the executor, and verify the
# partition file it writes.
#
# Fail fast: without `set -e` a failing configure/execute command is
# ignored and the test can pass spuriously on the final check alone.
set -euo pipefail

# NOTE(review): the job binaries are invoked by bare name; this relies
# on how the sh_test resolves runfiles/PATH at execution time — confirm.

# Test configure
generate_number_job.cfg pippin salem sadie

# Test run
generate_number_job.cfg pippin | generate_number_job.exec

# Validate that contents of pippin is 43
if [[ "$(cat /tmp/databuild/examples/basic_graph/pippin)" != "43" ]]; then
  echo "Assertion failed: File does not contain 43"
  cat /tmp/databuild/examples/basic_graph/pippin
  exit 1
fi

View file

@ -0,0 +1,17 @@
#!/bin/bash
# Smoke test for the sum job: seed three input partitions, run
# configure piped into execute, and verify the summed output partition.
#
# Fail fast: without `set -e` a failing configure/execute command is
# ignored and the test can pass spuriously on the final check alone.
set -euo pipefail

# NOTE(review): the job binaries are invoked by bare name; this relies
# on how the sh_test resolves runfiles/PATH at execution time — confirm.

# Test configure
sum_job.cfg pippin_salem_sadie

# Seed the input partitions. Create the directory first so this test
# also works when run in isolation (not after generate_number_test).
mkdir -p /tmp/databuild/examples/basic_graph
echo -n 43 > /tmp/databuild/examples/basic_graph/pippin
echo -n 56 > /tmp/databuild/examples/basic_graph/salem
echo -n 40 > /tmp/databuild/examples/basic_graph/sadie

# Test run
sum_job.cfg pippin_salem_sadie | sum_job.exec

# Validate that contents of pippin_salem_sadie is 139 (43 + 56 + 40)
if [[ "$(cat /tmp/databuild/examples/basic_graph/pippin_salem_sadie)" != "139" ]]; then
  echo "Assertion failed: File does not contain 139"
  cat /tmp/databuild/examples/basic_graph/pippin_salem_sadie
  exit 1
fi

View file

@ -4,6 +4,7 @@ databuild_job(
name = "test_job",
configure = ":test_job_configure",
execute = ":test_job_execute",
visibility = ["//visibility:public"],
)
sh_binary(

View file

@ -121,7 +121,8 @@
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
"https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": "0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95",
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",

View file

@ -0,0 +1,8 @@
# Smoke test wiring for the test_job example: stage the job's configure
# and execute entry points into the test's runfiles for test.sh to run.
sh_test(
    name = "test",
    srcs = ["test.sh"],
    data = [
        "//:test_job.cfg",
        "//:test_job.exec",
    ],
)

View file

@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Smoke test for test_job: run configure standalone, then pipe a
# configuration into the executor.
#
# Fail fast: without `set -e` the script always exits 0, so the test
# would pass even if both commands failed.
set -euo pipefail

# NOTE(review): the job binaries are invoked by bare name; this relies
# on how the sh_test resolves runfiles/PATH at execution time — confirm.
test_job.cfg nice
test_job.cfg cool | test_job.exec

4
job/BUILD.bazel Normal file
View file

@ -0,0 +1,4 @@
# Make the wrapper templates visible to the databuild_job rules in
# rules.bzl, which expand them into per-job launcher scripts.
exports_files([
    "configure_wrapper.sh.tpl",
    "execute_wrapper.sh.tpl",
])

38
job/configure_wrapper.sh.tpl Executable file
View file

@ -0,0 +1,38 @@
#!/bin/bash
# Launcher template for a databuild job's configure entry point.
# %{CONFIGURE_PATH} is substituted at analysis time by
# _databuild_job_cfg_impl (see rules.bzl).
set -e

# --- begin runfiles.bash initialization v3 ---
# Copy-pasted from the Bazel Bash runfiles library v3.
set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
  source "$0.runfiles/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -2
# --- end runfiles.bash initialization v3 ---

CONFIGURE_BINARY="%{CONFIGURE_PATH}"

# Set up JAVA_RUNFILES if not already set
# NOTE(review): presumably needed by java_binary launcher stubs to
# locate their runfiles when invoked through this wrapper — confirm.
if [[ -z "${JAVA_RUNFILES:-}" ]]; then
    if [[ -d "$0.runfiles" ]]; then
        export JAVA_RUNFILES="$0.runfiles"
    elif [[ -f "${RUNFILES_MANIFEST_FILE:-}" ]]; then
        export JAVA_RUNFILES="$(dirname "${RUNFILES_MANIFEST_FILE}")"
    fi
fi

# The substituted path comes from `files_to_run.executable.path` in the
# rule, which is a build-time path and may not exist at run time; fall
# back to the binary's basename in the current directory (expected to be
# the runfiles root).
if [[ ! -x "$CONFIGURE_BINARY" ]]; then
    CONFIGURE_BASENAME=$(basename "$CONFIGURE_BINARY")
    if [[ ! -x "$CONFIGURE_BASENAME" ]]; then
        echo "Error: CONFIGURE binary not found or not executable at $CONFIGURE_BINARY and $CONFIGURE_BASENAME" >&2
        exit 1
    else
        CONFIGURE_BINARY=./$CONFIGURE_BASENAME
    fi
fi

# Run the configuration, forwarding all arguments.
exec "${CONFIGURE_BINARY}" "$@"

120
rules.bzl
View file

@ -16,26 +16,70 @@ def databuild_job(
**kwargs: Additional attributes to pass to the underlying rule
"""
# Create the configuration entry point
native.sh_binary(
_databuild_job_cfg_rule(
name = name + ".cfg",
srcs = ["@databuild//runtime:configure_wrapper.sh"],
args = ["$(location %s)" % configure],
data = [configure] + deps,
configure = configure,
visibility = visibility,
)
# Create the main rule that serves as a provider for other targets
_databuild_job_rule(
name = name,
_databuild_job_exec_rule(
name = name + ".exec",
configure = ":%s.cfg" % name,
execute = execute,
deps = deps,
visibility = visibility,
**kwargs
)
def _databuild_job_target_impl(ctx):
def _databuild_job_cfg_impl(ctx):
    """Implementation for the `<name>.cfg` launcher target.

    Expands the configure wrapper template into an executable script,
    substituting the path of the configure binary, and returns it with
    the runfiles needed to run it.
    """
    configure_file = ctx.executable.configure
    # NOTE(review): `.path` is the build-time (execroot) path rather
    # than `short_path`; the wrapper template compensates with a
    # basename fallback at run time — confirm whether short_path would
    # be the more robust substitution.
    configure_path = ctx.attr.configure.files_to_run.executable.path

    # Render the wrapper template as this target's executable.
    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{CONFIGURE_PATH}": configure_path,
        },
        is_executable = True,
    )

    # Runfiles: the configure binary itself, everything the configure
    # target needs at run time, and the Bash runfiles library that the
    # wrapper script sources.
    runfiles = ctx.runfiles(
        files = [configure_file],
    ).merge(ctx.attr.configure.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]
# Rule behind the `<name>.cfg` target created by the `databuild_job`
# macro: an executable launcher that wraps the job's configure binary.
_databuild_job_cfg_rule = rule(
    implementation = _databuild_job_cfg_impl,
    attrs = {
        "configure": attr.label(
            doc = "Target that implements the configuration logic",
            executable = True,
            cfg = "exec",
            mandatory = True,
        ),
        # Wrapper script template expanded by the implementation.
        "_template": attr.label(
            default = "@databuild//job:configure_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        # Bash runfiles library sourced by the generated wrapper.
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)
def _databuild_job_exec_impl(ctx):
deps_providers = []
for dep in ctx.attr.deps:
if DataBuildJobInfo in dep:
@ -44,15 +88,15 @@ def _databuild_job_target_impl(ctx):
execute_file = ctx.executable.execute
jq_file = ctx.executable._jq
exec_script = ctx.actions.declare_file(ctx.label.name)
script = ctx.actions.declare_file(ctx.label.name)
# Get the correct runfiles paths
jq_path = ctx.attr._jq.files_to_run.executable.path
execute_path = ctx.attr.execute.files_to_run.executable.path
ctx.actions.expand_template(
template = ctx.file._wrapper_template,
output = exec_script,
template = ctx.file._template,
output = script,
substitutions = {
"%{JQ_PATH}": jq_path,
"%{EXECUTE_PATH}": execute_path,
@ -66,12 +110,12 @@ def _databuild_job_target_impl(ctx):
return [
DefaultInfo(
executable = exec_script,
executable = script,
runfiles = runfiles,
),
DataBuildJobInfo(
configure = ctx.attr.configure,
execute = exec_script,
execute = script,
deps = deps_providers,
),
]
@ -86,8 +130,8 @@ DataBuildJobInfo = provider(
},
)
_databuild_job_rule = rule(
implementation = _databuild_job_target_impl,
_databuild_job_exec_rule = rule(
implementation = _databuild_job_exec_impl,
attrs = {
"configure": attr.label(
doc = "Target that implements the configuration logic",
@ -103,8 +147,8 @@ _databuild_job_rule = rule(
doc = "Dependencies (other job targets)",
default = [],
),
"_wrapper_template": attr.label(
default = "@databuild//runtime:execute_wrapper.sh.tpl",
"_template": attr.label(
default = "@databuild//job:execute_wrapper.sh.tpl",
allow_single_file = True,
),
"_jq": attr.label(
@ -116,20 +160,28 @@ _databuild_job_rule = rule(
executable = True,
)
def _graph_impl(name):
pass
def databuild_graph(name, jobs, plan, visibility = None):
"""Creates a databuild graph target."""
databuild_graph = rule(
implementation = _graph_impl,
attrs = {
"jobs": attr.label_list(
doc = "The list of jobs that are candidates for building partitions in this databuild graph",
allow_empty = False,
),
"plan": attr.label(
doc = "The binary that is run to produce a `JobGraph` that builds the requested partition refs",
executable = True,
cfg = "exec",
),
},
)
#def _graph_impl(name):
# """
#
# """
#
# # Lets do this
# pass
#
#databuild_graph = rule(
# implementation = _graph_impl,
# attrs = {
# "jobs": attr.label_list(
# doc = "The list of jobs that are candidates for building partitions in this databuild graph",
# allow_empty = False,
# ),
# "plan": attr.label(
# doc = "The binary that is run to produce a `JobGraph` that builds the requested partition refs",
# executable = True,
# cfg = "exec",
# ),
# },
#)

View file

@ -1,11 +1,6 @@
# In modules/jq/BUILD.bazel
load("@bazel_skylib//lib:selects.bzl", "selects")
exports_files([
"configure_wrapper.sh",
"execute_wrapper.sh.tpl",
])
# Platform detection
config_setting(
name = "darwin",

View file

@ -1,9 +0,0 @@
#!/bin/bash
set -e
# The first argument is the path to the configure binary
CONFIGURE_BINARY="$1"
shift
# Run the configuration
exec "${CONFIGURE_BINARY}" "$@"