From daba2e2b126b2910dededa9a7518cb3938e71efc Mon Sep 17 00:00:00 2001 From: Stuart Axelbrooke Date: Fri, 18 Apr 2025 11:01:11 -0700 Subject: [PATCH] Runfiles working (for now) + tests --- examples/basic_graph/.bazelrc | 2 + examples/basic_graph/BUILD.bazel | 6 +- examples/basic_graph/GenerateConfigure.java | 10 -- examples/basic_graph/SumConfigure.java | 12 +- examples/basic_graph/SumExecute.java | 11 +- examples/basic_graph/test/BUILD.bazel | 17 +++ .../basic_graph/test/generate_number_test.sh | 14 ++ examples/basic_graph/test/sum_test.sh | 17 +++ examples/hello_world/BUILD.bazel | 1 + examples/hello_world/MODULE.bazel.lock | 3 +- examples/hello_world/test/BUILD.bazel | 8 ++ examples/hello_world/test/test.sh | 5 + job/BUILD.bazel | 4 + job/configure_wrapper.sh.tpl | 38 ++++++ {runtime => job}/execute_wrapper.sh.tpl | 0 rules.bzl | 120 +++++++++++++----- runtime/BUILD.bazel | 5 - runtime/configure_wrapper.sh | 9 -- 18 files changed, 208 insertions(+), 74 deletions(-) create mode 100644 examples/basic_graph/test/BUILD.bazel create mode 100755 examples/basic_graph/test/generate_number_test.sh create mode 100755 examples/basic_graph/test/sum_test.sh create mode 100644 examples/hello_world/test/BUILD.bazel create mode 100755 examples/hello_world/test/test.sh create mode 100644 job/BUILD.bazel create mode 100755 job/configure_wrapper.sh.tpl rename {runtime => job}/execute_wrapper.sh.tpl (100%) delete mode 100755 runtime/configure_wrapper.sh diff --git a/examples/basic_graph/.bazelrc b/examples/basic_graph/.bazelrc index 874d6a5..8ffb96b 100644 --- a/examples/basic_graph/.bazelrc +++ b/examples/basic_graph/.bazelrc @@ -1,6 +1,8 @@ build --java_runtime_version=21 build --tool_java_runtime_version=21 +test --test_output=errors + # Default to quiet mode for run commands run --ui_event_filters=-info,-stdout,-stderr run --noshow_progress diff --git a/examples/basic_graph/BUILD.bazel b/examples/basic_graph/BUILD.bazel index f08698b..0018345 100644 --- 
a/examples/basic_graph/BUILD.bazel +++ b/examples/basic_graph/BUILD.bazel @@ -8,6 +8,7 @@ databuild_graph( ":sum_job", ], plan = ":basic_graph_plan", + visibility = ["//visibility:public"], ) py_binary( @@ -20,12 +21,13 @@ databuild_job( name = "generate_number_job", configure = ":generate_number_configure", execute = ":generate_number_execute", + visibility = ["//visibility:public"], ) java_binary( name = "generate_number_configure", srcs = glob(["*.java"]), - data = ["@databuild//:json_schema"], + create_executable = True, main_class = "com.databuild.examples.basic_graph.GenerateConfigure", deps = [ "@maven//:com_fasterxml_jackson_core_jackson_annotations", @@ -45,12 +47,12 @@ databuild_job( name = "sum_job", configure = ":sum_configure", execute = ":sum_execute", + visibility = ["//visibility:public"], ) java_binary( name = "sum_configure", srcs = glob(["*.java"]), - data = ["@databuild//:json_schema"], main_class = "com.databuild.examples.basic_graph.SumConfigure", deps = [ "@maven//:com_fasterxml_jackson_core_jackson_annotations", diff --git a/examples/basic_graph/GenerateConfigure.java b/examples/basic_graph/GenerateConfigure.java index 075c032..520bc04 100644 --- a/examples/basic_graph/GenerateConfigure.java +++ b/examples/basic_graph/GenerateConfigure.java @@ -1,11 +1,7 @@ package com.databuild.examples.basic_graph; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.module.jsonSchema.JsonSchema; -import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator; import java.io.File; import java.util.Arrays; @@ -32,12 +28,6 @@ public class GenerateConfigure { try { ObjectMapper mapper = new ObjectMapper(); - // Load the schema - JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json")); - - // Create JSON Schema validator - 
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper); - JsonSchema schema = schemaGen.generateSchema(JobConfig.class); // Convert config to JsonNode and serialize JsonNode configNode = mapper.valueToTree(config); diff --git a/examples/basic_graph/SumConfigure.java b/examples/basic_graph/SumConfigure.java index db1b759..5430a72 100644 --- a/examples/basic_graph/SumConfigure.java +++ b/examples/basic_graph/SumConfigure.java @@ -1,11 +1,7 @@ package com.databuild.examples.basic_graph; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.module.jsonSchema.JsonSchema; -import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator; import java.io.File; import java.util.Arrays; @@ -29,16 +25,12 @@ public class SumConfigure { JobConfig config = new JobConfig(); config.outputs = Collections.singletonList(partitionRef); config.args = Arrays.asList(upstreams); + // Set env to the single-entry map {"OUTPUT_REF": args[0]} + config.env = Collections.singletonMap("OUTPUT_REF", args[0]); // inputs and env are already initialized as empty collections in the constructor try { ObjectMapper mapper = new ObjectMapper(); - // Load the schema - JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json")); - - // Create JSON Schema validator - JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper); - JsonSchema schema = schemaGen.generateSchema(JobConfig.class); // Convert config to JsonNode and serialize JsonNode configNode = mapper.valueToTree(config); diff --git a/examples/basic_graph/SumExecute.java b/examples/basic_graph/SumExecute.java index 6b0d8d3..38b2840 100644 --- a/examples/basic_graph/SumExecute.java +++ b/examples/basic_graph/SumExecute.java @@ -9,6 +9,9 @@ public class SumExecute { System.exit(1); } + // Get output ref from env 
var OUTPUT_REF + String outputRef = System.getenv("OUTPUT_REF"); + // For each arg, load it from the file system and add it to the sum int sum = 0; for (String partitionRef : args) { @@ -24,10 +27,12 @@ public class SumExecute { } System.out.println("Sum of " + args.length + " partitions: " + sum); // Write the sum to the output file - try (java.io.FileWriter writer = new java.io.FileWriter(BASE_PATH + args[0])) { - writer.write(sum); + String outPath = BASE_PATH + outputRef; + System.out.println("Writing sum " + sum + " to " + outPath); + try (java.io.FileWriter writer = new java.io.FileWriter(outPath)) { + writer.write(String.valueOf(sum)); } catch (Exception e) { - System.err.println("Error: Failed to write sum to " + args[0] + ": " + e.getMessage()); + System.err.println("Error: Failed to write sum to " + outputRef + ": " + e.getMessage()); } } } diff --git a/examples/basic_graph/test/BUILD.bazel b/examples/basic_graph/test/BUILD.bazel new file mode 100644 index 0000000..d30ae8c --- /dev/null +++ b/examples/basic_graph/test/BUILD.bazel @@ -0,0 +1,17 @@ +sh_test( + name = "generate_number_test", + srcs = ["generate_number_test.sh"], + data = [ + "//:generate_number_job.cfg", + "//:generate_number_job.exec", + ], +) + +sh_test( + name = "sum_test", + srcs = ["sum_test.sh"], + data = [ + "//:sum_job.cfg", + "//:sum_job.exec", + ], +) diff --git a/examples/basic_graph/test/generate_number_test.sh b/examples/basic_graph/test/generate_number_test.sh new file mode 100755 index 0000000..a3e3741 --- /dev/null +++ b/examples/basic_graph/test/generate_number_test.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Test configure +generate_number_job.cfg pippin salem sadie + +# Test run +generate_number_job.cfg pippin | generate_number_job.exec +# Validate that contents of pippin is 43 +if [[ "$(cat /tmp/databuild/examples/basic_graph/pippin)" != "43" ]]; then + echo "Assertion failed: File does not contain 43" + cat /tmp/databuild/examples/basic_graph/pippin + exit 1 +fi + diff --git 
a/examples/basic_graph/test/sum_test.sh b/examples/basic_graph/test/sum_test.sh new file mode 100755 index 0000000..fac7e3e --- /dev/null +++ b/examples/basic_graph/test/sum_test.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Test configure +sum_job.cfg pippin_salem_sadie + +# Test run +echo -n 43 > /tmp/databuild/examples/basic_graph/pippin +echo -n 56 > /tmp/databuild/examples/basic_graph/salem +echo -n 40 > /tmp/databuild/examples/basic_graph/sadie +sum_job.cfg pippin_salem_sadie | sum_job.exec +# Validate that contents of pippin_salem_sadie is 139 +if [[ "$(cat /tmp/databuild/examples/basic_graph/pippin_salem_sadie)" != "139" ]]; then + echo "Assertion failed: File does not contain 139" + cat /tmp/databuild/examples/basic_graph/pippin_salem_sadie + exit 1 +fi + diff --git a/examples/hello_world/BUILD.bazel b/examples/hello_world/BUILD.bazel index b932404..474bedb 100644 --- a/examples/hello_world/BUILD.bazel +++ b/examples/hello_world/BUILD.bazel @@ -4,6 +4,7 @@ databuild_job( name = "test_job", configure = ":test_job_configure", execute = ":test_job_execute", + visibility = ["//visibility:public"], ) sh_binary( diff --git a/examples/hello_world/MODULE.bazel.lock b/examples/hello_world/MODULE.bazel.lock index 73f2913..4c1e3c4 100644 --- a/examples/hello_world/MODULE.bazel.lock +++ b/examples/hello_world/MODULE.bazel.lock @@ -121,7 +121,8 @@ "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", "https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320", "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c", - "https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95", + "https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": 
"0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0", + "https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95", "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", diff --git a/examples/hello_world/test/BUILD.bazel b/examples/hello_world/test/BUILD.bazel new file mode 100644 index 0000000..327adc5 --- /dev/null +++ b/examples/hello_world/test/BUILD.bazel @@ -0,0 +1,8 @@ +sh_test( + name = "test", + srcs = ["test.sh"], + data = [ + "//:test_job.cfg", + "//:test_job.exec", + ], +) diff --git a/examples/hello_world/test/test.sh b/examples/hello_world/test/test.sh new file mode 100755 index 0000000..05c6b3f --- /dev/null +++ b/examples/hello_world/test/test.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +test_job.cfg nice + +test_job.cfg cool | test_job.exec diff --git a/job/BUILD.bazel b/job/BUILD.bazel new file mode 100644 index 0000000..dea8f9e --- /dev/null +++ b/job/BUILD.bazel @@ -0,0 +1,4 @@ +exports_files([ + "configure_wrapper.sh.tpl", + "execute_wrapper.sh.tpl", +]) diff --git a/job/configure_wrapper.sh.tpl b/job/configure_wrapper.sh.tpl new file mode 100755 index 0000000..066abe2 --- /dev/null +++ b/job/configure_wrapper.sh.tpl @@ -0,0 +1,38 @@ +#!/bin/bash +set -e + +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. 
+set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- + +CONFIGURE_BINARY="%{CONFIGURE_PATH}" + +# Set up JAVA_RUNFILES if not already set +if [[ -z "${JAVA_RUNFILES:-}" ]]; then + if [[ -d "$0.runfiles" ]]; then + export JAVA_RUNFILES="$0.runfiles" + elif [[ -f "${RUNFILES_MANIFEST_FILE:-}" ]]; then + export JAVA_RUNFILES="$(dirname "${RUNFILES_MANIFEST_FILE}")" + fi +fi + + +if [[ ! -x "$CONFIGURE_BINARY" ]]; then + CONFIGURE_BASENAME=$(basename "$CONFIGURE_BINARY") + if [[ ! -x "$CONFIGURE_BASENAME" ]]; then + echo "Error: CONFIGURE binary not found or not executable at $CONFIGURE_BINARY and $CONFIGURE_BASENAME" >&2 + exit 1 + else + CONFIGURE_BINARY=./$CONFIGURE_BASENAME + fi +fi + +# Run the configuration +exec "${CONFIGURE_BINARY}" "$@" diff --git a/runtime/execute_wrapper.sh.tpl b/job/execute_wrapper.sh.tpl similarity index 100% rename from runtime/execute_wrapper.sh.tpl rename to job/execute_wrapper.sh.tpl diff --git a/rules.bzl b/rules.bzl index caeb27d..fde260b 100644 --- a/rules.bzl +++ b/rules.bzl @@ -16,26 +16,70 @@ def databuild_job( **kwargs: Additional attributes to pass to the underlying rule """ - # Create the configuration entry point - native.sh_binary( + _databuild_job_cfg_rule( name = name + ".cfg", - srcs = ["@databuild//runtime:configure_wrapper.sh"], - args = ["$(location %s)" % configure], - data = [configure] + deps, + configure = configure, visibility = visibility, ) # Create the main rule that serves as a provider for other targets - 
_databuild_job_rule( - name = name, + _databuild_job_exec_rule( + name = name + ".exec", configure = ":%s.cfg" % name, execute = execute, deps = deps, visibility = visibility, - **kwargs ) -def _databuild_job_target_impl(ctx): +def _databuild_job_cfg_impl(ctx): + configure_file = ctx.executable.configure + configure_path = ctx.attr.configure.files_to_run.executable.path + script = ctx.actions.declare_file(ctx.label.name) + + ctx.actions.expand_template( + template = ctx.file._template, + output = script, + substitutions = { + "%{CONFIGURE_PATH}": configure_path, + }, + is_executable = True, + ) + + runfiles = ctx.runfiles( + files = [configure_file], + ).merge(ctx.attr.configure.default_runfiles).merge( + ctx.attr._bash_runfiles.default_runfiles, + ) + + return [ + DefaultInfo( + executable = script, + runfiles = runfiles, + ), + ] + +_databuild_job_cfg_rule = rule( + implementation = _databuild_job_cfg_impl, + attrs = { + "configure": attr.label( + doc = "Target that implements the configuration logic", + executable = True, + cfg = "exec", + mandatory = True, + ), + "_template": attr.label( + default = "@databuild//job:configure_wrapper.sh.tpl", + allow_single_file = True, + ), + "_bash_runfiles": attr.label( + default = Label("@bazel_tools//tools/bash/runfiles"), + allow_files = True, + ), + }, + executable = True, +) + +def _databuild_job_exec_impl(ctx): deps_providers = [] for dep in ctx.attr.deps: if DataBuildJobInfo in dep: @@ -44,15 +88,15 @@ def _databuild_job_target_impl(ctx): execute_file = ctx.executable.execute jq_file = ctx.executable._jq - exec_script = ctx.actions.declare_file(ctx.label.name) + script = ctx.actions.declare_file(ctx.label.name) # Get the correct runfiles paths jq_path = ctx.attr._jq.files_to_run.executable.path execute_path = ctx.attr.execute.files_to_run.executable.path ctx.actions.expand_template( - template = ctx.file._wrapper_template, - output = exec_script, + template = ctx.file._template, + output = script, substitutions = { 
"%{JQ_PATH}": jq_path, "%{EXECUTE_PATH}": execute_path, @@ -66,12 +110,12 @@ def _databuild_job_target_impl(ctx): return [ DefaultInfo( - executable = exec_script, + executable = script, runfiles = runfiles, ), DataBuildJobInfo( configure = ctx.attr.configure, - execute = exec_script, + execute = script, deps = deps_providers, ), ] @@ -86,8 +130,8 @@ DataBuildJobInfo = provider( }, ) -_databuild_job_rule = rule( - implementation = _databuild_job_target_impl, +_databuild_job_exec_rule = rule( + implementation = _databuild_job_exec_impl, attrs = { "configure": attr.label( doc = "Target that implements the configuration logic", @@ -103,8 +147,8 @@ _databuild_job_rule = rule( doc = "Dependencies (other job targets)", default = [], ), - "_wrapper_template": attr.label( - default = "@databuild//runtime:execute_wrapper.sh.tpl", + "_template": attr.label( + default = "@databuild//job:execute_wrapper.sh.tpl", allow_single_file = True, ), "_jq": attr.label( @@ -116,20 +160,28 @@ _databuild_job_rule = rule( executable = True, ) -def _graph_impl(name): - pass +def databuild_graph(name, jobs, plan, visibility = None): + """Creates a databuild graph target.""" -databuild_graph = rule( - implementation = _graph_impl, - attrs = { - "jobs": attr.label_list( - doc = "The list of jobs that are candidates for building partitions in this databuild graph", - allow_empty = False, - ), - "plan": attr.label( - doc = "The binary that is run to produce a `JobGraph` that builds the requested partition refs", - executable = True, - cfg = "exec", - ), - }, -) +#def _graph_impl(name): +# """ +# +# """ +# +# # Lets do this +# pass +# +#databuild_graph = rule( +# implementation = _graph_impl, +# attrs = { +# "jobs": attr.label_list( +# doc = "The list of jobs that are candidates for building partitions in this databuild graph", +# allow_empty = False, +# ), +# "plan": attr.label( +# doc = "The binary that is run to produce a `JobGraph` that builds the requested partition refs", +# executable = 
True, +# cfg = "exec", +# ), +# }, +#) diff --git a/runtime/BUILD.bazel b/runtime/BUILD.bazel index cfa3ea5..4d310c9 100644 --- a/runtime/BUILD.bazel +++ b/runtime/BUILD.bazel @@ -1,11 +1,6 @@ # In modules/jq/BUILD.bazel load("@bazel_skylib//lib:selects.bzl", "selects") -exports_files([ - "configure_wrapper.sh", - "execute_wrapper.sh.tpl", -]) - # Platform detection config_setting( name = "darwin", diff --git a/runtime/configure_wrapper.sh b/runtime/configure_wrapper.sh deleted file mode 100755 index 01802e3..0000000 --- a/runtime/configure_wrapper.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -set -e - -# The first argument is the path to the configure binary -CONFIGURE_BINARY="$1" -shift - -# Run the configuration -exec "${CONFIGURE_BINARY}" "$@"