From c72e122975d2c2d97c59ef5ab55f72f2f38b94d7 Mon Sep 17 00:00:00 2001
From: Stuart Axelbrooke
Date: Sun, 29 Jun 2025 20:08:21 -0700
Subject: [PATCH] Implement single target strategy

Allow a databuild_job to be backed by a single `binary` target that
serves both phases through subcommands, as an alternative to separate
`configure` and `execute` targets:

- databuild_job() accepts `binary`. It is mutually exclusive with
  `configure`/`execute`, and exactly one of the two forms is required.
- The .cfg and .exec rules gain a `use_subcommand` attribute. When set,
  the generated wrapper defines EXECUTABLE_SUBCOMMAND="config" or
  EXECUTE_SUBCOMMAND="exec" via the %{PREFIX} substitution, and the
  wrapper templates pass that value as the first argument to the binary.
- Add unified examples for basic_graph (Java) and basic_job (shell).

---
 databuild/job/execute_wrapper.sh.tpl          |   8 +-
 databuild/rules.bzl                           |  81 ++++++++++---
 .../runtime/simple_executable_wrapper.sh.tpl  |   6 +-
 examples/basic_graph/BUILD.bazel              |  18 +++
 .../basic_graph/UnifiedGenerateNumber.java    | 109 ++++++++++++++++++
 examples/basic_job/BUILD.bazel                |  12 ++
 examples/basic_job/unified_job.sh             |  21 ++++
 7 files changed, 235 insertions(+), 20 deletions(-)
 create mode 100644 examples/basic_graph/UnifiedGenerateNumber.java
 create mode 100755 examples/basic_job/unified_job.sh

diff --git a/databuild/job/execute_wrapper.sh.tpl b/databuild/job/execute_wrapper.sh.tpl
index 2cb2c84..8b27fd9 100755
--- a/databuild/job/execute_wrapper.sh.tpl
+++ b/databuild/job/execute_wrapper.sh.tpl
@@ -3,6 +3,8 @@ set -e
 
 %{RUNFILES_PREFIX}
 
+%{PREFIX}
+
 EXECUTE_BINARY="$(rlocation "_main/$(basename "%{EXECUTE_PATH}")")"
 JQ="$(rlocation "databuild+/databuild/runtime/$(basename "%{JQ_PATH}")")"
 
@@ -44,4 +46,8 @@ while IFS= read -r arg; do
 done < <("$JQ" -r '.args[]' "$CONFIG_FILE")
 
 # Run the execution with both environment variables (already set) and arguments
-exec "$EXECUTE_BINARY" "${ARGS[@]}"
+if [[ -n "${EXECUTE_SUBCOMMAND:-}" ]]; then
+    exec "$EXECUTE_BINARY" "${EXECUTE_SUBCOMMAND}" "${ARGS[@]}"
+else
+    exec "$EXECUTE_BINARY" "${ARGS[@]}"
+fi
diff --git a/databuild/rules.bzl b/databuild/rules.bzl
index e0a173d..50dc9bd 100644
--- a/databuild/rules.bzl
+++ b/databuild/rules.bzl
@@ -30,32 +30,56 @@ source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
 
 def databuild_job(
         name,
-        configure,
-        execute,
+        configure = None,
+        execute = None,
+        binary = None,
         visibility = None):
     """Creates a DataBuild job target with configuration and execution capabilities.
 
     Args:
         name: Name of the job target
-        configure: Target that implements the configuration logic
-        execute: Target that implements the execution logic
-        deps: List of other job_targets this job depends on
+        configure: Target that implements the configuration logic (optional if binary is provided)
+        execute: Target that implements the execution logic (optional if binary is provided)
+        binary: Single binary target that handles both config and exec via subcommands (optional)
         visibility: Visibility specification
-        **kwargs: Additional attributes to pass to the underlying rule
     """
 
-    _databuild_job_cfg_rule(
-        name = name + ".cfg",
-        configure = configure,
-        visibility = visibility,
-    )
+    # Validate arguments
+    if binary and (configure or execute):
+        fail("Cannot specify both 'binary' and 'configure'/'execute' parameters")
+    if not binary and not (configure and execute):
+        fail("Must specify either 'binary' or both 'configure' and 'execute' parameters")
 
-    # Create the main rule that serves as a provider for other targets
-    _databuild_job_exec_rule(
-        name = name + ".exec",
-        execute = execute,
-        visibility = visibility,
-    )
+    if binary:
+        # Single binary approach - use subcommands
+        _databuild_job_cfg_rule(
+            name = name + ".cfg",
+            configure = binary,
+            use_subcommand = True,
+            visibility = visibility,
+        )
+
+        _databuild_job_exec_rule(
+            name = name + ".exec",
+            execute = binary,
+            use_subcommand = True,
+            visibility = visibility,
+        )
+    else:
+        # Legacy approach - separate binaries
+        _databuild_job_cfg_rule(
+            name = name + ".cfg",
+            configure = configure,
+            use_subcommand = False,
+            visibility = visibility,
+        )
+
+        _databuild_job_exec_rule(
+            name = name + ".exec",
+            execute = execute,
+            use_subcommand = False,
+            visibility = visibility,
+        )
 
     # Create a job target that configures then executes
     _databuild_job_rule(
@@ -70,13 +94,19 @@ def _databuild_job_cfg_impl(ctx):
     configure_path = ctx.attr.configure.files_to_run.executable.path
     script = ctx.actions.declare_file(ctx.label.name)
 
+    # Handle subcommand for single binary approach
+    if ctx.attr.use_subcommand:
+        prefix = "EXECUTABLE_SUBCOMMAND=\"config\"\n"
+    else:
+        prefix = ""
+
     ctx.actions.expand_template(
         template = ctx.file._template,
         output = script,
         substitutions = {
             "%{EXECUTABLE_PATH}": configure_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
-            "%{PREFIX}": "",
+            "%{PREFIX}": prefix,
         },
         is_executable = True,
     )
@@ -103,6 +133,10 @@ _databuild_job_cfg_rule = rule(
             cfg = "target",
             mandatory = True,
         ),
+        "use_subcommand": attr.bool(
+            doc = "Whether to pass 'config' as the first argument to the executable",
+            default = False,
+        ),
         "_template": attr.label(
             default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
             allow_single_file = True,
@@ -124,6 +158,12 @@ def _databuild_job_exec_impl(ctx):
     # Get the correct runfiles paths
     jq_path = ctx.attr._jq.files_to_run.executable.path
     execute_path = ctx.attr.execute.files_to_run.executable.path
+
+    # Handle subcommand for single binary approach
+    if ctx.attr.use_subcommand:
+        prefix = "EXECUTE_SUBCOMMAND=\"exec\"\n"
+    else:
+        prefix = ""
 
     ctx.actions.expand_template(
         template = ctx.file._template,
@@ -132,6 +172,7 @@ def _databuild_job_exec_impl(ctx):
             "%{JQ_PATH}": jq_path,
             "%{EXECUTE_PATH}": execute_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
+            "%{PREFIX}": prefix,
         },
         is_executable = True,
     )
@@ -168,6 +209,10 @@ _databuild_job_exec_rule = rule(
             executable = True,
             cfg = "target",
         ),
+        "use_subcommand": attr.bool(
+            doc = "Whether to pass 'exec' as the first argument to the executable",
+            default = False,
+        ),
         "_template": attr.label(
             default = "@databuild//databuild/job:execute_wrapper.sh.tpl",
             allow_single_file = True,
diff --git a/databuild/runtime/simple_executable_wrapper.sh.tpl b/databuild/runtime/simple_executable_wrapper.sh.tpl
index e67b4b5..454ec6b 100755
--- a/databuild/runtime/simple_executable_wrapper.sh.tpl
+++ b/databuild/runtime/simple_executable_wrapper.sh.tpl
@@ -8,4 +8,8 @@ set -e
 EXECUTABLE_BINARY="$(rlocation "_main/$(basename "%{EXECUTABLE_PATH}")")"
 
 # Run the configuration
-exec "${EXECUTABLE_BINARY}" "$@"
+if [[ -n "${EXECUTABLE_SUBCOMMAND:-}" ]]; then
+    exec "${EXECUTABLE_BINARY}" "${EXECUTABLE_SUBCOMMAND}" "$@"
+else
+    exec "${EXECUTABLE_BINARY}" "$@"
+fi
diff --git a/examples/basic_graph/BUILD.bazel b/examples/basic_graph/BUILD.bazel
index 17676e8..12a9ae0 100644
--- a/examples/basic_graph/BUILD.bazel
+++ b/examples/basic_graph/BUILD.bazel
@@ -66,6 +66,13 @@ databuild_job(
     visibility = ["//visibility:public"],
 )
 
+# New unified approach test
+databuild_job(
+    name = "unified_generate_job",
+    binary = ":unified_generate_binary",
+    visibility = ["//visibility:public"],
+)
+
 java_binary(
     name = "sum_configure",
     srcs = glob(["*.java"]),
@@ -86,3 +93,14 @@ java_binary(
     ]),
     main_class = "com.databuild.examples.basic_graph.SumExecute",
 )
+
+java_binary(
+    name = "unified_generate_binary",
+    srcs = ["UnifiedGenerateNumber.java"],
+    main_class = "com.databuild.examples.basic_graph.UnifiedGenerateNumber",
+    deps = [
+        "@maven//:com_fasterxml_jackson_core_jackson_annotations",
+        "@maven//:com_fasterxml_jackson_core_jackson_core",
+        "@maven//:com_fasterxml_jackson_core_jackson_databind",
+    ],
+)
diff --git a/examples/basic_graph/UnifiedGenerateNumber.java b/examples/basic_graph/UnifiedGenerateNumber.java
new file mode 100644
index 0000000..7b7cb13
--- /dev/null
+++ b/examples/basic_graph/UnifiedGenerateNumber.java
@@ -0,0 +1,109 @@
+package com.databuild.examples.basic_graph;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collections;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Random;
+
+/**
+ * Unified job that handles both configuration and execution via subcommands.
+ */
+public class UnifiedGenerateNumber {
+    public static String BASE_PATH = "/tmp/databuild_test/examples/basic_graph/";
+
+    public static void main(String[] args) {
+        if (args.length < 1) {
+            System.err.println("Usage: UnifiedGenerateNumber {config|exec} [args...]");
+            System.exit(1);
+        }
+
+        String command = args[0];
+        switch (command) {
+            case "config":
+                handleConfig(Arrays.copyOfRange(args, 1, args.length));
+                break;
+            case "exec":
+                handleExec(Arrays.copyOfRange(args, 1, args.length));
+                break;
+            default:
+                System.err.println("Unknown command: " + command);
+                System.err.println("Usage: UnifiedGenerateNumber {config|exec} [args...]");
+                System.exit(1);
+        }
+    }
+
+    private static void handleConfig(String[] args) {
+        if (args.length < 1) {
+            System.err.println("Config mode requires partition ref");
+            System.exit(1);
+        }
+
+        String partitionRef = args[0];
+
+        try {
+            ObjectMapper mapper = new ObjectMapper();
+
+            // Create job configuration
+            var config = mapper.createObjectNode();
+            config.set("outputs", mapper.createArrayNode().add(partitionRef));
+            config.set("inputs", mapper.createArrayNode());
+            config.set("args", mapper.createArrayNode().add("will").add("generate").add(partitionRef));
+            config.set("env", mapper.createObjectNode().put("PARTITION_REF", partitionRef));
+
+            var response = mapper.createObjectNode();
+            response.set("configs", mapper.createArrayNode().add(config));
+
+            System.out.println(mapper.writeValueAsString(response));
+        } catch (Exception e) {
+            System.err.println("Error creating config: " + e.getMessage());
+            System.exit(1);
+        }
+    }
+
+    private static void handleExec(String[] args) {
+        if (args.length < 3) {
+            System.err.println("Execute mode requires: will generate <partition_ref>");
+            System.exit(1);
+        }
+
+        String partitionRef = args[2];
+
+        try {
+            // Generate a random number based on the partition ref
+            MessageDigest md = MessageDigest.getInstance("SHA-256");
+            byte[] hash = md.digest(partitionRef.getBytes(StandardCharsets.UTF_8));
+            long seed = 0;
+            for (int i = 0; i < 8; i++) {
+                seed = (seed << 8) | (hash[i] & 0xFF);
+            }
+
+            Random random = new Random(seed);
+            int randomNumber = random.nextInt(100) + 1;
+
+            // Write to file; create the file's own parent dir since the ref may contain '/'
+            File outputFile = new File(BASE_PATH, partitionRef + ".txt");
+            File outputDir = outputFile.getParentFile();
+            outputDir.mkdirs();
+
+            try (FileWriter writer = new FileWriter(outputFile)) {
+                writer.write(String.valueOf(randomNumber));
+            }
+
+            System.out.println("Generated number " + randomNumber + " for partition " + partitionRef);
+
+        } catch (Exception e) {
+            System.err.println("Error in execution: " + e.getMessage());
+            System.exit(1);
+        }
+    }
+}
diff --git a/examples/basic_job/BUILD.bazel b/examples/basic_job/BUILD.bazel
index 1e85353..68798f4 100644
--- a/examples/basic_job/BUILD.bazel
+++ b/examples/basic_job/BUILD.bazel
@@ -16,3 +16,15 @@ sh_binary(
     name = "test_job_execute",
     srcs = ["execute.sh"],
 )
+
+# New unified approach
+databuild_job(
+    name = "unified_test_job",
+    binary = ":unified_job_binary",
+    visibility = ["//visibility:public"],
+)
+
+sh_binary(
+    name = "unified_job_binary",
+    srcs = ["unified_job.sh"],
+)
diff --git a/examples/basic_job/unified_job.sh b/examples/basic_job/unified_job.sh
new file mode 100755
index 0000000..8726246
--- /dev/null
+++ b/examples/basic_job/unified_job.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Simple unified job that handles both config and exec via subcommands
+
+case "${1:-}" in
+    "config")
+        # Configuration mode - output job config JSON
+        partition_ref="${2:-}"
+        echo "{\"configs\":[{\"outputs\":[\"${partition_ref}\"],\"inputs\":[],\"args\":[\"will\", \"build\", \"${partition_ref}\"],\"env\":{\"foo\":\"bar\"}}]}"
+        ;;
+    "exec")
+        # Execution mode - run the job
+        echo 'EXECUTE UNIFIED!'
+        echo "foo=$foo"
+        echo "args=$*"
+        ;;
+    *)
+        echo "Usage: $0 {config|exec} [args...]" >&2
+        exit 1
+        ;;
+esac