Implement single target strategy
Some checks are pending
/ setup (push) Waiting to run

This commit is contained in:
Stuart Axelbrooke 2025-06-29 20:08:21 -07:00
parent 1192caeb5d
commit c72e122975
7 changed files with 237 additions and 20 deletions

View file

@ -3,6 +3,8 @@ set -e
%{RUNFILES_PREFIX}
%{PREFIX}
EXECUTE_BINARY="$(rlocation "_main/$(basename "%{EXECUTE_PATH}")")"
JQ="$(rlocation "databuild+/databuild/runtime/$(basename "%{JQ_PATH}")")"
@ -44,4 +46,8 @@ while IFS= read -r arg; do
done < <("$JQ" -r '.args[]' "$CONFIG_FILE")
# Run the execution with both environment variables (already set) and arguments
exec "$EXECUTE_BINARY" "${ARGS[@]}"
if [[ -n "${EXECUTE_SUBCOMMAND:-}" ]]; then
exec "$EXECUTE_BINARY" "${EXECUTE_SUBCOMMAND}" "${ARGS[@]}"
else
exec "$EXECUTE_BINARY" "${ARGS[@]}"
fi

View file

@ -30,32 +30,56 @@ source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
def databuild_job(
name,
configure,
execute,
configure = None,
execute = None,
binary = None,
visibility = None):
"""Creates a DataBuild job target with configuration and execution capabilities.
Args:
name: Name of the job target
configure: Target that implements the configuration logic
execute: Target that implements the execution logic
deps: List of other job_targets this job depends on
configure: Target that implements the configuration logic (optional if binary is provided)
execute: Target that implements the execution logic (optional if binary is provided)
binary: Single binary target that handles both config and exec via subcommands (optional)
visibility: Visibility specification
**kwargs: Additional attributes to pass to the underlying rule
"""
_databuild_job_cfg_rule(
name = name + ".cfg",
configure = configure,
visibility = visibility,
)
# Validate arguments
if binary and (configure or execute):
fail("Cannot specify both 'binary' and 'configure'/'execute' parameters")
if not binary and not (configure and execute):
fail("Must specify either 'binary' or both 'configure' and 'execute' parameters")
# Create the main rule that serves as a provider for other targets
_databuild_job_exec_rule(
name = name + ".exec",
execute = execute,
visibility = visibility,
)
if binary:
# Single binary approach - use subcommands
_databuild_job_cfg_rule(
name = name + ".cfg",
configure = binary,
use_subcommand = True,
visibility = visibility,
)
_databuild_job_exec_rule(
name = name + ".exec",
execute = binary,
use_subcommand = True,
visibility = visibility,
)
else:
# Legacy approach - separate binaries
_databuild_job_cfg_rule(
name = name + ".cfg",
configure = configure,
use_subcommand = False,
visibility = visibility,
)
_databuild_job_exec_rule(
name = name + ".exec",
execute = execute,
use_subcommand = False,
visibility = visibility,
)
# Create a job target that configures then executes
_databuild_job_rule(
@ -70,13 +94,21 @@ def _databuild_job_cfg_impl(ctx):
configure_path = ctx.attr.configure.files_to_run.executable.path
script = ctx.actions.declare_file(ctx.label.name)
# Handle subcommand for single binary approach
if ctx.attr.use_subcommand:
executable_cmd = configure_path + " config"
prefix = "EXECUTABLE_SUBCOMMAND=\"config\"\n"
else:
executable_cmd = configure_path
prefix = ""
ctx.actions.expand_template(
template = ctx.file._template,
output = script,
substitutions = {
"%{EXECUTABLE_PATH}": configure_path,
"%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
"%{PREFIX}": "",
"%{PREFIX}": prefix,
},
is_executable = True,
)
@ -103,6 +135,10 @@ _databuild_job_cfg_rule = rule(
cfg = "target",
mandatory = True,
),
"use_subcommand": attr.bool(
doc = "Whether to append 'config' subcommand to executable",
default = False,
),
"_template": attr.label(
default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
allow_single_file = True,
@ -124,6 +160,12 @@ def _databuild_job_exec_impl(ctx):
# Get the correct runfiles paths
jq_path = ctx.attr._jq.files_to_run.executable.path
execute_path = ctx.attr.execute.files_to_run.executable.path
# Handle subcommand for single binary approach
if ctx.attr.use_subcommand:
prefix = "EXECUTE_SUBCOMMAND=\"exec\"\n"
else:
prefix = ""
ctx.actions.expand_template(
template = ctx.file._template,
@ -132,6 +174,7 @@ def _databuild_job_exec_impl(ctx):
"%{JQ_PATH}": jq_path,
"%{EXECUTE_PATH}": execute_path,
"%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
"%{PREFIX}": prefix,
},
is_executable = True,
)
@ -168,6 +211,10 @@ _databuild_job_exec_rule = rule(
executable = True,
cfg = "target",
),
"use_subcommand": attr.bool(
doc = "Whether to append 'exec' subcommand to executable",
default = False,
),
"_template": attr.label(
default = "@databuild//databuild/job:execute_wrapper.sh.tpl",
allow_single_file = True,

View file

@ -8,4 +8,8 @@ set -e
EXECUTABLE_BINARY="$(rlocation "_main/$(basename "%{EXECUTABLE_PATH}")")"
# Run the configuration
exec "${EXECUTABLE_BINARY}" "$@"
if [[ -n "${EXECUTABLE_SUBCOMMAND:-}" ]]; then
exec "${EXECUTABLE_BINARY}" "${EXECUTABLE_SUBCOMMAND}" "$@"
else
exec "${EXECUTABLE_BINARY}" "$@"
fi

View file

@ -66,6 +66,13 @@ databuild_job(
visibility = ["//visibility:public"],
)
# Unified approach test: a single `binary` target handles both the config and
# exec steps via subcommands, instead of separate `configure`/`execute` targets.
databuild_job(
name = "unified_generate_job",
binary = ":unified_generate_binary",
visibility = ["//visibility:public"],
)
java_binary(
name = "sum_configure",
srcs = glob(["*.java"]),
@ -86,3 +93,14 @@ java_binary(
]),
main_class = "com.databuild.examples.basic_graph.SumExecute",
)
# Single binary used by :unified_generate_job. Its main class dispatches on a
# leading `config` / `exec` subcommand; Jackson is used to build the JSON config.
java_binary(
name = "unified_generate_binary",
srcs = ["UnifiedGenerateNumber.java"],
main_class = "com.databuild.examples.basic_graph.UnifiedGenerateNumber",
deps = [
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
"@maven//:com_fasterxml_jackson_core_jackson_core",
"@maven//:com_fasterxml_jackson_core_jackson_databind",
],
)

View file

@ -0,0 +1,109 @@
package com.databuild.examples.basic_graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import java.util.ArrayList;
import java.util.List;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Random;
/**
 * Unified job that handles both configuration and execution via subcommands.
 *
 * <p>The first command-line argument selects the mode:
 * <ul>
 *   <li>{@code config <partition_ref>} prints a JSON job configuration on stdout;</li>
 *   <li>{@code exec will generate <partition_ref>} writes a number derived from the
 *       partition ref to {@code <BASE_PATH>/<partition_ref>.txt}.</li>
 * </ul>
 * Usage errors and failures are reported on stderr and the process exits with status 1.
 */
public class UnifiedGenerateNumber {
    /** Directory under which generated partition files are written. */
    public static String BASE_PATH = "/tmp/databuild_test/examples/basic_graph/";

    /**
     * Entry point: dispatches to the handler named by the first argument.
     *
     * @param args subcommand ({@code config} or {@code exec}) followed by its arguments
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: UnifiedGenerateNumber {config|exec} [args...]");
            System.exit(1);
        }

        String command = args[0];
        // Everything after the subcommand belongs to the selected handler.
        String[] commandArgs = Arrays.copyOfRange(args, 1, args.length);
        switch (command) {
            case "config":
                handleConfig(commandArgs);
                break;
            case "exec":
                handleExec(commandArgs);
                break;
            default:
                System.err.println("Unknown command: " + command);
                System.err.println("Usage: UnifiedGenerateNumber {config|exec} [args...]");
                System.exit(1);
        }
    }

    /**
     * Prints the job configuration for a single partition as JSON on stdout.
     *
     * <p>The emitted config lists the partition ref as its only output, has no inputs,
     * passes {@code will generate <partition_ref>} as arguments and sets the
     * {@code PARTITION_REF} environment variable.
     *
     * @param args handler arguments; {@code args[0]} is the partition ref
     */
    private static void handleConfig(String[] args) {
        if (args.length < 1) {
            System.err.println("Config mode requires partition ref");
            System.exit(1);
        }

        String partitionRef = args[0];

        try {
            ObjectMapper mapper = new ObjectMapper();

            // Create job configuration
            var config = mapper.createObjectNode();
            config.set("outputs", mapper.createArrayNode().add(partitionRef));
            config.set("inputs", mapper.createArrayNode());
            config.set("args", mapper.createArrayNode().add("will").add("generate").add(partitionRef));
            config.set("env", mapper.createObjectNode().put("PARTITION_REF", partitionRef));

            var response = mapper.createObjectNode();
            response.set("configs", mapper.createArrayNode().add(config));

            System.out.println(mapper.writeValueAsString(response));
        } catch (Exception e) {
            System.err.println("Error creating config: " + e.getMessage());
            System.exit(1);
        }
    }

    /**
     * Generates the number for a partition and writes it to its output file.
     *
     * <p>The number is in the range 1..100 and is derived from a SHA-256 hash of the
     * partition ref, so the same ref always produces the same number.
     *
     * @param args handler arguments; only {@code args[2]} (the partition ref) is read,
     *             the two leading words are not validated
     */
    private static void handleExec(String[] args) {
        if (args.length < 3) {
            System.err.println("Execute mode requires: will generate <partition_ref>");
            System.exit(1);
        }

        String partitionRef = args[2];

        try {
            // Seed the generator from the first 8 bytes of the ref's SHA-256 digest.
            MessageDigest md = MessageDigest.getInstance("SHA-256");
            byte[] hash = md.digest(partitionRef.getBytes(StandardCharsets.UTF_8));
            long seed = 0;
            for (int i = 0; i < 8; i++) {
                seed = (seed << 8) | (hash[i] & 0xFF);
            }

            Random random = new Random(seed);
            int randomNumber = random.nextInt(100) + 1;

            // Write to file. The partition ref may contain path separators, so create
            // the output file's own parent directory rather than only BASE_PATH.
            File outputFile = new File(new File(BASE_PATH), partitionRef + ".txt");
            File parentDir = outputFile.getParentFile();
            if (parentDir != null && !parentDir.mkdirs() && !parentDir.isDirectory()) {
                throw new IOException("Could not create output directory " + parentDir);
            }

            try (FileWriter writer = new FileWriter(outputFile)) {
                writer.write(String.valueOf(randomNumber));
            }

            System.out.println("Generated number " + randomNumber + " for partition " + partitionRef);
        } catch (Exception e) {
            System.err.println("Error in execution: " + e.getMessage());
            System.exit(1);
        }
    }
}

View file

@ -16,3 +16,15 @@ sh_binary(
name = "test_job_execute",
srcs = ["execute.sh"],
)
# Unified approach: one `binary` target serves both the config and exec steps
# via subcommands, instead of separate `configure`/`execute` targets.
databuild_job(
name = "unified_test_job",
binary = ":unified_job_binary",
visibility = ["//visibility:public"],
)
# Shell binary used by :unified_test_job as its single `binary` target.
sh_binary(
name = "unified_job_binary",
srcs = ["unified_job.sh"],
)

View file

@ -0,0 +1,21 @@
#!/bin/bash
# Simple unified job that handles both config and exec via subcommands.
#
# Usage: unified_job.sh {config|exec} [args...]
#   config <partition_ref>  Print the job config JSON for <partition_ref> on stdout.
#   exec [args...]          Run the job: print the `foo` env var and all arguments.
#
# Exits with status 1 on an unknown subcommand or a missing partition ref.

case "${1:-}" in
    "config")
        # Configuration mode - output job config JSON
        partition_ref="${2:-}"
        if [[ -z "${partition_ref}" ]]; then
            # Without a ref the config would declare an empty output; fail instead.
            echo "Config mode requires partition ref" >&2
            exit 1
        fi
        # NOTE: the ref is interpolated without JSON escaping; this assumes it
        # contains no double quotes or backslashes.
        echo "{\"configs\":[{\"outputs\":[\"${partition_ref}\"],\"inputs\":[],\"args\":[\"will\", \"build\", \"${partition_ref}\"],\"env\":{\"foo\":\"bar\"}}]}"
        ;;
    "exec")
        # Execution mode - run the job
        echo 'EXECUTE UNIFIED!'
        echo "foo=$foo"
        # "$*" joins the arguments into one word; "$@" inside a string mixes
        # string and array expansion (same output here, but error-prone).
        echo "args=$*"
        ;;
    *)
        # Usage errors go to stderr so they never pollute stdout.
        echo "Usage: $0 {config|exec} [args...]" >&2
        exit 1
        ;;
esac