commit 48a1288891 (parent c72e122975)
13 changed files with 215 additions and 360 deletions

@@ -30,56 +30,28 @@ source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
 def databuild_job(
         name,
-        configure = None,
-        execute = None,
-        binary = None,
+        binary,
         visibility = None):
     """Creates a DataBuild job target with configuration and execution capabilities.
 
     Args:
         name: Name of the job target
-        configure: Target that implements the configuration logic (optional if binary is provided)
-        execute: Target that implements the execution logic (optional if binary is provided)
-        binary: Single binary target that handles both config and exec via subcommands (optional)
+        binary: Single binary target that handles both config and exec via subcommands
         visibility: Visibility specification
     """
 
-    # Validate arguments
-    if binary and (configure or execute):
-        fail("Cannot specify both 'binary' and 'configure'/'execute' parameters")
-    if not binary and not (configure and execute):
-        fail("Must specify either 'binary' or both 'configure' and 'execute' parameters")
-
-    if binary:
-        # Single binary approach - use subcommands
-        _databuild_job_cfg_rule(
-            name = name + ".cfg",
-            configure = binary,
-            use_subcommand = True,
-            visibility = visibility,
-        )
-
-        _databuild_job_exec_rule(
-            name = name + ".exec",
-            execute = binary,
-            use_subcommand = True,
-            visibility = visibility,
-        )
-    else:
-        # Legacy approach - separate binaries
-        _databuild_job_cfg_rule(
-            name = name + ".cfg",
-            configure = configure,
-            use_subcommand = False,
-            visibility = visibility,
-        )
-
-        _databuild_job_exec_rule(
-            name = name + ".exec",
-            execute = execute,
-            use_subcommand = False,
-            visibility = visibility,
-        )
+    # Single binary approach - use subcommands
+    _databuild_job_cfg_rule(
+        name = name + ".cfg",
+        configure = binary,
+        visibility = visibility,
+    )
+
+    _databuild_job_exec_rule(
+        name = name + ".exec",
+        execute = binary,
+        visibility = visibility,
+    )
 
     # Create a job target that configures then executes
     _databuild_job_rule(
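
After this change, every job must supply one binary that dispatches on a `config`/`exec` subcommand. A minimal sketch of that contract in Python (the `configs` JSON envelope matches the examples later in this commit; the field contents here are illustrative):

    #!/usr/bin/env python3
    import json
    import sys

    def main():
        # The generated .cfg/.exec wrappers invoke the binary with "config" or "exec".
        if len(sys.argv) < 2 or sys.argv[1] not in ("config", "exec"):
            sys.exit("usage: job_binary {config|exec} [args...]")
        if sys.argv[1] == "config":
            # Config mode: print a JSON job config for the requested partition(s).
            print(json.dumps({"configs": [{"outputs": sys.argv[2:], "inputs": [], "args": [], "env": {}}]}))
        else:
            # Exec mode: do the actual work for the partition(s).
            pass

    if __name__ == "__main__":
        main()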

@@ -94,21 +66,13 @@ def _databuild_job_cfg_impl(ctx):
     configure_path = ctx.attr.configure.files_to_run.executable.path
     script = ctx.actions.declare_file(ctx.label.name)
-
-    # Handle subcommand for single binary approach
-    if ctx.attr.use_subcommand:
-        executable_cmd = configure_path + " config"
-        prefix = "EXECUTABLE_SUBCOMMAND=\"config\"\n"
-    else:
-        executable_cmd = configure_path
-        prefix = ""
-
     ctx.actions.expand_template(
         template = ctx.file._template,
         output = script,
         substitutions = {
             "%{EXECUTABLE_PATH}": configure_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
-            "%{PREFIX}": prefix,
+            "%{PREFIX}": "EXECUTABLE_SUBCOMMAND=\"config\"\n",
         },
         is_executable = True,
     )
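
The wrapper template itself is not part of this diff, so the following is only a sketch of the mechanism: expand_template does literal string replacement, and the %{PREFIX} slot now unconditionally injects the subcommand variable into the generated shell script. The template body below is hypothetical:

    # Hypothetical template body; only the substitution mechanics are illustrated.
    template = '%{RUNFILES_PREFIX}\n%{PREFIX}exec "%{EXECUTABLE_PATH}" $EXECUTABLE_SUBCOMMAND "$@"\n'
    script = (
        template
        .replace("%{RUNFILES_PREFIX}", "# runfiles setup elided")
        .replace("%{PREFIX}", 'EXECUTABLE_SUBCOMMAND="config"\n')
        .replace("%{EXECUTABLE_PATH}", "path/to/configure_binary")
    )
    print(script)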

@@ -135,10 +99,6 @@ _databuild_job_cfg_rule = rule(
             cfg = "target",
             mandatory = True,
         ),
-        "use_subcommand": attr.bool(
-            doc = "Whether to append 'config' subcommand to executable",
-            default = False,
-        ),
         "_template": attr.label(
             default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
             allow_single_file = True,

@@ -160,12 +120,6 @@ def _databuild_job_exec_impl(ctx):
     # Get the correct runfiles paths
     jq_path = ctx.attr._jq.files_to_run.executable.path
     execute_path = ctx.attr.execute.files_to_run.executable.path
-
-    # Handle subcommand for single binary approach
-    if ctx.attr.use_subcommand:
-        prefix = "EXECUTE_SUBCOMMAND=\"exec\"\n"
-    else:
-        prefix = ""
-
     ctx.actions.expand_template(
         template = ctx.file._template,

@@ -174,7 +128,7 @@ def _databuild_job_exec_impl(ctx):
             "%{JQ_PATH}": jq_path,
             "%{EXECUTE_PATH}": execute_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
-            "%{PREFIX}": prefix,
+            "%{PREFIX}": "EXECUTE_SUBCOMMAND=\"exec\"\n",
         },
         is_executable = True,
     )

@@ -211,10 +165,6 @@ _databuild_job_exec_rule = rule(
             executable = True,
             cfg = "target",
         ),
-        "use_subcommand": attr.bool(
-            doc = "Whether to append 'exec' subcommand to executable",
-            default = False,
-        ),
         "_template": attr.label(
             default = "@databuild//databuild/job:execute_wrapper.sh.tpl",
             allow_single_file = True,

@@ -35,67 +35,12 @@ py_binary(
 databuild_job(
     name = "generate_number_job",
-    configure = ":generate_number_configure",
-    execute = ":generate_number_execute",
+    binary = ":generate_number_binary",
     visibility = ["//visibility:public"],
 )
 
 java_binary(
-    name = "generate_number_configure",
-    srcs = glob(["*.java"]),
-    create_executable = True,
-    main_class = "com.databuild.examples.basic_graph.GenerateConfigure",
-    deps = [
-        "@maven//:com_fasterxml_jackson_core_jackson_annotations",
-        "@maven//:com_fasterxml_jackson_core_jackson_core",
-        "@maven//:com_fasterxml_jackson_core_jackson_databind",
-        "@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
-    ],
-)
-
-java_binary(
-    name = "generate_number_execute",
-    srcs = glob(["GenerateExecute.java"]),
-    main_class = "com.databuild.examples.basic_graph.GenerateExecute",
-)
-
-databuild_job(
-    name = "sum_job",
-    configure = ":sum_configure",
-    execute = ":sum_execute",
-    visibility = ["//visibility:public"],
-)
-
-# New unified approach test
-databuild_job(
-    name = "unified_generate_job",
-    binary = ":unified_generate_binary",
-    visibility = ["//visibility:public"],
-)
-
-java_binary(
-    name = "sum_configure",
-    srcs = glob(["*.java"]),
-    main_class = "com.databuild.examples.basic_graph.SumConfigure",
-    deps = [
-        "@maven//:com_fasterxml_jackson_core_jackson_annotations",
-        "@maven//:com_fasterxml_jackson_core_jackson_core",
-        "@maven//:com_fasterxml_jackson_core_jackson_databind",
-        "@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
-    ],
-)
-
-java_binary(
-    name = "sum_execute",
-    srcs = glob([
-        "SumExecute.java",
-        "GenerateExecute.java",
-    ]),
-    main_class = "com.databuild.examples.basic_graph.SumExecute",
-)
-
-java_binary(
-    name = "unified_generate_binary",
+    name = "generate_number_binary",
     srcs = ["UnifiedGenerateNumber.java"],
     main_class = "com.databuild.examples.basic_graph.UnifiedGenerateNumber",
     deps = [

@@ -104,3 +49,20 @@ java_binary(
         "@maven//:com_fasterxml_jackson_core_jackson_databind",
     ],
 )
+
+databuild_job(
+    name = "sum_job",
+    binary = ":sum_binary",
+    visibility = ["//visibility:public"],
+)
+
+java_binary(
+    name = "sum_binary",
+    srcs = ["UnifiedSum.java"],
+    main_class = "com.databuild.examples.basic_graph.UnifiedSum",
+    deps = [
+        "@maven//:com_fasterxml_jackson_core_jackson_annotations",
+        "@maven//:com_fasterxml_jackson_core_jackson_core",
+        "@maven//:com_fasterxml_jackson_core_jackson_databind",
+    ],
+)

examples/basic_graph/GenerateConfigure.java (deleted)
@@ -1,46 +0,0 @@
-package com.databuild.examples.basic_graph;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.io.File;
-import java.util.Arrays;
-import java.util.Collections;
-
-/**
- * Configure class for generating a random number.
- * This class creates a job configuration for generating a random number based on the partition ref.
- */
-public class GenerateConfigure {
-    public static void main(String[] args) {
-        if (args.length < 1) {
-            System.err.println("Error: Partition ref is required");
-            System.exit(1);
-        }
-        List<JobConfig> configList = new ArrayList<>();
-
-        // Process each partition ref from input arguments
-        Arrays.stream(args).forEach(partitionRef -> {
-            // Create and populate JobConfig object
-            JobConfig config = new JobConfig();
-            config.outputs = Collections.singletonList(partitionRef);
-            config.args = Arrays.asList(partitionRef);
-            // inputs and env are already initialized as empty collections in the constructor
-            configList.add(config);
-        });
-
-        try {
-            ObjectMapper mapper = new ObjectMapper();
-
-            // Convert config list to JsonNode and serialize
-            JsonNode configNode = mapper.valueToTree(configList);
-            String jsonConfig = mapper.writeValueAsString(configNode);
-            System.out.println(jsonConfig);
-        } catch (Exception e) {
-            System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
-            System.exit(1);
-        }
-    }
-}

examples/basic_graph/GenerateExecute.java (deleted)
@@ -1,70 +0,0 @@
-package com.databuild.examples.basic_graph;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.Random;
-
-/**
- * Execute class for generating a random number.
- * This class generates a random number based on the partition ref.
- */
-public class GenerateExecute {
-    public static String BASE_PATH = "/tmp/databuild_test/examples/basic_graph/";
-
-    public static void main(String[] args) {
-        if (args.length < 1) {
-            System.err.println("Error: Partition ref (output path) is required");
-            System.exit(1);
-        }
-
-        String partitionRef = args[0];
-
-        try {
-            // Create a hash of the partition ref to use as a seed
-            MessageDigest md = MessageDigest.getInstance("MD5");
-            byte[] hashBytes = md.digest(partitionRef.getBytes(StandardCharsets.UTF_8));
-
-            // Convert the first 8 bytes of the hash to a long to use as a seed
-            long seed = 0;
-            for (int i = 0; i < Math.min(8, hashBytes.length); i++) {
-                seed = (seed << 8) | (hashBytes[i] & 0xff);
-            }
-
-            // Create a random number generator with the seed
-            Random random = new Random(seed);
-
-            // Generate a random number
-            int randomNumber = random.nextInt(100);
-
-            // Write the random number to the output file
-            // Ensure dir exists
-            java.io.File parent = new java.io.File(partitionRef).getParentFile();
-            if (parent != null) {
-                parent.mkdirs();
-            }
-            try (FileWriter writer = new FileWriter(partitionRef)) {
-                writer.write("Random number for partition " + partitionRef + ": " + randomNumber);
-            }
-
-            System.out.println("Generated random number " + randomNumber + " for partition " + partitionRef);
-
-            // Write the random number to the output file
-            String outputPath = partitionRef;
-            System.out.println("Writing random number " + randomNumber + " to " + outputPath);
-            // Ensure dir exists
-            new java.io.File(outputPath).getParentFile().mkdirs();
-            // Write number (overwrite)
-            try (FileWriter writer = new FileWriter(outputPath)) {
-                writer.write(String.valueOf(randomNumber));
-            }
-
-        } catch (NoSuchAlgorithmException | IOException e) {
-            System.err.println("Error: " + e.getMessage());
-            e.printStackTrace();
-            System.exit(1);
-        }
-    }
-}

examples/basic_graph/SumConfigure.java (deleted)
@@ -1,58 +0,0 @@
-package com.databuild.examples.basic_graph;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.stream.Collectors;
-
-import static com.databuild.examples.basic_graph.GenerateExecute.BASE_PATH;
-
-/**
- * Configure class for generating a random number.
- * This class creates a job configuration for generating a random number based on the partition ref.
- */
-public class SumConfigure {
-    public static void main(String[] args) {
-        if (args.length != 1) {
-            System.err.println("Error: Must provide exactly one partition ref as an argument");
-            System.exit(1);
-        }
-
-        String partitionRef = args[0];
-        String[] pathParts = partitionRef.split("/");
-        String[] upstreams = Arrays.stream(pathParts[pathParts.length - 1].split("_"))
-                .map(part -> BASE_PATH + "generated_number/" + part)
-                .toArray(String[]::new);
-
-        // Create and populate JobConfig object
-        JobConfig config = new JobConfig();
-        config.outputs = Collections.singletonList(BASE_PATH + "sum/" + partitionRef);
-        config.inputs = Arrays.stream(upstreams)
-                .map(upstream -> {
-                    DataDep dep = new DataDep();
-                    dep.depType = "materialize";
-                    dep.ref = upstream;
-                    return dep;
-                })
-                .collect(Collectors.toList());
-        config.args = Arrays.asList(upstreams);
-        // Create a hashmap for env with {"OUTPUT_REF": "foo"}
-        config.env = Collections.singletonMap("OUTPUT_REF", args[0]);
-        // inputs and env are already initialized as empty collections in the constructor
-
-        try {
-            ObjectMapper mapper = new ObjectMapper();
-
-            // Convert config to JsonNode and serialize
-            JsonNode configNode = mapper.valueToTree(Collections.singletonList(config));
-            String jsonConfig = mapper.writeValueAsString(configNode);
-            System.out.println(jsonConfig);
-        } catch (Exception e) {
-            System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
-            System.exit(1);
-        }
-    }
-}

examples/basic_graph/SumExecute.java (deleted)
@@ -1,44 +0,0 @@
-package com.databuild.examples.basic_graph;
-
-import static com.databuild.examples.basic_graph.GenerateExecute.BASE_PATH;
-
-public class SumExecute {
-    public static void main(String[] args) {
-        if (args.length < 1) {
-            System.err.println("Error: Partition ref (output path) is required");
-            System.exit(1);
-        }
-
-        // Get output ref from env var OUTPUT_REF
-        String outputRef = System.getenv("OUTPUT_REF");
-
-        // For each arg, load it from the file system and add it to the sum
-        int sum = 0;
-        for (String partitionRef : args) {
-            try {
-                String path = partitionRef;
-                int partitionValue = Integer.parseInt(new String(java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(path))));
-                System.out.println("Summing partition " + partitionRef + " with value " + partitionValue);
-                sum += partitionValue;
-            } catch (Exception e) {
-                System.err.println("Error: Failed to read partition " + partitionRef + ": " + e.getMessage());
-                e.printStackTrace();
-            }
-        }
-        System.out.println("Sum of " + args.length + " partitions: " + sum);
-        // Write the sum to the output file
-        String outPath = outputRef;
-        System.out.println("Writing sum " + sum + " to " + outPath);
-
-        java.io.File parent = new java.io.File(outPath).getParentFile();
-        if (parent != null) {
-            parent.mkdirs();
-        }
-
-        try (java.io.FileWriter writer = new java.io.FileWriter(outPath)) {
-            writer.write(String.valueOf(sum));
-        } catch (Exception e) {
-            System.err.println("Error: Failed to write sum to " + outputRef + ": " + e.getMessage());
-        }
-    }
-}

examples/basic_graph/UnifiedSum.java (new file, 130 lines)
@@ -0,0 +1,130 @@
+package com.databuild.examples.basic_graph;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.stream.Collectors;
+import java.io.FileWriter;
+import java.io.IOException;
+
+// import static com.databuild.examples.basic_graph.GenerateExecute.BASE_PATH;
+
+/**
+ * Unified sum job that handles both configuration and execution via subcommands.
+ */
+public class UnifiedSum {
+    public static String BASE_PATH = "/tmp/databuild_test/examples/basic_graph/";
+
+    public static void main(String[] args) {
+        if (args.length < 1) {
+            System.err.println("Usage: UnifiedSum {config|exec} [args...]");
+            System.exit(1);
+        }
+
+        String command = args[0];
+        switch (command) {
+            case "config":
+                handleConfig(Arrays.copyOfRange(args, 1, args.length));
+                break;
+            case "exec":
+                handleExec(Arrays.copyOfRange(args, 1, args.length));
+                break;
+            default:
+                System.err.println("Unknown command: " + command);
+                System.err.println("Usage: UnifiedSum {config|exec} [args...]");
+                System.exit(1);
+        }
+    }
+
+    private static void handleConfig(String[] args) {
+        if (args.length != 1) {
+            System.err.println("Config mode requires exactly one partition ref");
+            System.exit(1);
+        }
+
+        String partitionRef = args[0];
+        String[] pathParts = partitionRef.split("/");
+        String[] upstreams = Arrays.stream(pathParts[pathParts.length - 1].split("_"))
+                .map(part -> BASE_PATH + "generated_number/" + part)
+                .toArray(String[]::new);
+
+        try {
+            ObjectMapper mapper = new ObjectMapper();
+
+            // Create data dependencies
+            var inputs = mapper.createArrayNode();
+            for (String upstream : upstreams) {
+                var dataDep = mapper.createObjectNode();
+                dataDep.put("dep_type", 0); // QUERY
+                var partRef = mapper.createObjectNode();
+                partRef.put("str", upstream);
+                dataDep.set("partition_ref", partRef);
+                inputs.add(dataDep);
+            }
+
+            // Create job configuration
+            var config = mapper.createObjectNode();
+            config.set("outputs", mapper.createArrayNode().add(partitionRef));
+            config.set("inputs", inputs);
+            var argsArray = mapper.createArrayNode();
+            for (String upstream : upstreams) {
+                argsArray.add(upstream);
+            }
+            config.set("args", argsArray);
+            config.set("env", mapper.createObjectNode().put("OUTPUT_REF", partitionRef));
+
+            var response = mapper.createObjectNode();
+            response.set("configs", mapper.createArrayNode().add(config));
+
+            System.out.println(mapper.writeValueAsString(response));
+        } catch (Exception e) {
+            System.err.println("Error creating config: " + e.getMessage());
+            System.exit(1);
+        }
+    }
+
+    private static void handleExec(String[] args) {
+        // Get output ref from env var OUTPUT_REF
+        String outputRef = System.getenv("OUTPUT_REF");
+
+        if (outputRef == null) {
+            System.err.println("Error: OUTPUT_REF environment variable is required");
+            System.exit(1);
+        }
+
+        // For each arg, load it from the file system and add it to the sum
+        int sum = 0;
+        for (String partitionRef : args) {
+            try {
+                String path = partitionRef;
+                int partitionValue = Integer.parseInt(new String(java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(path))));
+                System.out.println("Summing partition " + partitionRef + " with value " + partitionValue);
+                sum += partitionValue;
+            } catch (Exception e) {
+                System.err.println("Error: Failed to read partition " + partitionRef + ": " + e.getMessage());
+                e.printStackTrace();
+                System.exit(1);
+            }
+        }
+
+        System.out.println("Sum of " + args.length + " partitions: " + sum);
+
+        // Write the sum to the output file
+        try {
+            File outputDir = new File(outputRef).getParentFile();
+            if (outputDir != null) {
+                outputDir.mkdirs();
+            }
+            try (FileWriter writer = new FileWriter(outputRef)) {
+                writer.write(String.valueOf(sum));
+            }
+            System.out.println("Wrote sum " + sum + " to " + outputRef);
+        } catch (Exception e) {
+            System.err.println("Error writing output: " + e.getMessage());
+            System.exit(1);
+        }
+    }
+}
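
To make the config-mode naming convention concrete: handleConfig splits the last path segment of the output ref on "_" and maps each piece to an upstream generated_number partition. The same derivation in Python (the partition ref below is made up):

    BASE_PATH = "/tmp/databuild_test/examples/basic_graph/"

    def upstreams(partition_ref):
        # Mirrors UnifiedSum.handleConfig: take the last path segment, split on "_".
        last = partition_ref.split("/")[-1]
        return [BASE_PATH + "generated_number/" + part for part in last.split("_")]

    # A ref whose last segment is "a_b" depends on generated_number/a and generated_number/b.
    print(upstreams(BASE_PATH + "sum/a_b"))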

@@ -1,2 +0,0 @@
-# Create a test job config
-echo "{\"configs\":[{\"outputs\":[\"$1\"],\"inputs\":[],\"args\":[\"will\", \"build\", \"$1\"],\"env\":{\"foo\":\"bar\"}}]}"

@@ -1,3 +0,0 @@
-echo 'EXECUTE!'
-echo "foo=$foo"
-echo "args=$@"

@@ -11,20 +11,13 @@ compile_pip_requirements(
 databuild_job(
     name = "test_job",
-    configure = ":test_job_configure",
-    execute = ":test_job_execute",
+    binary = ":test_job_binary",
 )
 
 py_binary(
-    name = "test_job_configure",
-    srcs = ["configure.py"],
-    main = "configure.py",
-)
-
-py_binary(
-    name = "test_job_execute",
-    srcs = ["execute.py"],
-    main = "execute.py",
+    name = "test_job_binary",
+    srcs = ["unified_job.py"],
+    main = "unified_job.py",
 )
 
 py_repl(

examples/podcast_reviews/configure.py (deleted)
@@ -1,2 +0,0 @@
-
-print("Hello, gorgeous.")

examples/podcast_reviews/execute.py (deleted)
@@ -1,2 +0,0 @@
-
-print("What a time to be alive.")

examples/podcast_reviews/unified_job.py (new file, 47 lines)
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+import sys
+import json
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: unified_job.py {config|exec} [args...]", file=sys.stderr)
+        sys.exit(1)
+
+    command = sys.argv[1]
+
+    if command == "config":
+        handle_config(sys.argv[2:])
+    elif command == "exec":
+        handle_exec(sys.argv[2:])
+    else:
+        print(f"Unknown command: {command}", file=sys.stderr)
+        print("Usage: unified_job.py {config|exec} [args...]", file=sys.stderr)
+        sys.exit(1)
+
+def handle_config(args):
+    if len(args) < 1:
+        print("Config mode requires partition ref", file=sys.stderr)
+        sys.exit(1)
+
+    partition_ref = args[0]
+
+    config = {
+        "configs": [{
+            "outputs": [partition_ref],
+            "inputs": [],
+            "args": ["Hello", "gorgeous", partition_ref],
+            "env": {"PARTITION_REF": partition_ref}
+        }]
+    }
+
+    print(json.dumps(config))
+
+def handle_exec(args):
+    print("What a time to be alive.")
+    print(f"Partition ref: {os.getenv('PARTITION_REF', 'unknown')}")
+    print(f"Args: {args}")
+
+if __name__ == "__main__":
+    import os
+    main()
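
A quick way to sanity-check the config subcommand (invocation sketch; the partition ref is made up, and this assumes unified_job.py is on disk in the current directory):

    import json
    import subprocess

    # Run the job binary in config mode and parse the JSON it prints on stdout.
    out = subprocess.run(
        ["python3", "unified_job.py", "config", "/tmp/podcast_reviews/part1"],
        capture_output=True, text=True, check=True,
    ).stdout
    cfg = json.loads(out)["configs"][0]
    assert cfg["outputs"] == ["/tmp/podcast_reviews/part1"]
    assert cfg["env"]["PARTITION_REF"] == "/tmp/podcast_reviews/part1"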