This commit is contained in:
Stuart Axelbrooke 2025-04-17 21:51:15 -07:00
parent 6bff5ce658
commit 68608e412f
No known key found for this signature in database
GPG key ID: 1B0A848C29D46A35
13 changed files with 167 additions and 46 deletions

View file

@ -3,3 +3,9 @@ filegroup(
srcs = ["//runtime:jq"], srcs = ["//runtime:jq"],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
# Publishes the databuild JSON schema file as a publicly visible target so
# that other packages can reference it by label (e.g. as a `data` dependency).
filegroup(
name = "json_schema",
srcs = ["databuild.schema.json"],
visibility = ["//visibility:public"],
)

View file

@ -5,3 +5,4 @@ module(
bazel_dep(name = "bazel_skylib", version = "1.7.1") bazel_dep(name = "bazel_skylib", version = "1.7.1")
bazel_dep(name = "platforms", version = "0.0.11") bazel_dep(name = "platforms", version = "0.0.11")
bazel_dep(name = "rules_shell", version = "0.4.0")

View file

@ -121,7 +121,8 @@
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
"https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320", "https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c", "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95", "https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": "0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95",
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",

View file

@ -1,6 +1,21 @@
load("@databuild//:rules.bzl", "databuild_graph", "databuild_job") load("@databuild//:rules.bzl", "databuild_graph", "databuild_job")
load("@rules_java//java:defs.bzl", "java_binary") load("@rules_java//java:defs.bzl", "java_binary")
# Example graph that groups the two jobs declared in this package.
# NOTE(review): `plan` points at the `basic_graph_plan` binary; presumably it
# is the program that plans the graph, but the rule's implementation is not
# visible here -- confirm against rules.bzl.
databuild_graph(
name = "basic_graph",
jobs = [
":generate_number_job",
":sum_job",
],
plan = ":basic_graph_plan",
)
# Python entry point built from basic_graph.py; referenced as the `plan`
# attribute of the `basic_graph` target.
py_binary(
name = "basic_graph_plan",
srcs = ["basic_graph.py"],
main = "basic_graph.py",
)
databuild_job( databuild_job(
name = "generate_number_job", name = "generate_number_job",
configure = ":generate_number_configure", configure = ":generate_number_configure",
@ -9,13 +24,20 @@ databuild_job(
java_binary( java_binary(
name = "generate_number_configure", name = "generate_number_configure",
srcs = ["GenerateConfigure.java"], srcs = glob(["*.java"]),
data = ["@databuild//:json_schema"],
main_class = "com.databuild.examples.basic_graph.GenerateConfigure", main_class = "com.databuild.examples.basic_graph.GenerateConfigure",
deps = [
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
"@maven//:com_fasterxml_jackson_core_jackson_core",
"@maven//:com_fasterxml_jackson_core_jackson_databind",
"@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
],
) )
java_binary( java_binary(
name = "generate_number_execute", name = "generate_number_execute",
srcs = ["GenerateExecute.java"], srcs = glob(["GenerateExecute.java"]),
main_class = "com.databuild.examples.basic_graph.GenerateExecute", main_class = "com.databuild.examples.basic_graph.GenerateExecute",
) )
@ -27,15 +49,22 @@ databuild_job(
java_binary( java_binary(
name = "sum_configure", name = "sum_configure",
srcs = ["SumConfigure.java"], srcs = glob(["*.java"]),
data = ["@databuild//:json_schema"],
main_class = "com.databuild.examples.basic_graph.SumConfigure", main_class = "com.databuild.examples.basic_graph.SumConfigure",
deps = [
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
"@maven//:com_fasterxml_jackson_core_jackson_core",
"@maven//:com_fasterxml_jackson_core_jackson_databind",
"@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
],
) )
java_binary( java_binary(
name = "sum_execute", name = "sum_execute",
srcs = [ srcs = glob([
"GenerateExecute.java",
"SumExecute.java", "SumExecute.java",
], "GenerateExecute.java",
]),
main_class = "com.databuild.examples.basic_graph.SumExecute", main_class = "com.databuild.examples.basic_graph.SumExecute",
) )

View file

@ -0,0 +1,7 @@
package com.databuild.examples.basic_graph;
/**
 * Describes a single data dependency: the kind of dependency and the
 * reference it points at.
 *
 * <p>This is a plain mutable bean. The no-argument constructor is kept so the
 * class can still be instantiated exactly as before (and by bean-style
 * frameworks); the accessors make the otherwise unreachable private state
 * usable.
 */
public class DataDep {
    /** Dependency kind; expected to be {@code "query"} or {@code "materialize"}. */
    private String depType; // "query" or "materialize"

    /** The reference this dependency points at (presumably a partition ref -- confirm). */
    private String ref;

    /** Creates an empty dependency; both fields are {@code null} until set. */
    public DataDep() {
    }

    /**
     * Creates a fully populated dependency.
     *
     * @param depType dependency kind, {@code "query"} or {@code "materialize"}
     * @param ref the reference this dependency points at
     */
    public DataDep(String depType, String ref) {
        this.depType = depType;
        this.ref = ref;
    }

    /**
     * @return the dependency kind, or {@code null} if it has not been set
     */
    public String getDepType() {
        return depType;
    }

    /**
     * @param depType dependency kind, {@code "query"} or {@code "materialize"}
     */
    public void setDepType(String depType) {
        this.depType = depType;
    }

    /**
     * @return the reference, or {@code null} if it has not been set
     */
    public String getRef() {
        return ref;
    }

    /**
     * @param ref the reference this dependency points at
     */
    public void setRef(String ref) {
        this.ref = ref;
    }

    @Override
    public String toString() {
        return "DataDep{depType=" + depType + ", ref=" + ref + "}";
    }
}

View file

@ -1,6 +1,15 @@
package com.databuild.examples.basic_graph; package com.databuild.examples.basic_graph;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import java.io.File;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
/** /**
* Configure class for generating a random number. * Configure class for generating a random number.
@ -15,12 +24,29 @@ public class GenerateConfigure {
// Process each partition ref from input arguments // Process each partition ref from input arguments
Arrays.stream(args).forEach(partitionRef -> { Arrays.stream(args).forEach(partitionRef -> {
// Create a job config for generating a random number // Create and populate JobConfig object
String config = String.format( JobConfig config = new JobConfig();
"{\"outputs\":[\"%s\"],\"inputs\":[],\"args\":[\"%s\"],\"env\":{}}", config.outputs = Collections.singletonList(partitionRef);
partitionRef, partitionRef config.args = Arrays.asList(partitionRef);
); // inputs and env are already initialized as empty collections in the constructor
System.out.println(config);
try {
ObjectMapper mapper = new ObjectMapper();
// Load the schema
JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
// Create JSON Schema validator
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);
String jsonConfig = mapper.writeValueAsString(configNode);
System.out.println(jsonConfig);
} catch (Exception e) {
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
System.exit(1);
}
}); });
} }
} }

View file

@ -0,0 +1,24 @@
package com.databuild.examples.basic_graph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
/**
 * Data holder for one job configuration: its input and output refs, the
 * arguments passed to the job, and its environment variables.
 *
 * <p>Every collection starts out empty and mutable, so callers only need to
 * assign the fields they care about. Fields are public and are exposed to
 * Jackson through field auto-detection.
 */
@JsonAutoDetect(fieldVisibility = Visibility.ANY)
public class JobConfig {
    /** Input refs; empty by default. */
    public List<String> inputs = new ArrayList<>();

    /** Output refs; empty by default. */
    public List<String> outputs = new ArrayList<>();

    /** Arguments for the job; empty by default. */
    public List<String> args = new ArrayList<>();

    /** Environment variables for the job; empty by default. */
    public Map<String, String> env = new HashMap<>();

    /** Creates a config whose collections are all empty (see the field defaults). */
    public JobConfig() {
    }
}

View file

@ -15,3 +15,19 @@ bazel_dep(name = "rules_java", version = "8.11.0")
# Configure JDK 17 # Configure JDK 17
register_toolchains("@rules_java//toolchains:all") register_toolchains("@rules_java//toolchains:all")
# rules_jvm_external provides the `maven` module extension used below to
# fetch Java dependencies from a Maven repository.
bazel_dep(name = "rules_jvm_external", version = "6.3")
maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven")
# Jackson artifacts used by the example's Java targets; all four are pinned
# to the same version (2.15.2).
maven.install(
artifacts = [
"com.fasterxml.jackson.core:jackson-core:2.15.2",
"com.fasterxml.jackson.core:jackson-databind:2.15.2",
"com.fasterxml.jackson.core:jackson-annotations:2.15.2",
"com.fasterxml.jackson.module:jackson-module-jsonSchema:2.15.2",
],
repositories = [
"https://repo1.maven.org/maven2",
],
)
# Makes the resolved artifacts addressable as `@maven//:...` labels.
use_repo(maven, "maven")

View file

@ -121,7 +121,8 @@
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
"https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320", "https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c", "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95", "https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": "0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95",
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",

View file

@ -2,11 +2,16 @@
This example demonstrates a databuild_job that generates a random number seeded based on the partition ref. This example demonstrates a databuild_job that generates a random number seeded based on the partition ref.
## Configure ## Multiple Configs
We can generate a number for any partition ref provided (output is written to `/tmp/databuild/examples/basic_graph`), so for
demonstration purposes the configure step emits one config per partition ref:
```bash ```bash
$ bazel run //:generate_number_job.cfg test_output $ bazel run //:generate_number_job.cfg pippin salem sadie
{"outputs":["test_output"],"inputs":[],"args":["test_output"],"env":{}} {"outputs":["pippin"],"inputs":[],"args":["pippin"],"env":{}}
{"outputs":["salem"],"inputs":[],"args":["salem"],"env":{}}
{"outputs":["sadie"],"inputs":[],"args":["sadie"],"env":{}}
``` ```
## Execute ## Execute
@ -14,21 +19,5 @@ $ bazel run //:generate_number_job.cfg test_output
Generates a random number based on the hash of the partition ref and writes it to the output file. Generates a random number based on the hash of the partition ref and writes it to the output file.
```bash ```bash
$ bazel run //:generate_number_job.cfg test_output | bazel run //:generate_number_job bazel run //:sum_job.cfg pippin_salem_sadie | bazel run //:sum_job
``` ```
This will generate a random number for the partition "test_output" and write it to a file named "test_output".
You can verify that the random number is stable for the same partition ref by running the command multiple times:
```bash
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
```
And you can verify that different partition refs produce different random numbers:
```bash
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
$ bazel run //:generate_number_job.cfg test_output2 | bazel run //:generate_number_job
```

View file

@ -1,7 +1,15 @@
package com.databuild.examples.basic_graph; package com.databuild.examples.basic_graph;
import java.util.ArrayList; import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import java.io.File;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
/** /**
* Configure class for generating a random number. * Configure class for generating a random number.
@ -17,15 +25,28 @@ public class SumConfigure {
String partitionRef = args[0]; String partitionRef = args[0];
String[] upstreams = partitionRef.split("_"); String[] upstreams = partitionRef.split("_");
// Create a list of quoted upstream values // Create and populate JobConfig object
ArrayList<String> quotedUpstreams = new ArrayList<>(); JobConfig config = new JobConfig();
Arrays.stream(upstreams).forEach(s -> quotedUpstreams.add("\"" + s + "\"")); config.outputs = Collections.singletonList(partitionRef);
config.args = Arrays.asList(upstreams);
// inputs and env are already initialized as empty collections in the constructor
// Create a job config for generating a random number try {
String config = String.format( ObjectMapper mapper = new ObjectMapper();
"{\"outputs\":[\"%s\"],\"inputs\":[],\"args\":[%s],\"env\":{}}", // Load the schema
partitionRef, String.join(",", quotedUpstreams) JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
);
System.out.println(config); // Create JSON Schema validator
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);
String jsonConfig = mapper.writeValueAsString(configNode);
System.out.println(jsonConfig);
} catch (Exception e) {
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
System.exit(1);
}
} }
} }

View file

View file

@ -116,7 +116,7 @@ _databuild_job_rule = rule(
executable = True, executable = True,
) )
def _graph_impl(name, jobs, plan): def _graph_impl(name):
pass pass
databuild_graph = rule( databuild_graph = rule(