This commit is contained in:
Stuart Axelbrooke 2025-04-17 21:51:15 -07:00
parent 6bff5ce658
commit 68608e412f
No known key found for this signature in database
GPG key ID: 1B0A848C29D46A35
13 changed files with 167 additions and 46 deletions

View file

@ -3,3 +3,9 @@ filegroup(
srcs = ["//runtime:jq"],
visibility = ["//visibility:public"],
)
# Publicly visible target wrapping the databuild JSON schema file, so that
# other packages can depend on it (e.g. list "@databuild//:json_schema" in a
# binary's `data` attribute to make the schema available at runtime).
filegroup(
name = "json_schema",
srcs = ["databuild.schema.json"],
visibility = ["//visibility:public"],
)

View file

@ -5,3 +5,4 @@ module(
bazel_dep(name = "bazel_skylib", version = "1.7.1")
bazel_dep(name = "platforms", version = "0.0.11")
bazel_dep(name = "rules_shell", version = "0.4.0")

View file

@ -121,7 +121,8 @@
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
"https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": "0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95",
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",

View file

@ -1,6 +1,21 @@
load("@databuild//:rules.bzl", "databuild_graph", "databuild_job")
load("@rules_java//java:defs.bzl", "java_binary")
# Example graph target: groups this package's two jobs (number generation and
# summing) and names the `basic_graph_plan` binary as the graph's plan.
# NOTE(review): how `jobs` and `plan` are consumed is defined by the
# databuild_graph rule in rules.bzl — confirm there.
databuild_graph(
name = "basic_graph",
jobs = [
":generate_number_job",
":sum_job",
],
plan = ":basic_graph_plan",
)
# Python binary used as the `plan` of the :basic_graph target; its single
# source file, basic_graph.py, is also the entry point.
py_binary(
name = "basic_graph_plan",
srcs = ["basic_graph.py"],
main = "basic_graph.py",
)
databuild_job(
name = "generate_number_job",
configure = ":generate_number_configure",
@ -9,13 +24,20 @@ databuild_job(
java_binary(
name = "generate_number_configure",
srcs = ["GenerateConfigure.java"],
srcs = glob(["*.java"]),
data = ["@databuild//:json_schema"],
main_class = "com.databuild.examples.basic_graph.GenerateConfigure",
deps = [
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
"@maven//:com_fasterxml_jackson_core_jackson_core",
"@maven//:com_fasterxml_jackson_core_jackson_databind",
"@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
],
)
java_binary(
name = "generate_number_execute",
srcs = ["GenerateExecute.java"],
srcs = glob(["GenerateExecute.java"]),
main_class = "com.databuild.examples.basic_graph.GenerateExecute",
)
@ -27,15 +49,22 @@ databuild_job(
java_binary(
name = "sum_configure",
srcs = ["SumConfigure.java"],
srcs = glob(["*.java"]),
data = ["@databuild//:json_schema"],
main_class = "com.databuild.examples.basic_graph.SumConfigure",
deps = [
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
"@maven//:com_fasterxml_jackson_core_jackson_core",
"@maven//:com_fasterxml_jackson_core_jackson_databind",
"@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
],
)
java_binary(
name = "sum_execute",
srcs = [
"GenerateExecute.java",
srcs = glob([
"SumExecute.java",
],
"GenerateExecute.java",
]),
main_class = "com.databuild.examples.basic_graph.SumExecute",
)

View file

@ -0,0 +1,7 @@
package com.databuild.examples.basic_graph;
/**
 * Describes a single data dependency: what kind of dependency it is and the
 * reference it points at.
 *
 * <p>This is a plain mutable bean (no-arg constructor plus getters/setters) so
 * that bean-based serializers can discover its {@code depType} and {@code ref}
 * properties; without accessors the private fields are invisible to them.
 */
public class DataDep {
    /** Kind of dependency; expected to be {@code "query"} or {@code "materialize"}. */
    private String depType;

    /** The reference this dependency points at (presumably a partition ref — confirm). */
    private String ref;

    /** Creates an empty dependency; both properties start out {@code null}. */
    public DataDep() {
    }

    /**
     * Creates a fully populated dependency.
     *
     * @param depType kind of dependency, {@code "query"} or {@code "materialize"}
     * @param ref the reference this dependency points at
     */
    public DataDep(String depType, String ref) {
        this.depType = depType;
        this.ref = ref;
    }

    /** @return the kind of dependency, or {@code null} if not set */
    public String getDepType() {
        return depType;
    }

    /** @param depType kind of dependency, {@code "query"} or {@code "materialize"} */
    public void setDepType(String depType) {
        this.depType = depType;
    }

    /** @return the reference this dependency points at, or {@code null} if not set */
    public String getRef() {
        return ref;
    }

    /** @param ref the reference this dependency points at */
    public void setRef(String ref) {
        this.ref = ref;
    }
}

View file

@ -1,6 +1,15 @@
package com.databuild.examples.basic_graph;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
/**
* Configure class for generating a random number.
@ -15,12 +24,29 @@ public class GenerateConfigure {
// Process each partition ref from input arguments
Arrays.stream(args).forEach(partitionRef -> {
// Create a job config for generating a random number
String config = String.format(
"{\"outputs\":[\"%s\"],\"inputs\":[],\"args\":[\"%s\"],\"env\":{}}",
partitionRef, partitionRef
);
System.out.println(config);
// Create and populate JobConfig object
JobConfig config = new JobConfig();
config.outputs = Collections.singletonList(partitionRef);
config.args = Arrays.asList(partitionRef);
// inputs and env are already initialized as empty collections in the constructor
try {
ObjectMapper mapper = new ObjectMapper();
// Load the schema
JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
// Create JSON Schema validator
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);
String jsonConfig = mapper.writeValueAsString(configNode);
System.out.println(jsonConfig);
} catch (Exception e) {
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
System.exit(1);
}
});
}
}

View file

@ -0,0 +1,24 @@
package com.databuild.examples.basic_graph;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
/**
 * Mutable holder for one job configuration: its input and output refs, its
 * argument list and its environment variables.
 *
 * <p>All four collections start out empty, so callers only need to assign the
 * ones they care about. Fields are serialized directly (field visibility is
 * set to {@code ANY}), so no accessors are required.
 */
@JsonAutoDetect(fieldVisibility = Visibility.ANY)
public class JobConfig {
    /** Input refs of the job; empty by default. */
    public List<String> inputs = new ArrayList<>();

    /** Output refs of the job; empty by default. */
    public List<String> outputs = new ArrayList<>();

    /** Arguments passed to the job; empty by default. */
    public List<String> args = new ArrayList<>();

    /** Environment variables for the job; empty by default. */
    public Map<String, String> env = new HashMap<>();

    /** Creates a config whose collections are all empty. */
    public JobConfig() {
        // Defaults are supplied by the field initializers above.
    }
}

View file

@ -15,3 +15,19 @@ bazel_dep(name = "rules_java", version = "8.11.0")
# Configure JDK 17
register_toolchains("@rules_java//toolchains:all")
# rules_jvm_external provides the `maven` module extension used below to
# resolve Java dependencies from Maven repositories.
bazel_dep(name = "rules_jvm_external", version = "6.3")
maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven")
# Jackson artifacts (core, databind, annotations and the JSON Schema module),
# all pinned to the same version and fetched from Maven Central.
maven.install(
artifacts = [
"com.fasterxml.jackson.core:jackson-core:2.15.2",
"com.fasterxml.jackson.core:jackson-databind:2.15.2",
"com.fasterxml.jackson.core:jackson-annotations:2.15.2",
"com.fasterxml.jackson.module:jackson-module-jsonSchema:2.15.2",
],
repositories = [
"https://repo1.maven.org/maven2",
],
)
# Makes the resolved artifacts addressable as "@maven//:..." labels.
use_repo(maven, "maven")

View file

@ -121,7 +121,8 @@
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
"https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
"https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": "0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0",
"https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95",
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",

View file

@ -2,11 +2,16 @@
This example demonstrates a databuild_job that generates a random number seeded based on the partition ref.
## Configure
## Multiple Configs
We can generate a number for any partition ref provided (the numbers are written to
`/tmp/databuild/examples/basic_graph`), so for demonstration purposes the configure step emits one config per partition ref:
```bash
$ bazel run //:generate_number_job.cfg test_output
{"outputs":["test_output"],"inputs":[],"args":["test_output"],"env":{}}
$ bazel run //:generate_number_job.cfg pippin salem sadie
{"outputs":["pippin"],"inputs":[],"args":["pippin"],"env":{}}
{"outputs":["salem"],"inputs":[],"args":["salem"],"env":{}}
{"outputs":["sadie"],"inputs":[],"args":["sadie"],"env":{}}
```
## Execute
@ -14,21 +19,5 @@ $ bazel run //:generate_number_job.cfg test_output
Generates a random number based on the hash of the partition ref and writes it to the output file.
```bash
$ bazel run //:generate_number_job.cfg test_output | bazel run //:generate_number_job
```
This will generate a random number for the partition "test_output" and write it to a file named "test_output".
You can verify that the random number is stable for the same partition ref by running the command multiple times:
```bash
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
```
And you can verify that different partition refs produce different random numbers:
```bash
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
$ bazel run //:generate_number_job.cfg test_output2 | bazel run //:generate_number_job
bazel run //:sum_job.cfg pippin_salem_sadie | bazel run //:sum_job
```

View file

@ -1,7 +1,15 @@
package com.databuild.examples.basic_graph;
import java.util.ArrayList;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
/**
* Configure class for generating a random number.
@ -17,15 +25,28 @@ public class SumConfigure {
String partitionRef = args[0];
String[] upstreams = partitionRef.split("_");
// Create a list of quoted upstream values
ArrayList<String> quotedUpstreams = new ArrayList<>();
Arrays.stream(upstreams).forEach(s -> quotedUpstreams.add("\"" + s + "\""));
// Create and populate JobConfig object
JobConfig config = new JobConfig();
config.outputs = Collections.singletonList(partitionRef);
config.args = Arrays.asList(upstreams);
// inputs and env are already initialized as empty collections in the constructor
// Create a job config for generating a random number
String config = String.format(
"{\"outputs\":[\"%s\"],\"inputs\":[],\"args\":[%s],\"env\":{}}",
partitionRef, String.join(",", quotedUpstreams)
);
System.out.println(config);
try {
ObjectMapper mapper = new ObjectMapper();
// Load the schema
JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
// Create JSON Schema validator
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);
String jsonConfig = mapper.writeValueAsString(configNode);
System.out.println(jsonConfig);
} catch (Exception e) {
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
System.exit(1);
}
}
}

View file

View file

@ -116,7 +116,7 @@ _databuild_job_rule = rule(
executable = True,
)
def _graph_impl(name, jobs, plan):
def _graph_impl(name):
pass
databuild_graph = rule(