Ugh java
This commit is contained in:
parent
6bff5ce658
commit
68608e412f
13 changed files with 167 additions and 46 deletions
|
|
@ -3,3 +3,9 @@ filegroup(
|
|||
srcs = ["//runtime:jq"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "json_schema",
|
||||
srcs = ["databuild.schema.json"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -5,3 +5,4 @@ module(
|
|||
|
||||
bazel_dep(name = "bazel_skylib", version = "1.7.1")
|
||||
bazel_dep(name = "platforms", version = "0.0.11")
|
||||
bazel_dep(name = "rules_shell", version = "0.4.0")
|
||||
|
|
|
|||
|
|
@ -121,7 +121,8 @@
|
|||
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
|
||||
"https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": "0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95",
|
||||
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
|
||||
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
|
||||
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,21 @@
|
|||
load("@databuild//:rules.bzl", "databuild_graph", "databuild_job")
|
||||
load("@rules_java//java:defs.bzl", "java_binary")
|
||||
|
||||
databuild_graph(
|
||||
name = "basic_graph",
|
||||
jobs = [
|
||||
":generate_number_job",
|
||||
":sum_job",
|
||||
],
|
||||
plan = ":basic_graph_plan",
|
||||
)
|
||||
|
||||
py_binary(
|
||||
name = "basic_graph_plan",
|
||||
srcs = ["basic_graph.py"],
|
||||
main = "basic_graph.py",
|
||||
)
|
||||
|
||||
databuild_job(
|
||||
name = "generate_number_job",
|
||||
configure = ":generate_number_configure",
|
||||
|
|
@ -9,13 +24,20 @@ databuild_job(
|
|||
|
||||
java_binary(
|
||||
name = "generate_number_configure",
|
||||
srcs = ["GenerateConfigure.java"],
|
||||
srcs = glob(["*.java"]),
|
||||
data = ["@databuild//:json_schema"],
|
||||
main_class = "com.databuild.examples.basic_graph.GenerateConfigure",
|
||||
deps = [
|
||||
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
|
||||
"@maven//:com_fasterxml_jackson_core_jackson_core",
|
||||
"@maven//:com_fasterxml_jackson_core_jackson_databind",
|
||||
"@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
|
||||
],
|
||||
)
|
||||
|
||||
java_binary(
|
||||
name = "generate_number_execute",
|
||||
srcs = ["GenerateExecute.java"],
|
||||
srcs = glob(["GenerateExecute.java"]),
|
||||
main_class = "com.databuild.examples.basic_graph.GenerateExecute",
|
||||
)
|
||||
|
||||
|
|
@ -27,15 +49,22 @@ databuild_job(
|
|||
|
||||
java_binary(
|
||||
name = "sum_configure",
|
||||
srcs = ["SumConfigure.java"],
|
||||
srcs = glob(["*.java"]),
|
||||
data = ["@databuild//:json_schema"],
|
||||
main_class = "com.databuild.examples.basic_graph.SumConfigure",
|
||||
deps = [
|
||||
"@maven//:com_fasterxml_jackson_core_jackson_annotations",
|
||||
"@maven//:com_fasterxml_jackson_core_jackson_core",
|
||||
"@maven//:com_fasterxml_jackson_core_jackson_databind",
|
||||
"@maven//:com_fasterxml_jackson_module_jackson_module_jsonSchema",
|
||||
],
|
||||
)
|
||||
|
||||
java_binary(
|
||||
name = "sum_execute",
|
||||
srcs = [
|
||||
"GenerateExecute.java",
|
||||
srcs = glob([
|
||||
"SumExecute.java",
|
||||
],
|
||||
"GenerateExecute.java",
|
||||
]),
|
||||
main_class = "com.databuild.examples.basic_graph.SumExecute",
|
||||
)
|
||||
|
|
|
|||
7
examples/basic_graph/DataDep.java
Normal file
7
examples/basic_graph/DataDep.java
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
package com.databuild.examples.basic_graph;
|
||||
|
||||
/**
 * Describes a single data dependency: the kind of dependency and the
 * reference it points at.
 *
 * <p>The fields are private, so the constructors and accessors below are the
 * only way for other classes to populate or read them.
 */
public class DataDep {
    /** Dependency kind; expected to be "query" or "materialize" (not enforced here). */
    private String depType;

    /** Reference this dependency points at. */
    private String ref;

    /** Creates an empty dependency with both fields unset ({@code null}). */
    public DataDep() {
    }

    /**
     * Creates a fully populated dependency.
     *
     * @param depType dependency kind, "query" or "materialize"
     * @param ref     reference this dependency points at
     */
    public DataDep(String depType, String ref) {
        this.depType = depType;
        this.ref = ref;
    }

    /** @return the dependency kind, or {@code null} if it has not been set */
    public String getDepType() {
        return depType;
    }

    /** @param depType dependency kind, "query" or "materialize" */
    public void setDepType(String depType) {
        this.depType = depType;
    }

    /** @return the reference, or {@code null} if it has not been set */
    public String getRef() {
        return ref;
    }

    /** @param ref reference this dependency points at */
    public void setRef(String ref) {
        this.ref = ref;
    }
}
|
||||
|
|
@ -1,6 +1,15 @@
|
|||
package com.databuild.examples.basic_graph;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
|
||||
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Configure class for generating a random number.
|
||||
|
|
@ -15,12 +24,29 @@ public class GenerateConfigure {
|
|||
|
||||
// Process each partition ref from input arguments
|
||||
Arrays.stream(args).forEach(partitionRef -> {
|
||||
// Create a job config for generating a random number
|
||||
String config = String.format(
|
||||
"{\"outputs\":[\"%s\"],\"inputs\":[],\"args\":[\"%s\"],\"env\":{}}",
|
||||
partitionRef, partitionRef
|
||||
);
|
||||
System.out.println(config);
|
||||
// Create and populate JobConfig object
|
||||
JobConfig config = new JobConfig();
|
||||
config.outputs = Collections.singletonList(partitionRef);
|
||||
config.args = Arrays.asList(partitionRef);
|
||||
// inputs and env are already initialized as empty collections in the constructor
|
||||
|
||||
try {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
// Load the schema
|
||||
JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
|
||||
|
||||
// Create JSON Schema validator
|
||||
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
|
||||
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
|
||||
|
||||
// Convert config to JsonNode and serialize
|
||||
JsonNode configNode = mapper.valueToTree(config);
|
||||
String jsonConfig = mapper.writeValueAsString(configNode);
|
||||
System.out.println(jsonConfig);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
|
||||
System.exit(1);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
24
examples/basic_graph/JobConfig.java
Normal file
24
examples/basic_graph/JobConfig.java
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
package com.databuild.examples.basic_graph;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
|
||||
|
||||
@JsonAutoDetect(fieldVisibility = Visibility.ANY)
|
||||
/**
 * Mutable holder for one job configuration.
 *
 * <p>All four members are public fields that callers assign directly. Each
 * starts out as an empty, mutable collection, so a caller only needs to
 * overwrite the members it actually uses.
 */
public class JobConfig {
    /** The "inputs" entries; empty until a caller fills or replaces it. */
    public List<String> inputs = new ArrayList<>();

    /** The "outputs" entries; empty until a caller fills or replaces it. */
    public List<String> outputs = new ArrayList<>();

    /** The "args" entries; empty until a caller fills or replaces it. */
    public List<String> args = new ArrayList<>();

    /** The "env" key/value pairs; empty until a caller fills or replaces it. */
    public Map<String, String> env = new HashMap<>();

    /** Creates a config in which every collection is present and empty. */
    public JobConfig() {
        // Defaults are supplied by the field initialisers above.
    }
}
|
||||
|
|
@ -15,3 +15,19 @@ bazel_dep(name = "rules_java", version = "8.11.0")
|
|||
|
||||
# Configure JDK 17
|
||||
register_toolchains("@rules_java//toolchains:all")
|
||||
|
||||
bazel_dep(name = "rules_jvm_external", version = "6.3")
|
||||
|
||||
maven = use_extension("@rules_jvm_external//:extensions.bzl", "maven")
|
||||
maven.install(
|
||||
artifacts = [
|
||||
"com.fasterxml.jackson.core:jackson-core:2.15.2",
|
||||
"com.fasterxml.jackson.core:jackson-databind:2.15.2",
|
||||
"com.fasterxml.jackson.core:jackson-annotations:2.15.2",
|
||||
"com.fasterxml.jackson.module:jackson-module-jsonSchema:2.15.2",
|
||||
],
|
||||
repositories = [
|
||||
"https://repo1.maven.org/maven2",
|
||||
],
|
||||
)
|
||||
use_repo(maven, "maven")
|
||||
|
|
|
|||
|
|
@ -121,7 +121,8 @@
|
|||
"https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
|
||||
"https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.4.0/MODULE.bazel": "0f8f11bb3cd11755f0b48c1de0bbcf62b4b34421023aa41a2fc74ef68d9584f0",
|
||||
"https://bcr.bazel.build/modules/rules_shell/0.4.0/source.json": "1d7fa7f941cd41dc2704ba5b4edc2e2230eea1cc600d80bd2b65838204c50b95",
|
||||
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
|
||||
"https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
|
||||
"https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",
|
||||
|
|
|
|||
|
|
@ -2,11 +2,16 @@
|
|||
|
||||
This example demonstrates a databuild_job that generates a random number seeded based on the partition ref.
|
||||
|
||||
## Configure
|
||||
## Multiple Configs
|
||||
|
||||
We can generate numbers for any partition provided (written to `/tmp/databuild/examples/basic_graph`), and so we have
|
||||
a config per partition for demonstration purposes:
|
||||
|
||||
```bash
|
||||
$ bazel run //:generate_number_job.cfg test_output
|
||||
{"outputs":["test_output"],"inputs":[],"args":["test_output"],"env":{}}
|
||||
$ bazel run //:generate_number_job.cfg pippin salem sadie
|
||||
{"outputs":["pippin"],"inputs":[],"args":["pippin"],"env":{}}
|
||||
{"outputs":["salem"],"inputs":[],"args":["salem"],"env":{}}
|
||||
{"outputs":["sadie"],"inputs":[],"args":["sadie"],"env":{}}
|
||||
```
|
||||
|
||||
## Execute
|
||||
|
|
@ -14,21 +19,5 @@ $ bazel run //:generate_number_job.cfg test_output
|
|||
Generates a random number based on the hash of the partition ref and writes it to the output file.
|
||||
|
||||
```bash
|
||||
$ bazel run //:generate_number_job.cfg test_output | bazel run //:generate_number_job
|
||||
```
|
||||
|
||||
This will generate a random number for the partition "test_output" and write it to a file named "test_output".
|
||||
|
||||
You can verify that the random number is stable for the same partition ref by running the command multiple times:
|
||||
|
||||
```bash
|
||||
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
|
||||
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
|
||||
```
|
||||
|
||||
And you can verify that different partition refs produce different random numbers:
|
||||
|
||||
```bash
|
||||
$ bazel run //:generate_number_job.cfg test_output1 | bazel run //:generate_number_job
|
||||
$ bazel run //:generate_number_job.cfg test_output2 | bazel run //:generate_number_job
|
||||
bazel run //:sum_job.cfg pippin_salem_sadie | bazel run //:sum_job
|
||||
```
|
||||
|
|
@ -1,7 +1,15 @@
|
|||
package com.databuild.examples.basic_graph;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
|
||||
import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Configure class for the sum job, which derives its upstream refs by splitting the partition ref on "_".
|
||||
|
|
@ -17,15 +25,28 @@ public class SumConfigure {
|
|||
String partitionRef = args[0];
|
||||
String[] upstreams = partitionRef.split("_");
|
||||
|
||||
// Create a list of quoted upstream values
|
||||
ArrayList<String> quotedUpstreams = new ArrayList<>();
|
||||
Arrays.stream(upstreams).forEach(s -> quotedUpstreams.add("\"" + s + "\""));
|
||||
// Create and populate JobConfig object
|
||||
JobConfig config = new JobConfig();
|
||||
config.outputs = Collections.singletonList(partitionRef);
|
||||
config.args = Arrays.asList(upstreams);
|
||||
// inputs and env are already initialized as empty collections in the constructor
|
||||
|
||||
// Create a job config for generating a random number
|
||||
String config = String.format(
|
||||
"{\"outputs\":[\"%s\"],\"inputs\":[],\"args\":[%s],\"env\":{}}",
|
||||
partitionRef, String.join(",", quotedUpstreams)
|
||||
);
|
||||
System.out.println(config);
|
||||
try {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
// Load the schema
|
||||
JsonNode schemaNode = mapper.readTree(new File("../databuild+/databuild.schema.json"));
|
||||
|
||||
// Create JSON Schema validator
|
||||
JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(mapper);
|
||||
JsonSchema schema = schemaGen.generateSchema(JobConfig.class);
|
||||
|
||||
// Convert config to JsonNode and serialize
|
||||
JsonNode configNode = mapper.valueToTree(config);
|
||||
String jsonConfig = mapper.writeValueAsString(configNode);
|
||||
System.out.println(jsonConfig);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
0
examples/basic_graph/basic_graph.py
Normal file
0
examples/basic_graph/basic_graph.py
Normal file
|
|
@ -116,7 +116,7 @@ _databuild_job_rule = rule(
|
|||
executable = True,
|
||||
)
|
||||
|
||||
def _graph_impl(name, jobs, plan):
|
||||
def _graph_impl(name):
|
||||
pass
|
||||
|
||||
databuild_graph = rule(
|
||||
|
|
|
|||
Loading…
Reference in a new issue