graph analyze

This commit is contained in:
Stuart Axelbrooke 2025-04-18 22:35:20 -07:00
parent db993ca747
commit 3c6b2b034d
No known key found for this signature in database
GPG key ID: 1B0A848C29D46A35
11 changed files with 64 additions and 25 deletions

View file

@ -1,7 +1,6 @@
package com.databuild.examples.basic_graph;
public class DataDep {
private String depType; // "query" or "materialize"
private String ref;
// getters, setters, constructors
public String depType; // "query" or "materialize"
public String ref;
}

View file

@ -3,6 +3,8 @@ package com.databuild.examples.basic_graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import java.util.ArrayList;
import java.util.List;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
@ -17,6 +19,7 @@ public class GenerateConfigure {
System.err.println("Error: Partition ref is required");
System.exit(1);
}
List<JobConfig> configList = new ArrayList<>();
// Process each partition ref from input arguments
Arrays.stream(args).forEach(partitionRef -> {
@ -25,18 +28,19 @@ public class GenerateConfigure {
config.outputs = Collections.singletonList(partitionRef);
config.args = Arrays.asList(partitionRef);
// inputs and env are already initialized as empty collections in the constructor
try {
ObjectMapper mapper = new ObjectMapper();
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);
String jsonConfig = mapper.writeValueAsString(configNode);
System.out.println(jsonConfig);
} catch (Exception e) {
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
System.exit(1);
}
configList.add(config);
});
try {
ObjectMapper mapper = new ObjectMapper();
// Convert config list to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(configList);
String jsonConfig = mapper.writeValueAsString(configNode);
System.out.println(jsonConfig);
} catch (Exception e) {
System.err.println("Error: Failed to validate or serialize config: " + e.getMessage());
System.exit(1);
}
}
}

View file

@ -9,7 +9,7 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
@JsonAutoDetect(fieldVisibility = Visibility.ANY)
public class JobConfig {
public List<String> inputs;
public List<DataDep> inputs;
public List<String> outputs;
public List<String> args;
public Map<String, String> env;

View file

@ -6,6 +6,9 @@ import com.fasterxml.jackson.databind.JsonNode;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.stream.Collectors;
import static com.databuild.examples.basic_graph.GenerateExecute.BASE_PATH;
/**
* Configure class for the sum job: derives the upstream generated_number inputs from the partition ref.
@ -19,12 +22,23 @@ public class SumConfigure {
}
String partitionRef = args[0];
String[] upstreams = partitionRef.split("_");
String[] pathParts = partitionRef.split("/");
String[] upstreams = Arrays.stream(pathParts[pathParts.length - 1].split("_"))
.map(part -> BASE_PATH + "generated_number/" + part)
.toArray(String[]::new);
// Create and populate JobConfig object
JobConfig config = new JobConfig();
config.outputs = Collections.singletonList(partitionRef);
config.args = Arrays.asList(upstreams);
config.inputs = Arrays.stream(upstreams)
.map(upstream -> {
DataDep dep = new DataDep();
dep.depType = "materialize";
dep.ref = upstream;
return dep;
})
.collect(Collectors.toList());
config.args = Collections.singletonList(partitionRef);
// Set env to a single-entry map: {"OUTPUT_REF": <partition ref passed as args[0]>}
config.env = Collections.singletonMap("OUTPUT_REF", args[0]);
// inputs and env are assigned explicitly above rather than left as the constructor's empty collections
@ -33,7 +47,7 @@ public class SumConfigure {
ObjectMapper mapper = new ObjectMapper();
// Convert config to JsonNode and serialize
JsonNode configNode = mapper.valueToTree(config);
JsonNode configNode = mapper.valueToTree(Collections.singletonList(config));
String jsonConfig = mapper.writeValueAsString(configNode);
System.out.println(jsonConfig);
} catch (Exception e) {

View file

@ -12,6 +12,7 @@ def main():
# Partition output prefix makes it obvious which job should fulfill
for ref in output_refs:
print(ref, file=sys.stderr)
body, tail = ref.rsplit("/", 1)
if "generated_number" in body:
result["//:generate_number_job"].append(ref)

View file

@ -1,14 +1,15 @@
#!/bin/bash
set -e
# Test configure
generate_number_job.cfg pippin salem sadie
# Test run
generate_number_job.cfg pippin | generate_number_job.exec
generate_number_job.cfg pippin | jq -c ".[0]" | generate_number_job.exec
# Validate that contents of pippin is 43
if [[ "$(cat /tmp/databuild/examples/basic_graph/generated_number/pippin)" != "43" ]]; then
echo "Assertion failed: File does not contain 43"
cat /tmp/databuild/examples/basic_graph/generated_number/pippin
exit 1
fi

View file

@ -1,4 +1,5 @@
#!/usr/bin/env bash
set -e
basic_graph.lookup /tmp/databuild/examples/basic_graph/generated_number/pippin_salem_sadie

View file

@ -1,4 +1,5 @@
#!/bin/bash
set -e
# Test configure
sum_job.cfg pippin_salem_sadie
@ -7,11 +8,12 @@ sum_job.cfg pippin_salem_sadie
echo -n 43 > /tmp/databuild/examples/basic_graph/generated_number/pippin
echo -n 56 > /tmp/databuild/examples/basic_graph/generated_number/salem
echo -n 40 > /tmp/databuild/examples/basic_graph/generated_number/sadie
sum_job.cfg pippin_salem_sadie | sum_job.exec
sum_job.cfg pippin_salem_sadie | jq -c ".[0]" | sum_job.exec
# Validate that contents of pippin_salem_sadie is 139
# Fail the test when the sum output file does not contain exactly "139".
# NOTE(review): the condition reads .../basic_graph/pippin_salem_sadie, but the failure
# branch prints .../basic_graph/sum/pippin_salem_sadie — confirm which path sum_job.exec
# actually writes and make the two agree.
if [[ "$(cat /tmp/databuild/examples/basic_graph/pippin_salem_sadie)" != "139" ]]; then
echo "Assertion failed: File does not contain 139"
cat /tmp/databuild/examples/basic_graph/sum/pippin_salem_sadie
exit 1
fi

View file

@ -1,2 +1,2 @@
# Create a test job config
echo "{\"outputs\":[\"$1\"],\"inputs\":[],\"args\":[\"will\", \"build\", \"$1\"],\"env\":{\"foo\":\"bar\"}}"
echo "[{\"outputs\":[\"$1\"],\"inputs\":[],\"args\":[\"will\", \"build\", \"$1\"],\"env\":{\"foo\":\"bar\"}}]"

View file

@ -2,4 +2,4 @@
test_job.cfg nice
test_job.cfg cool | test_job.exec
test_job.cfg cool | jq -c ".[0]" | test_job.exec

View file

@ -17,6 +17,23 @@ if [[ -z "$CONFIG_FILE" ]] || [[ "$CONFIG_FILE" == "-" ]]; then
trap 'rm -f "$TMP_CONFIG"' EXIT
fi
# Validate the config file before it is used: it must be a single JSON object.
# Cheap pre-check: the first byte must be "{" and the byte before the final
# (presumably newline) byte must be "}".
if [[ $(head -c 1 "$CONFIG_FILE") != "{" ]] || [[ $(tail -c 2 "$CONFIG_FILE" | head -c 1) != "}" ]]; then
  echo "The config file must be a non-empty JSON object:"
  cat "$CONFIG_FILE"
  exit 1
fi
# Then validate that it parses AND that the top-level value is an object.
# -e makes jq's exit status reflect the filter result (non-zero for false/null);
# without it jq exits 0 for any parseable JSON, so the type test would never fail.
if ! "$JQ" -e 'type == "object"' "$CONFIG_FILE" > /dev/null 2>&1; then
  echo "The config file must be a non-empty JSON object:"
  cat "$CONFIG_FILE"
  exit 1
fi
# Should be a single JSON object
# Extract and set environment variables from the config
# jq renders each entry of .env as a line of the form: export KEY="VALUE"
# and eval executes those lines in the current shell.
# NOTE(review): keys and values are concatenated into shell source without escaping,
# so a value containing a double quote, $, or backtick is interpreted by the shell
# when eval'd. If configs can come from an untrusted producer, consider quoting the
# value with jq's @sh filter instead.
eval "$("$JQ" -r '.env | to_entries | .[] | "export " + .key + "=\"" + .value + "\""' "$CONFIG_FILE")"