From fda0dedea3414c379c0649774db1077577d83bb6 Mon Sep 17 00:00:00 2001 From: Stuart Axelbrooke Date: Thu, 17 Apr 2025 16:50:13 -0700 Subject: [PATCH] Switch to java for basic graph impl --- examples/basic_graph/.bazelrc | 2 + examples/basic_graph/BUILD.bazel | 11 +++-- examples/basic_graph/Configure.java | 24 +++++++++++ examples/basic_graph/Execute.java | 63 +++++++++++++++++++++++++++++ examples/basic_graph/MODULE.bazel | 6 +++ examples/basic_graph/configure.sh | 5 --- examples/basic_graph/execute.sh | 19 --------- 7 files changed, 102 insertions(+), 28 deletions(-) create mode 100644 examples/basic_graph/.bazelrc create mode 100644 examples/basic_graph/Configure.java create mode 100644 examples/basic_graph/Execute.java delete mode 100755 examples/basic_graph/configure.sh delete mode 100755 examples/basic_graph/execute.sh diff --git a/examples/basic_graph/.bazelrc b/examples/basic_graph/.bazelrc new file mode 100644 index 0000000..aec09ba --- /dev/null +++ b/examples/basic_graph/.bazelrc @@ -0,0 +1,2 @@ +build --java_runtime_version=21 +build --tool_java_runtime_version=21 diff --git a/examples/basic_graph/BUILD.bazel b/examples/basic_graph/BUILD.bazel index c8aa0df..a774521 100644 --- a/examples/basic_graph/BUILD.bazel +++ b/examples/basic_graph/BUILD.bazel @@ -1,4 +1,5 @@ load("@databuild//:rules.bzl", "databuild_graph", "databuild_job") +load("@rules_java//java:defs.bzl", "java_binary") databuild_job( name = "generate_number_job", @@ -6,12 +7,14 @@ databuild_job( execute = ":generate_number_execute", ) -sh_binary( +java_binary( name = "generate_number_configure", - srcs = ["configure.sh"], + srcs = ["Configure.java"], + main_class = "com.databuild.examples.basic_graph.Configure", ) -sh_binary( +java_binary( name = "generate_number_execute", - srcs = ["execute.sh"], + srcs = ["Execute.java"], + main_class = "com.databuild.examples.basic_graph.Execute", ) diff --git a/examples/basic_graph/Configure.java b/examples/basic_graph/Configure.java new file mode 100644 
index 0000000..b0277f3 --- /dev/null +++ b/examples/basic_graph/Configure.java @@ -0,0 +1,24 @@ +package com.databuild.examples.basic_graph; + +/** + * Configure class for generating a random number. + * This class creates a job configuration for generating a random number based on the partition ref. + */ +public class Configure { + public static void main(String[] args) { + if (args.length < 1) { + System.err.println("Error: Partition ref (output path) is required"); + System.exit(1); + } + + String partitionRef = args[0]; + + // Create a job config for generating a random number + String config = String.format( + "{\"outputs\":[\"%s\"],\"inputs\":[],\"args\":[\"%s\"],\"env\":{}}", + partitionRef, partitionRef + ); + + System.out.println(config); + } +} \ No newline at end of file diff --git a/examples/basic_graph/Execute.java b/examples/basic_graph/Execute.java new file mode 100644 index 0000000..1377718 --- /dev/null +++ b/examples/basic_graph/Execute.java @@ -0,0 +1,63 @@ +package com.databuild.examples.basic_graph; + +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Random; + +/** + * Execute class for generating a random number. + * This class generates a random number based on the partition ref. 
+ */ +public class Execute { + public static void main(String[] args) { + if (args.length < 1) { + System.err.println("Error: Partition ref (output path) is required"); + System.exit(1); + } + + String partitionRef = args[0]; + + try { + // Create a hash of the partition ref to use as a seed + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] hashBytes = md.digest(partitionRef.getBytes(StandardCharsets.UTF_8)); + + // Convert the first 8 bytes of the hash to a long to use as a seed + long seed = 0; + for (int i = 0; i < Math.min(8, hashBytes.length); i++) { + seed = (seed << 8) | (hashBytes[i] & 0xff); + } + + // Create a random number generator with the seed + Random random = new Random(seed); + + // Generate a random number + int randomNumber = random.nextInt(Integer.MAX_VALUE); + + // Write the random number to the output file + try (FileWriter writer = new FileWriter(partitionRef)) { + writer.write("Random number for partition " + partitionRef + ": " + randomNumber); + } + + System.out.println("Generated random number " + randomNumber + " for partition " + partitionRef); + + // Write the random number to the output file + String outputPath = "/tmp/databuild/examples/basic_graph/" + partitionRef; + System.out.println("Writing random number " + randomNumber + " to " + outputPath); + // Ensure dir exists + new java.io.File(outputPath).getParentFile().mkdirs(); + // Write number + try (FileWriter writer = new FileWriter(outputPath)) { + writer.write(String.valueOf(randomNumber)); + } + + } catch (NoSuchAlgorithmException | IOException e) { + System.err.println("Error: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } + } +} \ No newline at end of file diff --git a/examples/basic_graph/MODULE.bazel b/examples/basic_graph/MODULE.bazel index acdc3b1..62a1915 100644 --- a/examples/basic_graph/MODULE.bazel +++ b/examples/basic_graph/MODULE.bazel @@ -9,3 +9,9 @@ local_path_override( module_name = "databuild", path = "../..", ) + +# Java 
dependencies +bazel_dep(name = "rules_java", version = "8.11.0") + +# Register the rules_java toolchains (the JDK version is selected in .bazelrc) +register_toolchains("@rules_java//toolchains:all") diff --git a/examples/basic_graph/configure.sh b/examples/basic_graph/configure.sh deleted file mode 100755 index 3ae6754..0000000 --- a/examples/basic_graph/configure.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -# Create a job config for generating a random number -# $1 is the partition ref (output path) -echo "{\"outputs\":[\"$1\"],\"inputs\":[],\"args\":[\"$1\"],\"env\":{}}" \ No newline at end of file diff --git a/examples/basic_graph/execute.sh b/examples/basic_graph/execute.sh deleted file mode 100755 index 1657517..0000000 --- a/examples/basic_graph/execute.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# Generate a random number based on the partition ref -# $1 is the partition ref (output path) - -# Create a hash of the partition ref to use as a seed -SEED=$(echo "$1" | md5sum | awk '{print $1}') -# Convert the hex hash to a decimal number (using the first 8 characters) -SEED_DEC=$((16#${SEED:0:8})) - -# Set the random seed -RANDOM=$SEED_DEC - -# Generate a random number -RANDOM_NUMBER=$RANDOM - -# Write the random number to the output file -echo "Random number for partition $1: $RANDOM_NUMBER" > "$1" -echo "Generated random number $RANDOM_NUMBER for partition $1" \ No newline at end of file