From f92cfeb9b54f75692c5dd10c8e2ea0d7d606fa41 Mon Sep 17 00:00:00 2001 From: Stuart Axelbrooke Date: Sat, 16 Aug 2025 15:37:47 -0700 Subject: [PATCH] Add test app generated package --- databuild/test/app/dsl/generated/BUILD.bazel | 71 +++++++++++++++++++ .../dsl/generated/aggregate_color_votes.py | 58 +++++++++++++++ .../dsl/generated/color_vote_report_calc.py | 58 +++++++++++++++ .../test/app/dsl/generated/dsl_job_lookup.py | 53 ++++++++++++++ .../app/dsl/generated/ingest_color_votes.py | 58 +++++++++++++++ .../app/dsl/generated/trailing_color_votes.py | 58 +++++++++++++++ 6 files changed, 356 insertions(+) create mode 100644 databuild/test/app/dsl/generated/BUILD.bazel create mode 100755 databuild/test/app/dsl/generated/aggregate_color_votes.py create mode 100755 databuild/test/app/dsl/generated/color_vote_report_calc.py create mode 100755 databuild/test/app/dsl/generated/dsl_job_lookup.py create mode 100755 databuild/test/app/dsl/generated/ingest_color_votes.py create mode 100755 databuild/test/app/dsl/generated/trailing_color_votes.py diff --git a/databuild/test/app/dsl/generated/BUILD.bazel b/databuild/test/app/dsl/generated/BUILD.bazel new file mode 100644 index 0000000..215a549 --- /dev/null +++ b/databuild/test/app/dsl/generated/BUILD.bazel @@ -0,0 +1,71 @@ +load("@databuild//databuild:rules.bzl", "databuild_job", "databuild_graph") + +# Generated by DataBuild DSL - do not edit manually +# This file is generated in a subdirectory to avoid overwriting the original BUILD.bazel + +py_binary( + name = "aggregate_color_votes_binary", + srcs = ["aggregate_color_votes.py"], + main = "aggregate_color_votes.py", + deps = ["@@//databuild/test/app/dsl:dsl_src"], +) + +databuild_job( + name = "aggregate_color_votes", + binary = ":aggregate_color_votes_binary", +) +py_binary( + name = "color_vote_report_calc_binary", + srcs = ["color_vote_report_calc.py"], + main = "color_vote_report_calc.py", + deps = ["@@//databuild/test/app/dsl:dsl_src"], +) + +databuild_job( + name = "color_vote_report_calc", + binary = ":color_vote_report_calc_binary", +) +py_binary( + name = "ingest_color_votes_binary", + srcs = ["ingest_color_votes.py"], + main = "ingest_color_votes.py", + deps = ["@@//databuild/test/app/dsl:dsl_src"], +) + +databuild_job( + name = "ingest_color_votes", + binary = ":ingest_color_votes_binary", +) +py_binary( + name = "trailing_color_votes_binary", + srcs = ["trailing_color_votes.py"], + main = "trailing_color_votes.py", + deps = ["@@//databuild/test/app/dsl:dsl_src"], +) + +databuild_job( + name = "trailing_color_votes", + binary = ":trailing_color_votes_binary", +) + +py_binary( + name = "dsl_job_lookup", + srcs = ["dsl_job_lookup.py"], + deps = ["@@//databuild/test/app/dsl:dsl_src"], +) + +databuild_graph( + name = "dsl_graph", + jobs = ["aggregate_color_votes", "color_vote_report_calc", "ingest_color_votes", "trailing_color_votes"], + lookup = ":dsl_job_lookup", + visibility = ["//visibility:public"], +) + +# Create tar archive of generated files for testing +genrule( + name = "existing_generated", + srcs = glob(["*.py", "BUILD.bazel"]), + outs = ["existing_generated.tar"], + cmd = "mkdir -p temp && cp $(SRCS) temp/ && find temp -exec touch -t 197001010000 {} + && tar -cf $@ -C temp .", + visibility = ["//visibility:public"], +) diff --git a/databuild/test/app/dsl/generated/aggregate_color_votes.py b/databuild/test/app/dsl/generated/aggregate_color_votes.py new file mode 100755 index 0000000..59af193 --- /dev/null +++ b/databuild/test/app/dsl/generated/aggregate_color_votes.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Generated job script for AggregateColorVotes. +""" + +import sys +import json +from databuild.test.app.dsl.graph import AggregateColorVotes +from databuild.proto import PartitionRef, JobConfigureResponse, to_dict + + +def parse_outputs_from_args(args: list[str]) -> list: + """Parse partition output references from command line arguments.""" + outputs = [] + for arg in args: + # Find which output type can deserialize this partition reference + for output_type in AggregateColorVotes.output_types: + try: + partition = output_type.deserialize(arg) + outputs.append(partition) + break + except ValueError: + continue + else: + raise ValueError(f"No output type in AggregateColorVotes can deserialize partition ref: {arg}") + + return outputs + + +if __name__ == "__main__": + if len(sys.argv) < 2: + raise Exception(f"Invalid command usage") + + command = sys.argv[1] + job_instance = AggregateColorVotes() + + if command == "config": + # Parse output partition references as PartitionRef objects (for Rust wrapper) + output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]] + + # Also parse them into DSL partition objects (for DSL job.config()) + outputs = parse_outputs_from_args(sys.argv[2:]) + + # Call job's config method - returns list[JobConfig] + configs = job_instance.config(outputs) + + # Wrap in JobConfigureResponse and serialize using to_dict() + response = JobConfigureResponse(configs=configs) + print(json.dumps(to_dict(response))) + + elif command == "exec": + # The exec method expects a JobConfig but the Rust wrapper passes args + # For now, let the DSL job handle the args directly + # TODO: This needs to be refined based on actual Rust wrapper interface + job_instance.exec(*sys.argv[2:]) + + else: + raise Exception(f"Invalid command `{sys.argv[1]}`") diff --git a/databuild/test/app/dsl/generated/color_vote_report_calc.py b/databuild/test/app/dsl/generated/color_vote_report_calc.py new file mode 100755 index 0000000..e538772 --- /dev/null +++ b/databuild/test/app/dsl/generated/color_vote_report_calc.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Generated job script for ColorVoteReportCalc. +""" + +import sys +import json +from databuild.test.app.dsl.graph import ColorVoteReportCalc +from databuild.proto import PartitionRef, JobConfigureResponse, to_dict + + +def parse_outputs_from_args(args: list[str]) -> list: + """Parse partition output references from command line arguments.""" + outputs = [] + for arg in args: + # Find which output type can deserialize this partition reference + for output_type in ColorVoteReportCalc.output_types: + try: + partition = output_type.deserialize(arg) + outputs.append(partition) + break + except ValueError: + continue + else: + raise ValueError(f"No output type in ColorVoteReportCalc can deserialize partition ref: {arg}") + + return outputs + + +if __name__ == "__main__": + if len(sys.argv) < 2: + raise Exception(f"Invalid command usage") + + command = sys.argv[1] + job_instance = ColorVoteReportCalc() + + if command == "config": + # Parse output partition references as PartitionRef objects (for Rust wrapper) + output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]] + + # Also parse them into DSL partition objects (for DSL job.config()) + outputs = parse_outputs_from_args(sys.argv[2:]) + + # Call job's config method - returns list[JobConfig] + configs = job_instance.config(outputs) + + # Wrap in JobConfigureResponse and serialize using to_dict() + response = JobConfigureResponse(configs=configs) + print(json.dumps(to_dict(response))) + + elif command == "exec": + # The exec method expects a JobConfig but the Rust wrapper passes args + # For now, let the DSL job handle the args directly + # TODO: This needs to be refined based on actual Rust wrapper interface + job_instance.exec(*sys.argv[2:]) + + else: + raise Exception(f"Invalid command `{sys.argv[1]}`") diff --git a/databuild/test/app/dsl/generated/dsl_job_lookup.py b/databuild/test/app/dsl/generated/dsl_job_lookup.py new file mode 100755 index 0000000..049f7e5 --- /dev/null +++ b/databuild/test/app/dsl/generated/dsl_job_lookup.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +Generated job lookup for DataBuild DSL graph. +Maps partition patterns to job targets. +""" + +import sys +import re +import json +from collections import defaultdict + + +# Mapping from partition patterns to job targets +JOB_MAPPINGS = { + r"daily_color_votes/(?P\d{4}-\d{2}-\d{2})/(?P[^/]+)": "//databuild/test/app/dsl/generated:ingest_color_votes", + r"color_votes_1m/(?P\d{4}-\d{2}-\d{2})/(?P[^/]+)": "//databuild/test/app/dsl/generated:trailing_color_votes", + r"color_votes_1w/(?P\d{4}-\d{2}-\d{2})/(?P[^/]+)": "//databuild/test/app/dsl/generated:trailing_color_votes", + r"daily_votes/(?P\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes", + r"votes_1w/(?P\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes", + r"votes_1m/(?P\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes", + r"color_vote_report/(?P\d{4}-\d{2}-\d{2})/(?P[^/]+)": "//databuild/test/app/dsl/generated:color_vote_report_calc", +} + + +def lookup_job_for_partition(partition_ref: str) -> str: + """Look up which job can build the given partition reference.""" + for pattern, job_target in JOB_MAPPINGS.items(): + if re.match(pattern, partition_ref): + return job_target + + raise ValueError(f"No job found for partition: {partition_ref}") + + +def main(): + if len(sys.argv) < 2: + print("Usage: job_lookup.py [partition_ref...]", file=sys.stderr) + sys.exit(1) + + results = defaultdict(list) + try: + for partition_ref in sys.argv[1:]: + job_target = lookup_job_for_partition(partition_ref) + results[job_target].append(partition_ref) + + # Output the results as JSON (matching existing lookup format) + print(json.dumps(dict(results))) + except ValueError as e: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/databuild/test/app/dsl/generated/ingest_color_votes.py b/databuild/test/app/dsl/generated/ingest_color_votes.py new file mode 100755 index 0000000..af920c9 --- /dev/null +++ b/databuild/test/app/dsl/generated/ingest_color_votes.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Generated job script for IngestColorVotes. +""" + +import sys +import json +from databuild.test.app.dsl.graph import IngestColorVotes +from databuild.proto import PartitionRef, JobConfigureResponse, to_dict + + +def parse_outputs_from_args(args: list[str]) -> list: + """Parse partition output references from command line arguments.""" + outputs = [] + for arg in args: + # Find which output type can deserialize this partition reference + for output_type in IngestColorVotes.output_types: + try: + partition = output_type.deserialize(arg) + outputs.append(partition) + break + except ValueError: + continue + else: + raise ValueError(f"No output type in IngestColorVotes can deserialize partition ref: {arg}") + + return outputs + + +if __name__ == "__main__": + if len(sys.argv) < 2: + raise Exception(f"Invalid command usage") + + command = sys.argv[1] + job_instance = IngestColorVotes() + + if command == "config": + # Parse output partition references as PartitionRef objects (for Rust wrapper) + output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]] + + # Also parse them into DSL partition objects (for DSL job.config()) + outputs = parse_outputs_from_args(sys.argv[2:]) + + # Call job's config method - returns list[JobConfig] + configs = job_instance.config(outputs) + + # Wrap in JobConfigureResponse and serialize using to_dict() + response = JobConfigureResponse(configs=configs) + print(json.dumps(to_dict(response))) + + elif command == "exec": + # The exec method expects a JobConfig but the Rust wrapper passes args + # For now, let the DSL job handle the args directly + # TODO: This needs to be refined based on actual Rust wrapper interface + job_instance.exec(*sys.argv[2:]) + + else: + raise Exception(f"Invalid command `{sys.argv[1]}`") diff --git a/databuild/test/app/dsl/generated/trailing_color_votes.py b/databuild/test/app/dsl/generated/trailing_color_votes.py new file mode 100755 index 0000000..0936f87 --- /dev/null +++ b/databuild/test/app/dsl/generated/trailing_color_votes.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Generated job script for TrailingColorVotes. +""" + +import sys +import json +from databuild.test.app.dsl.graph import TrailingColorVotes +from databuild.proto import PartitionRef, JobConfigureResponse, to_dict + + +def parse_outputs_from_args(args: list[str]) -> list: + """Parse partition output references from command line arguments.""" + outputs = [] + for arg in args: + # Find which output type can deserialize this partition reference + for output_type in TrailingColorVotes.output_types: + try: + partition = output_type.deserialize(arg) + outputs.append(partition) + break + except ValueError: + continue + else: + raise ValueError(f"No output type in TrailingColorVotes can deserialize partition ref: {arg}") + + return outputs + + +if __name__ == "__main__": + if len(sys.argv) < 2: + raise Exception(f"Invalid command usage") + + command = sys.argv[1] + job_instance = TrailingColorVotes() + + if command == "config": + # Parse output partition references as PartitionRef objects (for Rust wrapper) + output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]] + + # Also parse them into DSL partition objects (for DSL job.config()) + outputs = parse_outputs_from_args(sys.argv[2:]) + + # Call job's config method - returns list[JobConfig] + configs = job_instance.config(outputs) + + # Wrap in JobConfigureResponse and serialize using to_dict() + response = JobConfigureResponse(configs=configs) + print(json.dumps(to_dict(response))) + + elif command == "exec": + # The exec method expects a JobConfig but the Rust wrapper passes args + # For now, let the DSL job handle the args directly + # TODO: This needs to be refined based on actual Rust wrapper interface + job_instance.exec(*sys.argv[2:]) + + else: + raise Exception(f"Invalid command `{sys.argv[1]}`")