From f92cfeb9b54f75692c5dd10c8e2ea0d7d606fa41 Mon Sep 17 00:00:00 2001
From: Stuart Axelbrooke <stuart@axelbrooke.com>
Date: Sat, 16 Aug 2025 15:37:47 -0700
Subject: [PATCH] Add test app generated package

---
 databuild/test/app/dsl/generated/BUILD.bazel  | 71 +++++++++++++++++++
 .../dsl/generated/aggregate_color_votes.py    | 58 +++++++++++++++
 .../dsl/generated/color_vote_report_calc.py   | 58 +++++++++++++++
 .../test/app/dsl/generated/dsl_job_lookup.py  | 53 ++++++++++++++
 .../app/dsl/generated/ingest_color_votes.py   | 58 +++++++++++++++
 .../app/dsl/generated/trailing_color_votes.py | 58 +++++++++++++++
 6 files changed, 356 insertions(+)
 create mode 100644 databuild/test/app/dsl/generated/BUILD.bazel
 create mode 100755 databuild/test/app/dsl/generated/aggregate_color_votes.py
 create mode 100755 databuild/test/app/dsl/generated/color_vote_report_calc.py
 create mode 100755 databuild/test/app/dsl/generated/dsl_job_lookup.py
 create mode 100755 databuild/test/app/dsl/generated/ingest_color_votes.py
 create mode 100755 databuild/test/app/dsl/generated/trailing_color_votes.py

diff --git a/databuild/test/app/dsl/generated/BUILD.bazel b/databuild/test/app/dsl/generated/BUILD.bazel
new file mode 100644
index 0000000..215a549
--- /dev/null
+++ b/databuild/test/app/dsl/generated/BUILD.bazel
@@ -0,0 +1,71 @@
+load("@databuild//databuild:rules.bzl", "databuild_job", "databuild_graph")
+
+# Generated by DataBuild DSL - do not edit manually
+# This file is generated in a subdirectory to avoid overwriting the original BUILD.bazel
+
+py_binary(
+    name = "aggregate_color_votes_binary",
+    srcs = ["aggregate_color_votes.py"],
+    main = "aggregate_color_votes.py",
+    deps = ["@@//databuild/test/app/dsl:dsl_src"],
+)
+
+databuild_job(
+    name = "aggregate_color_votes",
+    binary = ":aggregate_color_votes_binary",
+)
+py_binary(
+    name = "color_vote_report_calc_binary",
+    srcs = ["color_vote_report_calc.py"],
+    main = "color_vote_report_calc.py",
+    deps = ["@@//databuild/test/app/dsl:dsl_src"],
+)
+
+databuild_job(
+    name = "color_vote_report_calc",
+    binary = ":color_vote_report_calc_binary",
+)
+py_binary(
+    name = "ingest_color_votes_binary",
+    srcs = ["ingest_color_votes.py"],
+    main = "ingest_color_votes.py",
+    deps = ["@@//databuild/test/app/dsl:dsl_src"],
+)
+
+databuild_job(
+    name = "ingest_color_votes",
+    binary = ":ingest_color_votes_binary",
+)
+py_binary(
+    name = "trailing_color_votes_binary",
+    srcs = ["trailing_color_votes.py"],
+    main = "trailing_color_votes.py",
+    deps = ["@@//databuild/test/app/dsl:dsl_src"],
+)
+
+databuild_job(
+    name = "trailing_color_votes",
+    binary = ":trailing_color_votes_binary",
+)
+
+py_binary(
+    name = "dsl_job_lookup",
+    srcs = ["dsl_job_lookup.py"],
+    deps = ["@@//databuild/test/app/dsl:dsl_src"],
+)
+
+databuild_graph(
+    name = "dsl_graph",
+    jobs = ["aggregate_color_votes", "color_vote_report_calc", "ingest_color_votes", "trailing_color_votes"],
+    lookup = ":dsl_job_lookup",
+    visibility = ["//visibility:public"],
+)
+
+# Create tar archive of generated files for testing
+genrule(
+    name = "existing_generated",
+    srcs = glob(["*.py", "BUILD.bazel"]),
+    outs = ["existing_generated.tar"],
+    cmd = "mkdir -p temp && cp $(SRCS) temp/ && find temp -exec touch -t 197001010000 {} + && tar -cf $@ -C temp .",
+    visibility = ["//visibility:public"],
+)
diff --git a/databuild/test/app/dsl/generated/aggregate_color_votes.py b/databuild/test/app/dsl/generated/aggregate_color_votes.py
new file mode 100755
index 0000000..59af193
--- /dev/null
+++ b/databuild/test/app/dsl/generated/aggregate_color_votes.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""
+Generated job script for AggregateColorVotes.
+"""
+
+import sys
+import json
+from databuild.test.app.dsl.graph import AggregateColorVotes
+from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
+
+
+def parse_outputs_from_args(args: list[str]) -> list:
+    """Parse partition output references from command line arguments."""
+    outputs = []
+    for arg in args:
+        # Find which output type can deserialize this partition reference
+        for output_type in AggregateColorVotes.output_types:
+            try:
+                partition = output_type.deserialize(arg)
+                outputs.append(partition)
+                break
+            except ValueError:
+                continue
+        else:
+            raise ValueError(f"No output type in AggregateColorVotes can deserialize partition ref: {arg}")
+    
+    return outputs
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        raise Exception(f"Invalid command usage")
+
+    command = sys.argv[1]
+    job_instance = AggregateColorVotes()
+    
+    if command == "config":
+        # Parse output partition references as PartitionRef objects (for Rust wrapper)
+        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
+        
+        # Also parse them into DSL partition objects (for DSL job.config())
+        outputs = parse_outputs_from_args(sys.argv[2:])
+        
+        # Call job's config method - returns list[JobConfig]
+        configs = job_instance.config(outputs)
+        
+        # Wrap in JobConfigureResponse and serialize using to_dict()
+        response = JobConfigureResponse(configs=configs)
+        print(json.dumps(to_dict(response)))
+        
+    elif command == "exec":
+        # The exec method expects a JobConfig but the Rust wrapper passes args
+        # For now, let the DSL job handle the args directly 
+        # TODO: This needs to be refined based on actual Rust wrapper interface
+        job_instance.exec(*sys.argv[2:])
+        
+    else:
+        raise Exception(f"Invalid command `{sys.argv[1]}`")
diff --git a/databuild/test/app/dsl/generated/color_vote_report_calc.py b/databuild/test/app/dsl/generated/color_vote_report_calc.py
new file mode 100755
index 0000000..e538772
--- /dev/null
+++ b/databuild/test/app/dsl/generated/color_vote_report_calc.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""
+Generated job script for ColorVoteReportCalc.
+"""
+
+import sys
+import json
+from databuild.test.app.dsl.graph import ColorVoteReportCalc
+from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
+
+
+def parse_outputs_from_args(args: list[str]) -> list:
+    """Parse partition output references from command line arguments."""
+    outputs = []
+    for arg in args:
+        # Find which output type can deserialize this partition reference
+        for output_type in ColorVoteReportCalc.output_types:
+            try:
+                partition = output_type.deserialize(arg)
+                outputs.append(partition)
+                break
+            except ValueError:
+                continue
+        else:
+            raise ValueError(f"No output type in ColorVoteReportCalc can deserialize partition ref: {arg}")
+    
+    return outputs
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        raise Exception(f"Invalid command usage")
+
+    command = sys.argv[1]
+    job_instance = ColorVoteReportCalc()
+    
+    if command == "config":
+        # Parse output partition references as PartitionRef objects (for Rust wrapper)
+        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
+        
+        # Also parse them into DSL partition objects (for DSL job.config())
+        outputs = parse_outputs_from_args(sys.argv[2:])
+        
+        # Call job's config method - returns list[JobConfig]
+        configs = job_instance.config(outputs)
+        
+        # Wrap in JobConfigureResponse and serialize using to_dict()
+        response = JobConfigureResponse(configs=configs)
+        print(json.dumps(to_dict(response)))
+        
+    elif command == "exec":
+        # The exec method expects a JobConfig but the Rust wrapper passes args
+        # For now, let the DSL job handle the args directly 
+        # TODO: This needs to be refined based on actual Rust wrapper interface
+        job_instance.exec(*sys.argv[2:])
+        
+    else:
+        raise Exception(f"Invalid command `{sys.argv[1]}`")
diff --git a/databuild/test/app/dsl/generated/dsl_job_lookup.py b/databuild/test/app/dsl/generated/dsl_job_lookup.py
new file mode 100755
index 0000000..049f7e5
--- /dev/null
+++ b/databuild/test/app/dsl/generated/dsl_job_lookup.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+Generated job lookup for DataBuild DSL graph.
+Maps partition patterns to job targets.
+"""
+
+import sys
+import re
+import json
+from collections import defaultdict
+
+
+# Mapping from partition patterns to job targets
+JOB_MAPPINGS = {
+    r"daily_color_votes/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:ingest_color_votes",
+    r"color_votes_1m/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:trailing_color_votes",
+    r"color_votes_1w/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:trailing_color_votes",
+    r"daily_votes/(?P<data_date>\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes",
+    r"votes_1w/(?P<data_date>\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes",
+    r"votes_1m/(?P<data_date>\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes",
+    r"color_vote_report/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:color_vote_report_calc",
+}
+
+
+def lookup_job_for_partition(partition_ref: str) -> str:
+    """Look up which job can build the given partition reference."""
+    for pattern, job_target in JOB_MAPPINGS.items():
+        if re.match(pattern, partition_ref):
+            return job_target
+    
+    raise ValueError(f"No job found for partition: {partition_ref}")
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: job_lookup.py <partition_ref> [partition_ref...]", file=sys.stderr)
+        sys.exit(1)
+    
+    results = defaultdict(list)
+    try:
+        for partition_ref in sys.argv[1:]:
+            job_target = lookup_job_for_partition(partition_ref)
+            results[job_target].append(partition_ref)
+        
+        # Output the results as JSON (matching existing lookup format)
+        print(json.dumps(dict(results)))
+    except ValueError as e:
+        print(f"ERROR: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/databuild/test/app/dsl/generated/ingest_color_votes.py b/databuild/test/app/dsl/generated/ingest_color_votes.py
new file mode 100755
index 0000000..af920c9
--- /dev/null
+++ b/databuild/test/app/dsl/generated/ingest_color_votes.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""
+Generated job script for IngestColorVotes.
+"""
+
+import sys
+import json
+from databuild.test.app.dsl.graph import IngestColorVotes
+from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
+
+
+def parse_outputs_from_args(args: list[str]) -> list:
+    """Parse partition output references from command line arguments."""
+    outputs = []
+    for arg in args:
+        # Find which output type can deserialize this partition reference
+        for output_type in IngestColorVotes.output_types:
+            try:
+                partition = output_type.deserialize(arg)
+                outputs.append(partition)
+                break
+            except ValueError:
+                continue
+        else:
+            raise ValueError(f"No output type in IngestColorVotes can deserialize partition ref: {arg}")
+    
+    return outputs
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        raise Exception(f"Invalid command usage")
+
+    command = sys.argv[1]
+    job_instance = IngestColorVotes()
+    
+    if command == "config":
+        # Parse output partition references as PartitionRef objects (for Rust wrapper)
+        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
+        
+        # Also parse them into DSL partition objects (for DSL job.config())
+        outputs = parse_outputs_from_args(sys.argv[2:])
+        
+        # Call job's config method - returns list[JobConfig]
+        configs = job_instance.config(outputs)
+        
+        # Wrap in JobConfigureResponse and serialize using to_dict()
+        response = JobConfigureResponse(configs=configs)
+        print(json.dumps(to_dict(response)))
+        
+    elif command == "exec":
+        # The exec method expects a JobConfig but the Rust wrapper passes args
+        # For now, let the DSL job handle the args directly 
+        # TODO: This needs to be refined based on actual Rust wrapper interface
+        job_instance.exec(*sys.argv[2:])
+        
+    else:
+        raise Exception(f"Invalid command `{sys.argv[1]}`")
diff --git a/databuild/test/app/dsl/generated/trailing_color_votes.py b/databuild/test/app/dsl/generated/trailing_color_votes.py
new file mode 100755
index 0000000..0936f87
--- /dev/null
+++ b/databuild/test/app/dsl/generated/trailing_color_votes.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""
+Generated job script for TrailingColorVotes.
+"""
+
+import sys
+import json
+from databuild.test.app.dsl.graph import TrailingColorVotes
+from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
+
+
+def parse_outputs_from_args(args: list[str]) -> list:
+    """Parse partition output references from command line arguments."""
+    outputs = []
+    for arg in args:
+        # Find which output type can deserialize this partition reference
+        for output_type in TrailingColorVotes.output_types:
+            try:
+                partition = output_type.deserialize(arg)
+                outputs.append(partition)
+                break
+            except ValueError:
+                continue
+        else:
+            raise ValueError(f"No output type in TrailingColorVotes can deserialize partition ref: {arg}")
+    
+    return outputs
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        raise Exception(f"Invalid command usage")
+
+    command = sys.argv[1]
+    job_instance = TrailingColorVotes()
+    
+    if command == "config":
+        # Parse output partition references as PartitionRef objects (for Rust wrapper)
+        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
+        
+        # Also parse them into DSL partition objects (for DSL job.config())
+        outputs = parse_outputs_from_args(sys.argv[2:])
+        
+        # Call job's config method - returns list[JobConfig]
+        configs = job_instance.config(outputs)
+        
+        # Wrap in JobConfigureResponse and serialize using to_dict()
+        response = JobConfigureResponse(configs=configs)
+        print(json.dumps(to_dict(response)))
+        
+    elif command == "exec":
+        # The exec method expects a JobConfig but the Rust wrapper passes args
+        # For now, let the DSL job handle the args directly 
+        # TODO: This needs to be refined based on actual Rust wrapper interface
+        job_instance.exec(*sys.argv[2:])
+        
+    else:
+        raise Exception(f"Invalid command `{sys.argv[1]}`")