Add test app generated package

2025-08-16 15:37:47 -07:00 · 2025-08-16 15:37:47 -07:00 · f92cfeb9b5
commit f92cfeb9b5
parent 07d2a9faec
6 changed files with 356 additions and 0 deletions
--- a/databuild/test/app/dsl/generated/BUILD.bazel
+++ b/databuild/test/app/dsl/generated/BUILD.bazel
@ -0,0 +1,71 @@
 load("@databuild//databuild:rules.bzl", "databuild_job", "databuild_graph")
 # Generated by DataBuild DSL - do not edit manually
 # This file is generated in a subdirectory to avoid overwriting the original BUILD.bazel
 py_binary(
    name = "aggregate_color_votes_binary",
    srcs = ["aggregate_color_votes.py"],
    main = "aggregate_color_votes.py",
    deps = ["@@//databuild/test/app/dsl:dsl_src"],
 )
 databuild_job(
    name = "aggregate_color_votes",
    binary = ":aggregate_color_votes_binary",
 )
 py_binary(
    name = "color_vote_report_calc_binary",
    srcs = ["color_vote_report_calc.py"],
    main = "color_vote_report_calc.py",
    deps = ["@@//databuild/test/app/dsl:dsl_src"],
 )
 databuild_job(
    name = "color_vote_report_calc",
    binary = ":color_vote_report_calc_binary",
 )
 py_binary(
    name = "ingest_color_votes_binary",
    srcs = ["ingest_color_votes.py"],
    main = "ingest_color_votes.py",
    deps = ["@@//databuild/test/app/dsl:dsl_src"],
 )
 databuild_job(
    name = "ingest_color_votes",
    binary = ":ingest_color_votes_binary",
 )
 py_binary(
    name = "trailing_color_votes_binary",
    srcs = ["trailing_color_votes.py"],
    main = "trailing_color_votes.py",
    deps = ["@@//databuild/test/app/dsl:dsl_src"],
 )
 databuild_job(
    name = "trailing_color_votes",
    binary = ":trailing_color_votes_binary",
 )
 py_binary(
    name = "dsl_job_lookup",
    srcs = ["dsl_job_lookup.py"],
    deps = ["@@//databuild/test/app/dsl:dsl_src"],
 )
 databuild_graph(
    name = "dsl_graph",
    jobs = ["aggregate_color_votes", "color_vote_report_calc", "ingest_color_votes", "trailing_color_votes"],
    lookup = ":dsl_job_lookup",
    visibility = ["//visibility:public"],
 )
 # Create tar archive of generated files for testing
 genrule(
    name = "existing_generated",
    srcs = glob(["*.py", "BUILD.bazel"]),
    outs = ["existing_generated.tar"],
    cmd = "mkdir -p temp && cp $(SRCS) temp/ && find temp -exec touch -t 197001010000 {} + && tar -cf $@ -C temp .",
    visibility = ["//visibility:public"],
 )
--- a/databuild/test/app/dsl/generated/aggregate_color_votes.py
+++ b/databuild/test/app/dsl/generated/aggregate_color_votes.py
@ -0,0 +1,58 @@
 #!/usr/bin/env python3
 """
 Generated job script for AggregateColorVotes.
 """
 import sys
 import json
 from databuild.test.app.dsl.graph import AggregateColorVotes
 from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
 def parse_outputs_from_args(args: list[str]) -> list:
    """Parse partition output references from command line arguments."""
    outputs = []
    for arg in args:
        # Find which output type can deserialize this partition reference
        for output_type in AggregateColorVotes.output_types:
            try:
                partition = output_type.deserialize(arg)
                outputs.append(partition)
                break
            except ValueError:
                continue
        else:
            raise ValueError(f"No output type in AggregateColorVotes can deserialize partition ref: {arg}")
    return outputs
 if __name__ == "__main__":
    if len(sys.argv) < 2:
        raise Exception(f"Invalid command usage")
    command = sys.argv[1]
    job_instance = AggregateColorVotes()
    if command == "config":
        # Parse output partition references as PartitionRef objects (for Rust wrapper)
        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
        # Also parse them into DSL partition objects (for DSL job.config())
        outputs = parse_outputs_from_args(sys.argv[2:])
        # Call job's config method - returns list[JobConfig]
        configs = job_instance.config(outputs)
        # Wrap in JobConfigureResponse and serialize using to_dict()
        response = JobConfigureResponse(configs=configs)
        print(json.dumps(to_dict(response)))
    elif command == "exec":
        # The exec method expects a JobConfig but the Rust wrapper passes args
        # For now, let the DSL job handle the args directly 
        # TODO: This needs to be refined based on actual Rust wrapper interface
        job_instance.exec(*sys.argv[2:])
    else:
        raise Exception(f"Invalid command `{sys.argv[1]}`")
--- a/databuild/test/app/dsl/generated/color_vote_report_calc.py
+++ b/databuild/test/app/dsl/generated/color_vote_report_calc.py
@ -0,0 +1,58 @@
 #!/usr/bin/env python3
 """
 Generated job script for ColorVoteReportCalc.
 """
 import sys
 import json
 from databuild.test.app.dsl.graph import ColorVoteReportCalc
 from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
 def parse_outputs_from_args(args: list[str]) -> list:
    """Parse partition output references from command line arguments."""
    outputs = []
    for arg in args:
        # Find which output type can deserialize this partition reference
        for output_type in ColorVoteReportCalc.output_types:
            try:
                partition = output_type.deserialize(arg)
                outputs.append(partition)
                break
            except ValueError:
                continue
        else:
            raise ValueError(f"No output type in ColorVoteReportCalc can deserialize partition ref: {arg}")
    return outputs
 if __name__ == "__main__":
    if len(sys.argv) < 2:
        raise Exception(f"Invalid command usage")
    command = sys.argv[1]
    job_instance = ColorVoteReportCalc()
    if command == "config":
        # Parse output partition references as PartitionRef objects (for Rust wrapper)
        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
        # Also parse them into DSL partition objects (for DSL job.config())
        outputs = parse_outputs_from_args(sys.argv[2:])
        # Call job's config method - returns list[JobConfig]
        configs = job_instance.config(outputs)
        # Wrap in JobConfigureResponse and serialize using to_dict()
        response = JobConfigureResponse(configs=configs)
        print(json.dumps(to_dict(response)))
    elif command == "exec":
        # The exec method expects a JobConfig but the Rust wrapper passes args
        # For now, let the DSL job handle the args directly 
        # TODO: This needs to be refined based on actual Rust wrapper interface
        job_instance.exec(*sys.argv[2:])
    else:
        raise Exception(f"Invalid command `{sys.argv[1]}`")
--- a/databuild/test/app/dsl/generated/dsl_job_lookup.py
+++ b/databuild/test/app/dsl/generated/dsl_job_lookup.py
@ -0,0 +1,53 @@
 #!/usr/bin/env python3
 """
 Generated job lookup for DataBuild DSL graph.
 Maps partition patterns to job targets.
 """
 import sys
 import re
 import json
 from collections import defaultdict
 # Mapping from partition patterns to job targets
 JOB_MAPPINGS = {
    r"daily_color_votes/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:ingest_color_votes",
    r"color_votes_1m/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:trailing_color_votes",
    r"color_votes_1w/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:trailing_color_votes",
    r"daily_votes/(?P<data_date>\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes",
    r"votes_1w/(?P<data_date>\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes",
    r"votes_1m/(?P<data_date>\d{4}-\d{2}-\d{2})": "//databuild/test/app/dsl/generated:aggregate_color_votes",
    r"color_vote_report/(?P<data_date>\d{4}-\d{2}-\d{2})/(?P<color>[^/]+)": "//databuild/test/app/dsl/generated:color_vote_report_calc",
 }
 def lookup_job_for_partition(partition_ref: str) -> str:
    """Look up which job can build the given partition reference."""
    for pattern, job_target in JOB_MAPPINGS.items():
        if re.match(pattern, partition_ref):
            return job_target
    raise ValueError(f"No job found for partition: {partition_ref}")
 def main():
    if len(sys.argv) < 2:
        print("Usage: job_lookup.py <partition_ref> [partition_ref...]", file=sys.stderr)
        sys.exit(1)
    results = defaultdict(list)
    try:
        for partition_ref in sys.argv[1:]:
            job_target = lookup_job_for_partition(partition_ref)
            results[job_target].append(partition_ref)
        # Output the results as JSON (matching existing lookup format)
        print(json.dumps(dict(results)))
    except ValueError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        sys.exit(1)
 if __name__ == "__main__":
    main()
--- a/databuild/test/app/dsl/generated/ingest_color_votes.py
+++ b/databuild/test/app/dsl/generated/ingest_color_votes.py
@ -0,0 +1,58 @@
 #!/usr/bin/env python3
 """
 Generated job script for IngestColorVotes.
 """
 import sys
 import json
 from databuild.test.app.dsl.graph import IngestColorVotes
 from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
 def parse_outputs_from_args(args: list[str]) -> list:
    """Parse partition output references from command line arguments."""
    outputs = []
    for arg in args:
        # Find which output type can deserialize this partition reference
        for output_type in IngestColorVotes.output_types:
            try:
                partition = output_type.deserialize(arg)
                outputs.append(partition)
                break
            except ValueError:
                continue
        else:
            raise ValueError(f"No output type in IngestColorVotes can deserialize partition ref: {arg}")
    return outputs
 if __name__ == "__main__":
    if len(sys.argv) < 2:
        raise Exception(f"Invalid command usage")
    command = sys.argv[1]
    job_instance = IngestColorVotes()
    if command == "config":
        # Parse output partition references as PartitionRef objects (for Rust wrapper)
        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
        # Also parse them into DSL partition objects (for DSL job.config())
        outputs = parse_outputs_from_args(sys.argv[2:])
        # Call job's config method - returns list[JobConfig]
        configs = job_instance.config(outputs)
        # Wrap in JobConfigureResponse and serialize using to_dict()
        response = JobConfigureResponse(configs=configs)
        print(json.dumps(to_dict(response)))
    elif command == "exec":
        # The exec method expects a JobConfig but the Rust wrapper passes args
        # For now, let the DSL job handle the args directly 
        # TODO: This needs to be refined based on actual Rust wrapper interface
        job_instance.exec(*sys.argv[2:])
    else:
        raise Exception(f"Invalid command `{sys.argv[1]}`")
--- a/databuild/test/app/dsl/generated/trailing_color_votes.py
+++ b/databuild/test/app/dsl/generated/trailing_color_votes.py
@ -0,0 +1,58 @@
 #!/usr/bin/env python3
 """
 Generated job script for TrailingColorVotes.
 """
 import sys
 import json
 from databuild.test.app.dsl.graph import TrailingColorVotes
 from databuild.proto import PartitionRef, JobConfigureResponse, to_dict
 def parse_outputs_from_args(args: list[str]) -> list:
    """Parse partition output references from command line arguments."""
    outputs = []
    for arg in args:
        # Find which output type can deserialize this partition reference
        for output_type in TrailingColorVotes.output_types:
            try:
                partition = output_type.deserialize(arg)
                outputs.append(partition)
                break
            except ValueError:
                continue
        else:
            raise ValueError(f"No output type in TrailingColorVotes can deserialize partition ref: {arg}")
    return outputs
 if __name__ == "__main__":
    if len(sys.argv) < 2:
        raise Exception(f"Invalid command usage")
    command = sys.argv[1]
    job_instance = TrailingColorVotes()
    if command == "config":
        # Parse output partition references as PartitionRef objects (for Rust wrapper)
        output_refs = [PartitionRef(str=raw_ref) for raw_ref in sys.argv[2:]]
        # Also parse them into DSL partition objects (for DSL job.config())
        outputs = parse_outputs_from_args(sys.argv[2:])
        # Call job's config method - returns list[JobConfig]
        configs = job_instance.config(outputs)
        # Wrap in JobConfigureResponse and serialize using to_dict()
        response = JobConfigureResponse(configs=configs)
        print(json.dumps(to_dict(response)))
    elif command == "exec":
        # The exec method expects a JobConfig but the Rust wrapper passes args
        # For now, let the DSL job handle the args directly 
        # TODO: This needs to be refined based on actual Rust wrapper interface
        job_instance.exec(*sys.argv[2:])
    else:
        raise Exception(f"Invalid command `{sys.argv[1]}`")