Big bump
parent 6d55d54267
commit 82e1d0eb26
21 changed files with 197 additions and 2873 deletions

@@ -209,11 +209,11 @@ python.toolchain(
 pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
 pip.parse(
-    hub_name = "pypi",
+    hub_name = "databuild_pypi",
     python_version = "3.13",
     requirements_lock = "//:requirements_lock.txt",
 )
-use_repo(pip, "pypi")
+use_repo(pip, "databuild_pypi")

 # OCI (Docker images)
 oci = use_extension("@rules_oci//oci:extensions.bzl", "oci")

@@ -150,7 +150,7 @@ py_binary(
     srcs = ["proto_wrapper.py"],
     main = "proto_wrapper.py",
     deps = [
-        "@pypi//betterproto2_compiler",
+        "@databuild_pypi//betterproto2_compiler",
     ],
 )

@@ -175,7 +175,7 @@ $(location @com_google_protobuf//:protoc) --python_betterproto2_out=$(GENDIR)/da
         ":protoc-gen-python_betterproto2",
         "//:ruff_binary",
         "@com_google_protobuf//:protoc",
-        "@pypi//betterproto2_compiler",
+        "@databuild_pypi//betterproto2_compiler",
     ],
 )

@@ -187,8 +187,8 @@ py_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        "@pypi//betterproto2_compiler",
-        "@pypi//grpcio",
-        "@pypi//pytest",
+        "@databuild_pypi//betterproto2_compiler",
+        "@databuild_pypi//grpcio",
+        "@databuild_pypi//pytest",
     ],
 )

@@ -3,6 +3,6 @@ py_test(
     srcs = glob(["*.py"]),
     deps = [
         "//databuild/dsl/python:dsl",
-        "@pypi//pytest",
+        "@databuild_pypi//pytest",
     ],
 )

@@ -79,8 +79,11 @@ fn resolve(output_refs: &[String]) -> Result<HashMap<String, Vec<String>>, String> {
         .map_err(|e| format!("Failed to execute job lookup: {}", e))?;

     if !output.status.success() {
-        error!("Job lookup failed: {}", output.status);
         let stderr = String::from_utf8_lossy(&output.stderr);
+        error!("Job lookup failed: {}", stderr);
+        error!("stderr: {}", stderr);
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        error!("stdout: {}", stdout);
         return Err(format!("Failed to run job lookup: {}", stderr));
     }

@@ -1 +1,11 @@
 from databuild.py_proto_out.databuild.v1 import *
+from betterproto2 import Casing, OutputFormat
+
+
+def to_dict(d) -> dict:
+    """Helper for creating proper dicts from protobuf derived dataclasses."""
+    return d.to_dict(
+        casing=Casing.SNAKE,
+        output_format=OutputFormat.PYTHON,
+        include_default_values=True
+    )

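The job wrapper hunks further down switch from response.to_dict() to this module-level helper. For context, a minimal usage sketch of the new helper, assuming the aggregate_color_votes configure() from this repo (the output ref is borrowed from its tests):

    import json
    from databuild.proto import PartitionRef, to_dict
    from databuild.test.app.jobs.aggregate_color_votes.config import configure

    # Serialize a configure response with snake_case keys, plain Python values,
    # and default fields included, matching what the wrappers print as JSON.
    response = configure([PartitionRef(str="votes_1w/2024-01-21")])
    print(json.dumps(to_dict(response)))
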
@@ -4,6 +4,8 @@ load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
 RUNFILES_PREFIX = """
 # ================= BEGIN RUNFILES INIT =================
+
+SCRIPT_PATH="$(realpath "$0")"

 # TODO should this be extracted to shared init script
 # Get the directory where the script is located
 if [[ -z "${RUNFILES_DIR:-}" ]]; then

@@ -71,6 +73,7 @@ def _databuild_job_cfg_impl(ctx):
         output = script,
         substitutions = {
             "%{EXECUTABLE_PATH}": configure_path,
+            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.configure.files_to_run.executable.short_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
             "%{PREFIX}": "EXECUTABLE_SUBCOMMAND=\"config\"\n",
         },

@@ -331,6 +334,7 @@ def _databuild_graph_lookup_impl(ctx):
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
             "%{PREFIX}": "",
             "%{EXECUTABLE_PATH}": ctx.attr.lookup.files_to_run.executable.path,
+            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.lookup.files_to_run.executable.short_path,
         },
         is_executable = True,
     )

@@ -399,6 +403,7 @@ export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
         output = script,
         substitutions = {
             "%{EXECUTABLE_PATH}": ctx.attr._analyze.files_to_run.executable.path,
+            "%{EXECUTABLE_SHORT_PATH}": ctx.attr._analyze.files_to_run.executable.short_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
             "%{PREFIX}": script_prefix,
         },

@@ -5,7 +5,32 @@ set -e

 %{PREFIX}

-EXECUTABLE_BINARY="$(rlocation "_main/$(basename "%{EXECUTABLE_PATH}")")"
+# Check if rlocation function is available
+if ! type rlocation >/dev/null 2>&1; then
+    echo "Error: rlocation function not available. Runfiles may not be properly initialized." >&2
+    exit 1
+fi
+
+# Resolve the executable using rlocation
+EXECUTABLE_BINARY="$(rlocation "_main/%{EXECUTABLE_SHORT_PATH}")"
+
+# Check if rlocation returned something
+if [[ -z "${EXECUTABLE_BINARY}" ]]; then
+    echo "Error: rlocation returned empty result for '_main/%{EXECUTABLE_SHORT_PATH}'" >&2
+    exit 1
+fi
+
+# Check if the resolved binary exists
+if [[ ! -f "${EXECUTABLE_BINARY}" ]]; then
+    echo "Error: Resolved executable '${EXECUTABLE_BINARY}' does not exist" >&2
+    exit 1
+fi
+
+# Check if the resolved binary is executable
+if [[ ! -x "${EXECUTABLE_BINARY}" ]]; then
+    echo "Error: Resolved executable '${EXECUTABLE_BINARY}' is not executable" >&2
+    exit 1
+fi

 # Run the configuration
 if [[ -n "${EXECUTABLE_SUBCOMMAND:-}" ]]; then

@@ -36,6 +36,17 @@ py_test(
     deps = [":job_src"],
 )

+py_test(
+    name = "test_graph_analysis",
+    srcs = ["graph/graph_test.py"],
+    main = "graph/graph_test.py",
+    data = [
+        ":bazel_graph.analyze",
+        ":bazel_graph_lookup",
+    ],
+    deps = [":job_src"],
+)
+
 # Bazel-defined
 ## Graph
 databuild_graph(

@@ -51,8 +62,8 @@ databuild_graph(

 py_binary(
     name = "bazel_graph_lookup",
-    srcs = ["lookup.py"],
-    main = "lookup.py",
+    srcs = ["graph/lookup.py"],
+    main = "graph/lookup.py",
 )

 ## Ingest Color Votes

databuild/test/app/graph/graph_test.py (new file, 91 additions)
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Integration test for the databuild graph analysis.
+
+This test verifies that when we request color vote reports, the graph analyzer
+correctly identifies all upstream dependencies and jobs required.
+"""
+
+import subprocess
+import json
+import unittest
+import os
+from pathlib import Path
+
+
+class GraphAnalysisTest(unittest.TestCase):
+    def setUp(self):
+        # Determine the path to bazel_graph.analyze
+        # In bazel test, we need to find the executable in the runfiles
+        runfiles_dir = os.environ.get('RUNFILES_DIR')
+        test_srcdir = os.environ.get('TEST_SRCDIR')
+
+        possible_paths = []
+        if runfiles_dir:
+            possible_paths.append(os.path.join(runfiles_dir, '_main', 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+            possible_paths.append(os.path.join(runfiles_dir, 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+
+        if test_srcdir:
+            possible_paths.append(os.path.join(test_srcdir, '_main', 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+            possible_paths.append(os.path.join(test_srcdir, 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+
+        # Fallback for local testing
+        possible_paths.extend([
+            'bazel-bin/databuild/test/app/bazel_graph.analyze',
+            './bazel_graph.analyze'
+        ])
+
+        self.graph_analyze = None
+        for path in possible_paths:
+            if os.path.exists(path):
+                self.graph_analyze = path
+                break
+
+        # Ensure the executable exists
+        if not self.graph_analyze:
+            self.skipTest(f"Graph analyze executable not found in any of these paths: {possible_paths}")
+
+    def run_graph_analyze(self, partition_refs):
+        """Run graph.analyze with the given partition references."""
+        cmd = [self.graph_analyze] + partition_refs
+        result = subprocess.run(cmd, capture_output=True, text=True, cwd=os.getcwd())
+
+        if result.returncode != 0:
+            self.fail(f"Graph analyze failed with return code {result.returncode}.\nStdout: {result.stdout}\nStderr: {result.stderr}")
+
+        # Parse the JSON output
+        try:
+            return json.loads(result.stdout)
+        except json.JSONDecodeError as e:
+            self.fail(f"Failed to parse JSON output: {e}\nOutput: {result.stdout}")
+
+    def test_single_color_report_dependencies(self):
+        """Test dependencies for a single color vote report."""
+        partition_refs = ["color_vote_report/2024-01-15/red"]
+        result = self.run_graph_analyze(partition_refs)
+        self.assertIn('nodes', result)
+        # TODO expand
+
+    def test_multiple_color_reports_same_date(self):
+        """Test dependencies when requesting multiple colors for the same date."""
+        partition_refs = [
+            "color_vote_report/2024-01-15/red",
+            "color_vote_report/2024-01-15/blue"
+        ]
+        result = self.run_graph_analyze(partition_refs)
+        self.assertIn('nodes', result)
+        # TODO expand
+
+    def test_multiple_dates_dependencies(self):
+        """Test dependencies when requesting reports for different dates."""
+        partition_refs = [
+            "color_vote_report/2024-01-15/red",
+            "color_vote_report/2024-01-16/red"
+        ]
+        result = self.run_graph_analyze(partition_refs)
+        self.assertIn('nodes', result)
+        # TODO expand
+
+
+if __name__ == '__main__':
+    unittest.main()

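Assuming the enclosing BUILD package is //databuild/test/app (inferred from the graph/graph_test.py srcs path in the py_test above), the new test would be invoked with something like: bazel test //databuild/test/app:test_graph_analysis
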
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
+
 from collections import defaultdict
 import sys
+import json

 LABEL_BASE = "//databuild/test/app"
+

@@ -21,3 +24,6 @@ if __name__ == "__main__":
     results = defaultdict(list)
     for raw_ref in sys.argv[1:]:
         results[lookup(raw_ref)].append(raw_ref)
+
+    # Output the results as JSON
+    print(json.dumps(dict(results)))

@@ -1,4 +1,4 @@
-from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig
+from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig, DepType, DataDep
 from databuild.test.app.colors import COLORS
 from datetime import date

@@ -29,7 +29,7 @@ def configure(outputs: list[PartitionRef]) -> JobConfigureResponse:

         configs.append(JobConfig(
             outputs=[output],
-            inputs=inputs,
+            inputs=[DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref) for ref in inputs],
             args=[],
             env={
                 "DATA_DATE": data_date,

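The same substitution appears in the other configure() hunks below: each input PartitionRef is now wrapped in a DataDep with an explicit materialize dependency type, and the tests unwrap it via partition_ref. A minimal sketch of the new input shape, using a ref taken from the tests:

    from databuild.proto import DataDep, DepType, PartitionRef

    # One input dependency, built the way the configure() functions now build them.
    ref = PartitionRef(str="daily_color_votes/2024-01-15/red")
    dep = DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref)

    # The tests read the ref back through the wrapper:
    assert dep.partition_ref.str == "daily_color_votes/2024-01-15/red"
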
@@ -3,17 +3,18 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.aggregate_color_votes.config import configure
 from databuild.test.app.jobs.aggregate_color_votes.execute import execute
+

 if __name__ == "__main__":
     if sys.argv[1] == "config":
         response = configure([
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(os.environ["DATA_DATE"], os.environ["AGGREGATE_TYPE"])
     else:

@@ -18,7 +18,7 @@ class TestAggregateColorVotesConfig(unittest.TestCase):
         # Check that inputs are from daily_color_votes
         for i, color in enumerate(COLORS):
             expected_input = f"daily_color_votes/2024-01-15/{color}"
-            self.assertEqual(config.inputs[i].str, expected_input)
+            self.assertEqual(config.inputs[i].partition_ref.str, expected_input)

     def test_configure_weekly_votes(self):
         outputs = [PartitionRef(str="votes_1w/2024-01-21")]

@@ -31,7 +31,7 @@ class TestAggregateColorVotesConfig(unittest.TestCase):
         # Check that inputs are from color_votes_1w
         for i, color in enumerate(COLORS):
             expected_input = f"color_votes_1w/2024-01-21/{color}"
-            self.assertEqual(config.inputs[i].str, expected_input)
+            self.assertEqual(config.inputs[i].partition_ref.str, expected_input)

     def test_configure_monthly_votes(self):
         outputs = [PartitionRef(str="votes_1m/2024-01-31")]

@@ -44,7 +44,7 @@ class TestAggregateColorVotesConfig(unittest.TestCase):
         # Check that inputs are from color_votes_1m
         for i, color in enumerate(COLORS):
             expected_input = f"color_votes_1m/2024-01-31/{color}"
-            self.assertEqual(config.inputs[i].str, expected_input)
+            self.assertEqual(config.inputs[i].partition_ref.str, expected_input)

     def test_configure_multiple_outputs(self):
         outputs = [

@@ -1,4 +1,4 @@
-from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig
+from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig, DataDep, DepType
 from datetime import date
 from collections import defaultdict

@@ -40,7 +40,7 @@ def configure(outputs: list[PartitionRef]) -> JobConfigureResponse:
     # Single job config for all outputs - pass output partition refs as args
     config = JobConfig(
         outputs=outputs,
-        inputs=inputs,
+        inputs=[DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref) for ref in inputs],
         args=[output.str for output in outputs],
         env={}
     )

@@ -3,9 +3,10 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.color_vote_report_calc.config import configure
 from databuild.test.app.jobs.color_vote_report_calc.execute import execute
+from betterproto2 import Casing, OutputFormat

 if __name__ == "__main__":
     if sys.argv[1] == "config":

@@ -13,7 +14,7 @@ if __name__ == "__main__":
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(sys.argv[2:])
     else:

@@ -21,7 +21,7 @@ class TestColorVoteReportCalcConfig(unittest.TestCase):
             "color_votes_1w/2024-01-15/red",
             "color_votes_1m/2024-01-15/red"
         ]
-        actual_inputs = [inp.str for inp in config.inputs]
+        actual_inputs = [inp.partition_ref.str for inp in config.inputs]
         for expected in expected_inputs:
             self.assertIn(expected, actual_inputs)

@@ -52,7 +52,7 @@ class TestColorVoteReportCalcConfig(unittest.TestCase):
         self.assertEqual(len(config.outputs), 2)

         # Should have total vote inputs for both dates
-        actual_inputs = [inp.str for inp in config.inputs]
+        actual_inputs = [inp.partition_ref.str for inp in config.inputs]
         self.assertIn("daily_votes/2024-01-15", actual_inputs)
         self.assertIn("daily_votes/2024-01-16", actual_inputs)

@@ -3,9 +3,10 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.ingest_color_votes.config import configure
 from databuild.test.app.jobs.ingest_color_votes.execute import execute
+from betterproto2 import Casing

 if __name__ == "__main__":
     if sys.argv[1] == "config":

@@ -13,7 +14,7 @@ if __name__ == "__main__":
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(os.environ["DATA_DATE"], os.environ["COLOR"])
     else:

@@ -1,4 +1,4 @@
-from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig
+from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig, DepType, DataDep
 from datetime import date, timedelta
 from collections import defaultdict

@@ -41,7 +41,7 @@ def configure(outputs: list[PartitionRef]) -> JobConfigureResponse:

         configs.append(JobConfig(
             outputs=output_partitions,
-            inputs=inputs,
+            inputs=[DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref) for ref in inputs],
             args=[],
             env=env
         ))

@@ -3,9 +3,10 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.trailing_color_votes.config import configure
 from databuild.test.app.jobs.trailing_color_votes.execute import execute
+from betterproto2 import Casing, OutputFormat

 if __name__ == "__main__":
     if sys.argv[1] == "config":

@@ -13,7 +14,7 @@ if __name__ == "__main__":
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(os.environ["DATA_DATE"], os.environ["COLOR"])
     else:

@@ -25,29 +25,6 @@ pip.parse(
 )
 use_repo(pip, "pypi")

-# Rules OCI - necessary for producing a docker container
-bazel_dep(name = "rules_oci", version = "2.2.6")
-# For testing, we also recommend https://registry.bazel.build/modules/container_structure_test
-
-oci = use_extension("@rules_oci//oci:extensions.bzl", "oci")
-
-# Declare external images you need to pull, for example:
-oci.pull(
-    name = "debian",
-    image = "docker.io/library/python",
-    platforms = [
-        "linux/arm64/v8",
-        "linux/amd64",
-    ],
-    # 'latest' is not reproducible, but it's convenient.
-    # During the build we print a WARNING message that includes recommended 'digest' and 'platforms'
-    # values which you can use here in place of 'tag' to pin for reproducibility.
-    tag = "3.12-bookworm",
-)
-
-# For each oci.pull call, repeat the "name" here to expose them as dependencies.
-use_repo(oci, "debian", "debian_linux_amd64", "debian_linux_arm64_v8")
-
 # Platforms for specifying linux/arm
 bazel_dep(name = "platforms", version = "0.0.11")

File diff suppressed because one or more lines are too long