Big bump

parent 6d55d54267
commit 82e1d0eb26

21 changed files with 197 additions and 2873 deletions
@@ -209,11 +209,11 @@ python.toolchain(
 pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
 pip.parse(
-    hub_name = "pypi",
+    hub_name = "databuild_pypi",
     python_version = "3.13",
     requirements_lock = "//:requirements_lock.txt",
 )
-use_repo(pip, "pypi")
+use_repo(pip, "databuild_pypi")
 
 # OCI (Docker images)
 oci = use_extension("@rules_oci//oci:extensions.bzl", "oci")
 
@@ -150,7 +150,7 @@ py_binary(
     srcs = ["proto_wrapper.py"],
     main = "proto_wrapper.py",
     deps = [
-        "@pypi//betterproto2_compiler",
+        "@databuild_pypi//betterproto2_compiler",
     ],
 )
 
@@ -175,7 +175,7 @@ $(location @com_google_protobuf//:protoc) --python_betterproto2_out=$(GENDIR)/da
         ":protoc-gen-python_betterproto2",
         "//:ruff_binary",
         "@com_google_protobuf//:protoc",
-        "@pypi//betterproto2_compiler",
+        "@databuild_pypi//betterproto2_compiler",
     ],
 )
 
@@ -187,8 +187,8 @@ py_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        "@pypi//betterproto2_compiler",
-        "@pypi//grpcio",
-        "@pypi//pytest",
+        "@databuild_pypi//betterproto2_compiler",
+        "@databuild_pypi//grpcio",
+        "@databuild_pypi//pytest",
     ],
 )
 
@@ -3,6 +3,6 @@ py_test(
     srcs = glob(["*.py"]),
    deps = [
         "//databuild/dsl/python:dsl",
-        "@pypi//pytest",
+        "@databuild_pypi//pytest",
     ],
 )
 
@@ -79,8 +79,11 @@ fn resolve(output_refs: &[String]) -> Result<HashMap<String, Vec<String>>, Strin
         .map_err(|e| format!("Failed to execute job lookup: {}", e))?;
 
     if !output.status.success() {
+        error!("Job lookup failed: {}", output.status);
         let stderr = String::from_utf8_lossy(&output.stderr);
-        error!("Job lookup failed: {}", stderr);
+        error!("stderr: {}", stderr);
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        error!("stdout: {}", stdout);
         return Err(format!("Failed to run job lookup: {}", stderr));
     }
 
@@ -1 +1,11 @@
 from databuild.py_proto_out.databuild.v1 import *
+from betterproto2 import Casing, OutputFormat
+
+
+def to_dict(d) -> dict:
+    """Helper for creating proper dicts from protobuf derived dataclasses."""
+    return d.to_dict(
+        casing=Casing.SNAKE,
+        output_format=OutputFormat.PYTHON,
+        include_default_values=True
+    )
 
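Note: a minimal usage sketch of the new to_dict helper, assuming a betterproto2-generated message such as PartitionRef from this repo; the exact dict contents depend on the generated fields.

    from databuild.proto import PartitionRef, to_dict

    # Defaults are included, keys stay snake_case, and values are plain
    # Python objects (OutputFormat.PYTHON), ready for json.dumps.
    ref = PartitionRef(str="daily_votes/2024-01-15")
    print(to_dict(ref))  # e.g. {'str': 'daily_votes/2024-01-15'}
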
@@ -4,6 +4,8 @@ load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
 RUNFILES_PREFIX = """
 # ================= BEGIN RUNFILES INIT =================
 
+SCRIPT_PATH="$(realpath "$0")"
+
 # TODO should this be extracted to shared init script
 # Get the directory where the script is located
 if [[ -z "${RUNFILES_DIR:-}" ]]; then
 
@@ -71,6 +73,7 @@ def _databuild_job_cfg_impl(ctx):
         output = script,
         substitutions = {
             "%{EXECUTABLE_PATH}": configure_path,
+            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.configure.files_to_run.executable.short_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
             "%{PREFIX}": "EXECUTABLE_SUBCOMMAND=\"config\"\n",
         },
 
@@ -331,6 +334,7 @@ def _databuild_graph_lookup_impl(ctx):
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
             "%{PREFIX}": "",
             "%{EXECUTABLE_PATH}": ctx.attr.lookup.files_to_run.executable.path,
+            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.lookup.files_to_run.executable.short_path,
         },
         is_executable = True,
     )
 
@@ -399,6 +403,7 @@ export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
         output = script,
         substitutions = {
             "%{EXECUTABLE_PATH}": ctx.attr._analyze.files_to_run.executable.path,
+            "%{EXECUTABLE_SHORT_PATH}": ctx.attr._analyze.files_to_run.executable.short_path,
             "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
             "%{PREFIX}": script_prefix,
         },
 
@@ -5,7 +5,32 @@ set -e
 
 %{PREFIX}
 
-EXECUTABLE_BINARY="$(rlocation "_main/$(basename "%{EXECUTABLE_PATH}")")"
+# Check if rlocation function is available
+if ! type rlocation >/dev/null 2>&1; then
+    echo "Error: rlocation function not available. Runfiles may not be properly initialized." >&2
+    exit 1
+fi
+
+# Resolve the executable using rlocation
+EXECUTABLE_BINARY="$(rlocation "_main/%{EXECUTABLE_SHORT_PATH}")"
+
+# Check if rlocation returned something
+if [[ -z "${EXECUTABLE_BINARY}" ]]; then
+    echo "Error: rlocation returned empty result for '_main/%{EXECUTABLE_SHORT_PATH}'" >&2
+    exit 1
+fi
+
+# Check if the resolved binary exists
+if [[ ! -f "${EXECUTABLE_BINARY}" ]]; then
+    echo "Error: Resolved executable '${EXECUTABLE_BINARY}' does not exist" >&2
+    exit 1
+fi
+
+# Check if the resolved binary is executable
+if [[ ! -x "${EXECUTABLE_BINARY}" ]]; then
+    echo "Error: Resolved executable '${EXECUTABLE_BINARY}' is not executable" >&2
+    exit 1
+fi
 
 # Run the configuration
 if [[ -n "${EXECUTABLE_SUBCOMMAND:-}" ]]; then
 
@@ -36,6 +36,17 @@ py_test(
     deps = [":job_src"],
 )
 
+py_test(
+    name = "test_graph_analysis",
+    srcs = ["graph/graph_test.py"],
+    main = "graph/graph_test.py",
+    data = [
+        ":bazel_graph.analyze",
+        ":bazel_graph_lookup",
+    ],
+    deps = [":job_src"],
+)
+
 # Bazel-defined
 ## Graph
 databuild_graph(
 
@@ -51,8 +62,8 @@ databuild_graph(
 
 py_binary(
     name = "bazel_graph_lookup",
-    srcs = ["lookup.py"],
-    main = "lookup.py",
+    srcs = ["graph/lookup.py"],
+    main = "graph/lookup.py",
 )
 
 ## Ingest Color Votes
 
databuild/test/app/graph/graph_test.py (new file, 91 lines)
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Integration test for the databuild graph analysis.
+
+This test verifies that when we request color vote reports, the graph analyzer
+correctly identifies all upstream dependencies and jobs required.
+"""
+
+import subprocess
+import json
+import unittest
+import os
+from pathlib import Path
+
+
+class GraphAnalysisTest(unittest.TestCase):
+    def setUp(self):
+        # Determine the path to bazel_graph.analyze
+        # In bazel test, we need to find the executable in the runfiles
+        runfiles_dir = os.environ.get('RUNFILES_DIR')
+        test_srcdir = os.environ.get('TEST_SRCDIR')
+
+        possible_paths = []
+        if runfiles_dir:
+            possible_paths.append(os.path.join(runfiles_dir, '_main', 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+            possible_paths.append(os.path.join(runfiles_dir, 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+
+        if test_srcdir:
+            possible_paths.append(os.path.join(test_srcdir, '_main', 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+            possible_paths.append(os.path.join(test_srcdir, 'databuild', 'test', 'app', 'bazel_graph.analyze'))
+
+        # Fallback for local testing
+        possible_paths.extend([
+            'bazel-bin/databuild/test/app/bazel_graph.analyze',
+            './bazel_graph.analyze'
+        ])
+
+        self.graph_analyze = None
+        for path in possible_paths:
+            if os.path.exists(path):
+                self.graph_analyze = path
+                break
+
+        # Ensure the executable exists
+        if not self.graph_analyze:
+            self.skipTest(f"Graph analyze executable not found in any of these paths: {possible_paths}")
+
+    def run_graph_analyze(self, partition_refs):
+        """Run graph.analyze with the given partition references."""
+        cmd = [self.graph_analyze] + partition_refs
+        result = subprocess.run(cmd, capture_output=True, text=True, cwd=os.getcwd())
+
+        if result.returncode != 0:
+            self.fail(f"Graph analyze failed with return code {result.returncode}.\nStdout: {result.stdout}\nStderr: {result.stderr}")
+
+        # Parse the JSON output
+        try:
+            return json.loads(result.stdout)
+        except json.JSONDecodeError as e:
+            self.fail(f"Failed to parse JSON output: {e}\nOutput: {result.stdout}")
+
+    def test_single_color_report_dependencies(self):
+        """Test dependencies for a single color vote report."""
+        partition_refs = ["color_vote_report/2024-01-15/red"]
+        result = self.run_graph_analyze(partition_refs)
+        self.assertIn('nodes', result)
+        # TODO expand
+
+    def test_multiple_color_reports_same_date(self):
+        """Test dependencies when requesting multiple colors for the same date."""
+        partition_refs = [
+            "color_vote_report/2024-01-15/red",
+            "color_vote_report/2024-01-15/blue"
+        ]
+        result = self.run_graph_analyze(partition_refs)
+        self.assertIn('nodes', result)
+        # TODO expand
+
+    def test_multiple_dates_dependencies(self):
+        """Test dependencies when requesting reports for different dates."""
+        partition_refs = [
+            "color_vote_report/2024-01-15/red",
+            "color_vote_report/2024-01-16/red"
+        ]
+        result = self.run_graph_analyze(partition_refs)
+        self.assertIn('nodes', result)
+        # TODO expand
+
+
+if __name__ == '__main__':
+    unittest.main()
 
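Note: the setUp above probes RUNFILES_DIR/TEST_SRCDIR and hard-coded fallbacks by hand; a possible alternative sketch, assuming the runfiles library from rules_python were added as a dependency, resolves the binary in one call:

    # Hypothetical alternative; assumes @rules_python//python/runfiles is a dep.
    from python.runfiles import runfiles

    r = runfiles.Create()
    path = r.Rlocation("_main/databuild/test/app/bazel_graph.analyze")
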
@@ -1,5 +1,8 @@
+#!/usr/bin/env python3
+
 from collections import defaultdict
 import sys
+import json
 
 LABEL_BASE = "//databuild/test/app"
 
@@ -21,3 +24,6 @@ if __name__ == "__main__":
     results = defaultdict(list)
     for raw_ref in sys.argv[1:]:
         results[lookup(raw_ref)].append(raw_ref)
+
+    # Output the results as JSON
+    print(json.dumps(dict(results)))
 
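Note: the lookup script now emits a JSON object on stdout (job -> partition refs), which the caller that shells out to it, such as the Rust resolver earlier in this commit, can parse; a hedged sketch of the output shape, using a hypothetical job label:

    import json
    from collections import defaultdict

    results = defaultdict(list)
    # lookup() maps each partition ref to the job that builds it.
    results["//databuild/test/app:ingest_color_votes"].append("daily_color_votes/2024-01-15/red")
    print(json.dumps(dict(results)))
    # {"//databuild/test/app:ingest_color_votes": ["daily_color_votes/2024-01-15/red"]}
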
@@ -1,4 +1,4 @@
-from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig
+from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig, DepType, DataDep
 from databuild.test.app.colors import COLORS
 from datetime import date
 
@@ -29,7 +29,7 @@ def configure(outputs: list[PartitionRef]) -> JobConfigureResponse:
 
     configs.append(JobConfig(
         outputs=[output],
-        inputs=inputs,
+        inputs=[DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref) for ref in inputs],
         args=[],
         env={
             "DATA_DATE": data_date,
 
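Note: a sketch of the shape change above, assuming DataDep and DepType behave as the generated dataclasses their usage here implies; each bare input PartitionRef is now wrapped in a DataDep tagged as a materialize dependency, which is why the tests below read refs via .partition_ref.str:

    from databuild.proto import DataDep, DepType, PartitionRef

    inputs = [PartitionRef(str="daily_color_votes/2024-01-15/red")]  # hypothetical ref
    deps = [
        DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref)
        for ref in inputs
    ]
    assert deps[0].partition_ref.str == "daily_color_votes/2024-01-15/red"
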
@@ -3,17 +3,18 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.aggregate_color_votes.config import configure
 from databuild.test.app.jobs.aggregate_color_votes.execute import execute
+
 
 if __name__ == "__main__":
     if sys.argv[1] == "config":
         response = configure([
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(os.environ["DATA_DATE"], os.environ["AGGREGATE_TYPE"])
     else:
 
@@ -18,7 +18,7 @@ class TestAggregateColorVotesConfig(unittest.TestCase):
         # Check that inputs are from daily_color_votes
         for i, color in enumerate(COLORS):
             expected_input = f"daily_color_votes/2024-01-15/{color}"
-            self.assertEqual(config.inputs[i].str, expected_input)
+            self.assertEqual(config.inputs[i].partition_ref.str, expected_input)
 
     def test_configure_weekly_votes(self):
         outputs = [PartitionRef(str="votes_1w/2024-01-21")]
 
@@ -31,7 +31,7 @@ class TestAggregateColorVotesConfig(unittest.TestCase):
         # Check that inputs are from color_votes_1w
         for i, color in enumerate(COLORS):
             expected_input = f"color_votes_1w/2024-01-21/{color}"
-            self.assertEqual(config.inputs[i].str, expected_input)
+            self.assertEqual(config.inputs[i].partition_ref.str, expected_input)
 
     def test_configure_monthly_votes(self):
         outputs = [PartitionRef(str="votes_1m/2024-01-31")]
 
@@ -44,7 +44,7 @@ class TestAggregateColorVotesConfig(unittest.TestCase):
         # Check that inputs are from color_votes_1m
         for i, color in enumerate(COLORS):
             expected_input = f"color_votes_1m/2024-01-31/{color}"
-            self.assertEqual(config.inputs[i].str, expected_input)
+            self.assertEqual(config.inputs[i].partition_ref.str, expected_input)
 
     def test_configure_multiple_outputs(self):
         outputs = [
 
@@ -1,4 +1,4 @@
-from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig
+from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig, DataDep, DepType
 from datetime import date
 from collections import defaultdict
 
@@ -40,7 +40,7 @@ def configure(outputs: list[PartitionRef]) -> JobConfigureResponse:
     # Single job config for all outputs - pass output partition refs as args
     config = JobConfig(
         outputs=outputs,
-        inputs=inputs,
+        inputs=[DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref) for ref in inputs],
         args=[output.str for output in outputs],
         env={}
     )
 
@@ -3,9 +3,10 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.color_vote_report_calc.config import configure
 from databuild.test.app.jobs.color_vote_report_calc.execute import execute
+from betterproto2 import Casing, OutputFormat
 
 if __name__ == "__main__":
     if sys.argv[1] == "config":
 
@@ -13,7 +14,7 @@ if __name__ == "__main__":
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(sys.argv[2:])
     else:
 
@@ -21,7 +21,7 @@ class TestColorVoteReportCalcConfig(unittest.TestCase):
             "color_votes_1w/2024-01-15/red",
             "color_votes_1m/2024-01-15/red"
         ]
-        actual_inputs = [inp.str for inp in config.inputs]
+        actual_inputs = [inp.partition_ref.str for inp in config.inputs]
         for expected in expected_inputs:
             self.assertIn(expected, actual_inputs)
 
@@ -52,7 +52,7 @@ class TestColorVoteReportCalcConfig(unittest.TestCase):
         self.assertEqual(len(config.outputs), 2)
 
         # Should have total vote inputs for both dates
-        actual_inputs = [inp.str for inp in config.inputs]
+        actual_inputs = [inp.partition_ref.str for inp in config.inputs]
         self.assertIn("daily_votes/2024-01-15", actual_inputs)
         self.assertIn("daily_votes/2024-01-16", actual_inputs)
 
@@ -3,9 +3,10 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.ingest_color_votes.config import configure
 from databuild.test.app.jobs.ingest_color_votes.execute import execute
+from betterproto2 import Casing
 
 if __name__ == "__main__":
     if sys.argv[1] == "config":
 
@@ -13,7 +14,7 @@ if __name__ == "__main__":
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(os.environ["DATA_DATE"], os.environ["COLOR"])
     else:
 
@@ -1,4 +1,4 @@
-from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig
+from databuild.proto import PartitionRef, JobConfigureResponse, JobConfig, DepType, DataDep
 from datetime import date, timedelta
 from collections import defaultdict
 
@@ -41,7 +41,7 @@ def configure(outputs: list[PartitionRef]) -> JobConfigureResponse:
 
     configs.append(JobConfig(
         outputs=output_partitions,
-        inputs=inputs,
+        inputs=[DataDep(dep_type_code=DepType.MATERIALIZE, dep_type_name="materialize", partition_ref=ref) for ref in inputs],
         args=[],
         env=env
     ))
 
@@ -3,9 +3,10 @@
 import sys
 import os
 import json
-from databuild.proto import PartitionRef
+from databuild.proto import PartitionRef, to_dict
 from databuild.test.app.jobs.trailing_color_votes.config import configure
 from databuild.test.app.jobs.trailing_color_votes.execute import execute
+from betterproto2 import Casing, OutputFormat
 
 if __name__ == "__main__":
     if sys.argv[1] == "config":
 
@@ -13,7 +14,7 @@ if __name__ == "__main__":
             PartitionRef(str=raw_ref)
             for raw_ref in sys.argv[2:]
         ])
-        print(json.dumps(response.to_dict()))
+        print(json.dumps(to_dict(response)))
     elif sys.argv[1] == "exec":
         execute(os.environ["DATA_DATE"], os.environ["COLOR"])
     else:
 
@@ -25,29 +25,6 @@ pip.parse(
 )
 use_repo(pip, "pypi")
 
-# Rules OCI - necessary for producing a docker container
-bazel_dep(name = "rules_oci", version = "2.2.6")
-# For testing, we also recommend https://registry.bazel.build/modules/container_structure_test
-
-oci = use_extension("@rules_oci//oci:extensions.bzl", "oci")
-
-# Declare external images you need to pull, for example:
-oci.pull(
-    name = "debian",
-    image = "docker.io/library/python",
-    platforms = [
-        "linux/arm64/v8",
-        "linux/amd64",
-    ],
-    # 'latest' is not reproducible, but it's convenient.
-    # During the build we print a WARNING message that includes recommended 'digest' and 'platforms'
-    # values which you can use here in place of 'tag' to pin for reproducibility.
-    tag = "3.12-bookworm",
-)
-
-# For each oci.pull call, repeat the "name" here to expose them as dependencies.
-use_repo(oci, "debian", "debian_linux_amd64", "debian_linux_arm64_v8")
-
 # Platforms for specifying linux/arm
 bazel_dep(name = "platforms", version = "0.0.11")
 
File diff suppressed because one or more lines are too long