Add test app e2e test coverage for generated graph
This commit is contained in:
parent
f92cfeb9b5
commit
b3298e7213
7 changed files with 177 additions and 84 deletions
|
|
@ -1,9 +1,15 @@
|
||||||
py_library(
|
py_library(
|
||||||
name = "job_src",
|
name = "job_src",
|
||||||
srcs = glob(["**/*.py"]),
|
srcs = glob(["**/*.py"], exclude=["e2e_test_common.py"]),
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = [
|
deps = [
|
||||||
"//databuild:py_proto",
|
"//databuild:py_proto",
|
||||||
"//databuild/dsl/python:dsl",
|
"//databuild/dsl/python:dsl",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
py_library(
|
||||||
|
name = "e2e_test_common",
|
||||||
|
srcs = ["e2e_test_common.py"],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ py_test(
|
||||||
srcs = ["test_e2e.py"],
|
srcs = ["test_e2e.py"],
|
||||||
data = [":bazel_graph.build"],
|
data = [":bazel_graph.build"],
|
||||||
main = "test_e2e.py",
|
main = "test_e2e.py",
|
||||||
deps = [":job_src"],
|
deps = ["//databuild/test/app:e2e_test_common"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Bazel-defined
|
# Bazel-defined
|
||||||
|
|
|
||||||
|
|
@ -5,102 +5,33 @@ End-to-end test for the bazel-defined test app.
|
||||||
Tests the full pipeline: build execution -> output verification -> JSON validation.
|
Tests the full pipeline: build execution -> output verification -> JSON validation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
from databuild.test.app.e2e_test_common import DataBuildE2ETestBase
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import unittest
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
class BazelE2ETest(unittest.TestCase):
|
class BazelE2ETest(DataBuildE2ETestBase):
|
||||||
"""End-to-end test for the bazel test app."""
|
"""End-to-end test for the bazel-defined test app."""
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
"""Set up test environment."""
|
|
||||||
self.output_dir = Path("/tmp/data/color_votes_1w/2025-09-01/red")
|
|
||||||
self.output_file = self.output_dir / "data.json"
|
|
||||||
self.partition_ref = "color_votes_1w/2025-09-01/red"
|
|
||||||
|
|
||||||
# Clean up any existing test data
|
|
||||||
if self.output_dir.exists():
|
|
||||||
shutil.rmtree(self.output_dir)
|
|
||||||
|
|
||||||
def tearDown(self):
|
|
||||||
"""Clean up test environment."""
|
|
||||||
if self.output_dir.exists():
|
|
||||||
shutil.rmtree(self.output_dir)
|
|
||||||
|
|
||||||
def test_end_to_end_execution(self):
|
def test_end_to_end_execution(self):
|
||||||
"""Test full end-to-end execution of the bazel graph."""
|
"""Test full end-to-end execution of the bazel graph."""
|
||||||
# Find the graph.build binary (following pattern from graph_test.py)
|
# Build possible paths for the bazel graph build binary
|
||||||
runfiles_dir = os.environ.get("RUNFILES_DIR")
|
possible_paths = self.get_standard_runfiles_paths(
|
||||||
test_srcdir = os.environ.get("TEST_SRCDIR")
|
'databuild/test/app/bazel/bazel_graph.build'
|
||||||
|
)
|
||||||
|
|
||||||
possible_paths = []
|
# Add fallback paths for local testing
|
||||||
if runfiles_dir:
|
|
||||||
possible_paths.append(os.path.join(runfiles_dir, '_main', 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
|
|
||||||
possible_paths.append(os.path.join(runfiles_dir, 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
|
|
||||||
|
|
||||||
if test_srcdir:
|
|
||||||
possible_paths.append(os.path.join(test_srcdir, '_main', 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
|
|
||||||
possible_paths.append(os.path.join(test_srcdir, 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
|
|
||||||
|
|
||||||
# Fallback for local testing
|
|
||||||
possible_paths.extend([
|
possible_paths.extend([
|
||||||
'bazel-bin/databuild/test/app/bazel/bazel_graph.build',
|
'bazel-bin/databuild/test/app/bazel/bazel_graph.build',
|
||||||
'./bazel_graph.build'
|
'./bazel_graph.build'
|
||||||
])
|
])
|
||||||
|
|
||||||
graph_build_path = None
|
# Find the graph build binary
|
||||||
for path in possible_paths:
|
graph_build_path = self.find_graph_build_binary(possible_paths)
|
||||||
if os.path.exists(path):
|
|
||||||
graph_build_path = path
|
|
||||||
break
|
|
||||||
|
|
||||||
self.assertIsNotNone(graph_build_path,
|
# Execute and verify the graph build
|
||||||
f"Graph build binary not found in any of: {possible_paths}")
|
self.execute_and_verify_graph_build(graph_build_path)
|
||||||
|
|
||||||
# Record start time for file modification check
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
# Execute the graph build (shell script)
|
|
||||||
result = subprocess.run(
|
|
||||||
["bash", graph_build_path, self.partition_ref],
|
|
||||||
capture_output=True,
|
|
||||||
text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Verify execution succeeded
|
|
||||||
self.assertEqual(result.returncode, 0,
|
|
||||||
f"Graph build failed with stderr: {result.stderr}")
|
|
||||||
|
|
||||||
# Verify output file was created
|
|
||||||
self.assertTrue(self.output_file.exists(),
|
|
||||||
f"Output file {self.output_file} was not created")
|
|
||||||
|
|
||||||
# Verify file was created recently (within 60 seconds)
|
|
||||||
file_mtime = os.path.getmtime(self.output_file)
|
|
||||||
time_diff = file_mtime - start_time
|
|
||||||
self.assertGreaterEqual(time_diff, -1, # Allow 1 second clock skew
|
|
||||||
f"File appears to be too old: {time_diff} seconds")
|
|
||||||
self.assertLessEqual(time_diff, 60,
|
|
||||||
f"File creation took too long: {time_diff} seconds")
|
|
||||||
|
|
||||||
# Verify file contains valid JSON
|
|
||||||
with open(self.output_file, 'r') as f:
|
|
||||||
content = f.read()
|
|
||||||
|
|
||||||
try:
|
|
||||||
data = json.loads(content)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
self.fail(f"Output file does not contain valid JSON: {e}")
|
|
||||||
|
|
||||||
# Basic sanity check on JSON structure
|
|
||||||
self.assertIsInstance(data, (dict, list),
|
|
||||||
"JSON should be an object or array")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
import unittest
|
||||||
unittest.main()
|
unittest.main()
|
||||||
9
databuild/test/app/dsl/claude-generated-dsl-test.md
Normal file
9
databuild/test/app/dsl/claude-generated-dsl-test.md
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
|
||||||
|
We can't write a direct `bazel test` for the DSL generated graph, because:
|
||||||
|
|
||||||
|
1. Bazel doesn't allow you to `bazel run graph.generate` to generate a BUILD.bazel that will be used in the same build.
|
||||||
|
2. We don't want to leak test generation into the graph generation code (since tests here are app specific)
|
||||||
|
|
||||||
|
Instead, we use a two-phase process: we rely on the graph having already been generated here, including a test, so that `bazel test //...` also covers the generated source. This implies that the generated source is checked in to git (gasp, I know), so we need a mechanism to ensure it stays up to date. To achieve this, we'll create a test that asserts that the contents of the `generated` dir are exactly the same as the output of a fresh run of `graph.generate`.
|
||||||
|
|
||||||
|
Our task is to implement this test, which asserts equality between the two. For example, the test target could depend on `graph.generate`; the test would run it, compute an md5 of the results, and compare that to the md5 of the existing `generated` dir.
|
||||||
7
databuild/test/app/dsl/generated_test/BUILD.bazel
Normal file
7
databuild/test/app/dsl/generated_test/BUILD.bazel
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
py_test(
|
||||||
|
name = "test_e2e",
|
||||||
|
srcs = ["test_e2e.py"],
|
||||||
|
data = ["//databuild/test/app/dsl/generated:dsl_graph.build"],
|
||||||
|
main = "test_e2e.py",
|
||||||
|
deps = ["//databuild/test/app:e2e_test_common"],
|
||||||
|
)
|
||||||
37
databuild/test/app/dsl/generated_test/test_e2e.py
Normal file
37
databuild/test/app/dsl/generated_test/test_e2e.py
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
End-to-end test for the DSL-generated test app.
|
||||||
|
|
||||||
|
Tests the full pipeline: build execution -> output verification -> JSON validation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from databuild.test.app.e2e_test_common import DataBuildE2ETestBase
|
||||||
|
|
||||||
|
|
||||||
|
class DSLGeneratedE2ETest(DataBuildE2ETestBase):
    """Run the DSL-generated graph build end to end via the shared base class."""

    def test_end_to_end_execution(self):
        """Locate the DSL-generated graph build binary, run it, and verify its output."""
        binary_rel_path = 'databuild/test/app/dsl/generated/dsl_graph.build'

        # Runfiles locations are tried first; the trailing entries are
        # fallbacks for running the test locally.
        candidates = [
            *self.get_standard_runfiles_paths(binary_rel_path),
            'bazel-bin/databuild/test/app/dsl/generated/dsl_graph.build',
            './dsl_graph.build',
        ]

        # The base class fails the test if none of the candidates exists.
        located_binary = self.find_graph_build_binary(candidates)

        # Execution and result verification are delegated to the base class.
        self.execute_and_verify_graph_build(located_binary)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import unittest
|
||||||
|
unittest.main()
|
||||||
103
databuild/test/app/e2e_test_common.py
Normal file
103
databuild/test/app/e2e_test_common.py
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Common end-to-end test logic for DataBuild test apps.
|
||||||
|
|
||||||
|
Provides shared functionality for testing both bazel-defined and DSL-generated graphs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class DataBuildE2ETestBase(unittest.TestCase):
    """Base class for DataBuild end-to-end tests.

    Subclasses locate a ``graph.build`` script (see
    ``get_standard_runfiles_paths`` and ``find_graph_build_binary``) and hand
    it to ``execute_and_verify_graph_build``, which runs it for
    ``partition_ref`` and checks that a fresh JSON file shows up at
    ``output_file``.

    The class-level constants below may be overridden by subclasses that
    build a different partition or expect the output elsewhere.
    """

    # Directory under which the tests expect the build output to appear.
    DATA_ROOT = Path("/tmp/data")
    # Partition requested from the graph build; also the output sub-path.
    PARTITION_REF = "color_votes_1w/2025-09-01/red"
    # Name of the file expected inside the partition directory.
    OUTPUT_FILENAME = "data.json"
    # How far (in seconds) the output mtime may precede the build start.
    MAX_CLOCK_SKEW_SECONDS = 1
    # How long (in seconds) after the build start the output may be written.
    MAX_BUILD_SECONDS = 60

    def setUp(self):
        """Compute the expected output location and remove stale output."""
        self.partition_ref = self.PARTITION_REF
        # Derive the path from the partition ref so the two cannot drift.
        self.output_dir = self.DATA_ROOT / self.partition_ref
        self.output_file = self.output_dir / self.OUTPUT_FILENAME

        # Clean up any existing test data so a leftover file from an earlier
        # run cannot satisfy the existence check.
        self._remove_output_dir()

    def tearDown(self):
        """Remove the output directory produced by the test, if any."""
        self._remove_output_dir()

    def _remove_output_dir(self) -> None:
        """Delete ``self.output_dir`` and its contents when it exists."""
        if self.output_dir.exists():
            shutil.rmtree(self.output_dir)

    def find_graph_build_binary(self, possible_paths: List[str]) -> str:
        """Return the first path in ``possible_paths`` that exists.

        Args:
            possible_paths: Candidate locations, in priority order.

        Returns:
            The first candidate for which ``os.path.exists`` is true.

        Raises:
            AssertionError: (test failure) if no candidate exists, including
                when ``possible_paths`` is empty.
        """
        graph_build_path: Optional[str] = next(
            (path for path in possible_paths if os.path.exists(path)),
            None,
        )

        self.assertIsNotNone(
            graph_build_path,
            f"Graph build binary not found in any of: {possible_paths}",
        )
        return graph_build_path

    def execute_and_verify_graph_build(self, graph_build_path: str) -> None:
        """Run the graph build for ``self.partition_ref`` and verify its output.

        The script is executed with ``bash``. The test fails unless the
        process exits with status 0, ``self.output_file`` exists afterwards,
        its modification time falls inside the allowed window around the
        start of the run, and its contents parse as a JSON object or array.

        Args:
            graph_build_path: Path to the graph build shell script.
        """
        # Record start time for the file modification check.
        start_time = time.time()

        # Execute the graph build (shell script). The argument list form
        # avoids any shell interpretation of the partition ref.
        result = subprocess.run(
            ["bash", graph_build_path, self.partition_ref],
            capture_output=True,
            text=True,
            check=False,  # the exit status is asserted on below
        )

        # Verify execution succeeded; include stdout and the exit code too,
        # since the script may report the cause of a failure on either stream.
        self.assertEqual(
            result.returncode,
            0,
            f"Graph build failed with stderr: {result.stderr}\n"
            f"exit code: {result.returncode}\n"
            f"stdout: {result.stdout}",
        )

        # Verify the output file was created.
        self.assertTrue(
            self.output_file.exists(),
            f"Output file {self.output_file} was not created",
        )

        # Verify the file was written by this run: not before it started
        # (minus allowed clock skew) and not unreasonably long after.
        file_mtime = os.path.getmtime(self.output_file)
        time_diff = file_mtime - start_time
        self.assertGreaterEqual(
            time_diff,
            -self.MAX_CLOCK_SKEW_SECONDS,
            f"File appears to be too old: {time_diff} seconds",
        )
        self.assertLessEqual(
            time_diff,
            self.MAX_BUILD_SECONDS,
            f"File creation took too long: {time_diff} seconds",
        )

        # Verify the file contains valid JSON. Read with an explicit encoding
        # so the result does not depend on the platform default.
        with open(self.output_file, "r", encoding="utf-8") as f:
            content = f.read()

        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            self.fail(f"Output file does not contain valid JSON: {e}")

        # Basic sanity check on JSON structure.
        self.assertIsInstance(
            data,
            (dict, list),
            "JSON should be an object or array",
        )

    def get_standard_runfiles_paths(
        self, relative_path: str, workspace_name: str = "_main"
    ) -> List[str]:
        """Build candidate locations for ``relative_path`` under the runfiles roots.

        For each of the ``RUNFILES_DIR`` and ``TEST_SRCDIR`` environment
        variables that is set and non-empty (in that order), two candidates
        are produced: ``<root>/<workspace_name>/<relative_path>`` followed by
        ``<root>/<relative_path>``.

        Args:
            relative_path: Path of the binary relative to the workspace root.
            workspace_name: Directory name of the workspace inside the
                runfiles tree; defaults to ``_main``.

        Returns:
            Candidate paths in priority order with duplicates removed (both
            variables may point at the same directory). Empty when neither
            variable is set. A new list is returned on every call, so callers
            may extend it freely.
        """
        roots = (
            os.environ.get("RUNFILES_DIR"),
            os.environ.get("TEST_SRCDIR"),
        )

        candidates = []
        for root in roots:
            if not root:
                continue
            candidates.append(os.path.join(root, workspace_name, relative_path))
            candidates.append(os.path.join(root, relative_path))

        # dict.fromkeys keeps first-seen order while dropping repeats.
        return list(dict.fromkeys(candidates))
|
||||||
Loading…
Reference in a new issue