Add test app e2e test coverage for generated graph
parent f92cfeb9b5
commit b3298e7213
7 changed files with 177 additions and 84 deletions
databuild/test/app/BUILD.bazel
@@ -1,9 +1,15 @@
 py_library(
     name = "job_src",
-    srcs = glob(["**/*.py"]),
+    srcs = glob(["**/*.py"], exclude=["e2e_test_common.py"]),
     visibility = ["//visibility:public"],
     deps = [
         "//databuild:py_proto",
         "//databuild/dsl/python:dsl",
     ],
 )
+
+py_library(
+    name = "e2e_test_common",
+    srcs = ["e2e_test_common.py"],
+    visibility = ["//visibility:public"],
+)
databuild/test/app/bazel/BUILD.bazel
@@ -70,7 +70,7 @@ py_test(
     srcs = ["test_e2e.py"],
     data = [":bazel_graph.build"],
     main = "test_e2e.py",
-    deps = [":job_src"],
+    deps = ["//databuild/test/app:e2e_test_common"],
 )
 
 # Bazel-defined
databuild/test/app/bazel/test_e2e.py
@@ -5,102 +5,33 @@ End-to-end test for the bazel-defined test app.
 Tests the full pipeline: build execution -> output verification -> JSON validation.
 """
 
-import json
 import os
-import shutil
-import subprocess
-import time
-import unittest
-from pathlib import Path
+from databuild.test.app.e2e_test_common import DataBuildE2ETestBase
 
 
-class BazelE2ETest(unittest.TestCase):
-    """End-to-end test for the bazel test app."""
-
-    def setUp(self):
-        """Set up test environment."""
-        self.output_dir = Path("/tmp/data/color_votes_1w/2025-09-01/red")
-        self.output_file = self.output_dir / "data.json"
-        self.partition_ref = "color_votes_1w/2025-09-01/red"
-
-        # Clean up any existing test data
-        if self.output_dir.exists():
-            shutil.rmtree(self.output_dir)
-
-    def tearDown(self):
-        """Clean up test environment."""
-        if self.output_dir.exists():
-            shutil.rmtree(self.output_dir)
+class BazelE2ETest(DataBuildE2ETestBase):
+    """End-to-end test for the bazel-defined test app."""
 
     def test_end_to_end_execution(self):
         """Test full end-to-end execution of the bazel graph."""
-        # Find the graph.build binary (following pattern from graph_test.py)
-        runfiles_dir = os.environ.get("RUNFILES_DIR")
-        test_srcdir = os.environ.get("TEST_SRCDIR")
+        # Build possible paths for the bazel graph build binary
+        possible_paths = self.get_standard_runfiles_paths(
+            'databuild/test/app/bazel/bazel_graph.build'
+        )
 
-        possible_paths = []
-        if runfiles_dir:
-            possible_paths.append(os.path.join(runfiles_dir, '_main', 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
-            possible_paths.append(os.path.join(runfiles_dir, 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
-
-        if test_srcdir:
-            possible_paths.append(os.path.join(test_srcdir, '_main', 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
-            possible_paths.append(os.path.join(test_srcdir, 'databuild', 'test', 'app', 'bazel', 'bazel_graph.build'))
-
-        # Fallback for local testing
+        # Add fallback paths for local testing
         possible_paths.extend([
             'bazel-bin/databuild/test/app/bazel/bazel_graph.build',
            './bazel_graph.build'
         ])
 
-        graph_build_path = None
-        for path in possible_paths:
-            if os.path.exists(path):
-                graph_build_path = path
-                break
+        # Find the graph build binary
+        graph_build_path = self.find_graph_build_binary(possible_paths)
 
-        self.assertIsNotNone(graph_build_path,
-                             f"Graph build binary not found in any of: {possible_paths}")
-
-        # Record start time for file modification check
-        start_time = time.time()
-
-        # Execute the graph build (shell script)
-        result = subprocess.run(
-            ["bash", graph_build_path, self.partition_ref],
-            capture_output=True,
-            text=True
-        )
-
-        # Verify execution succeeded
-        self.assertEqual(result.returncode, 0,
-                         f"Graph build failed with stderr: {result.stderr}")
-
-        # Verify output file was created
-        self.assertTrue(self.output_file.exists(),
-                        f"Output file {self.output_file} was not created")
-
-        # Verify file was created recently (within 60 seconds)
-        file_mtime = os.path.getmtime(self.output_file)
-        time_diff = file_mtime - start_time
-        self.assertGreaterEqual(time_diff, -1,  # Allow 1 second clock skew
-                                f"File appears to be too old: {time_diff} seconds")
-        self.assertLessEqual(time_diff, 60,
-                             f"File creation took too long: {time_diff} seconds")
-
-        # Verify file contains valid JSON
-        with open(self.output_file, 'r') as f:
-            content = f.read()
-
-        try:
-            data = json.loads(content)
-        except json.JSONDecodeError as e:
-            self.fail(f"Output file does not contain valid JSON: {e}")
-
-        # Basic sanity check on JSON structure
-        self.assertIsInstance(data, (dict, list),
-                              "JSON should be an object or array")
+        # Execute and verify the graph build
+        self.execute_and_verify_graph_build(graph_build_path)
 
 
 if __name__ == '__main__':
+    import unittest
     unittest.main()
databuild/test/app/dsl/claude-generated-dsl-test.md (new file, 9 lines)
@@ -0,0 +1,9 @@
+We can't write a direct `bazel test` for the DSL-generated graph, because:
+
+1. Bazel doesn't allow you to `bazel run graph.generate` to generate a BUILD.bazel that will be used in the same build.
+2. We don't want to leak test generation into the graph generation code (since tests here are app-specific).
+
+Instead, we need a two-phase process that relies on the graph already having been generated here, complete with a test, so that `bazel test //...` gives us recall over generated source as well. This implies that the generated source gets checked into git (gasp, I know), and we need a mechanism to ensure it stays up to date. To achieve this, we'll create a test that asserts that the contents of the `generated` dir are exactly the same as the output of a fresh run of `graph.generate`.
+
+Our task is to implement the test that asserts equality between the two: e.g. the test target could depend on `graph.generate`, run it during the test, and md5 the results, comparing that to the md5 of the existing `generated` dir.
+
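A minimal sketch of what that staleness test could look like, in Python. This is illustrative only: the directory-hashing helper, the `FRESH_GENERATED_DIR` environment variable, and the test name are assumptions about the eventual wiring, not part of this commit.

#!/usr/bin/env python3
"""Sketch: assert the checked-in generated dir matches fresh graph.generate output."""

import hashlib
import os
import unittest
from pathlib import Path


def dir_digest(root: Path) -> str:
    """md5 over relative paths and file contents, walked in a stable order."""
    h = hashlib.md5()
    for path in sorted(root.rglob("*")):
        if path.is_file():
            h.update(str(path.relative_to(root)).encode())
            h.update(path.read_bytes())
    return h.hexdigest()


class GeneratedGraphUpToDateTest(unittest.TestCase):
    def test_generated_dir_is_up_to_date(self):
        # Assumed wiring: the test target depends on graph.generate, stages a
        # fresh run of it, and exposes that location via FRESH_GENERATED_DIR.
        checked_in = Path("databuild/test/app/dsl/generated")
        fresh = Path(os.environ["FRESH_GENERATED_DIR"])
        self.assertEqual(dir_digest(checked_in), dir_digest(fresh),
                         "generated dir is stale; re-run graph.generate and commit")


if __name__ == '__main__':
    unittest.main()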
databuild/test/app/dsl/generated_test/BUILD.bazel (new file, 7 lines)
@@ -0,0 +1,7 @@
+py_test(
+    name = "test_e2e",
+    srcs = ["test_e2e.py"],
+    data = ["//databuild/test/app/dsl/generated:dsl_graph.build"],
+    main = "test_e2e.py",
+    deps = ["//databuild/test/app:e2e_test_common"],
+)
databuild/test/app/dsl/generated_test/test_e2e.py (new file, 37 lines)
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+"""
+End-to-end test for the DSL-generated test app.
+
+Tests the full pipeline: build execution -> output verification -> JSON validation.
+"""
+
+import os
+from databuild.test.app.e2e_test_common import DataBuildE2ETestBase
+
+
+class DSLGeneratedE2ETest(DataBuildE2ETestBase):
+    """End-to-end test for the DSL-generated test app."""
+
+    def test_end_to_end_execution(self):
+        """Test full end-to-end execution of the DSL-generated graph."""
+        # Build possible paths for the DSL-generated graph build binary
+        possible_paths = self.get_standard_runfiles_paths(
+            'databuild/test/app/dsl/generated/dsl_graph.build'
+        )
+
+        # Add fallback paths for local testing
+        possible_paths.extend([
+            'bazel-bin/databuild/test/app/dsl/generated/dsl_graph.build',
+            './dsl_graph.build'
+        ])
+
+        # Find the graph build binary
+        graph_build_path = self.find_graph_build_binary(possible_paths)
+
+        # Execute and verify the graph build
+        self.execute_and_verify_graph_build(graph_build_path)
+
+
+if __name__ == '__main__':
+    import unittest
+    unittest.main()
databuild/test/app/e2e_test_common.py (new file, 103 lines)
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+Common end-to-end test logic for DataBuild test apps.
+
+Provides shared functionality for testing both bazel-defined and DSL-generated graphs.
+"""
+
+import json
+import os
+import shutil
+import subprocess
+import time
+import unittest
+from pathlib import Path
+from typing import List, Optional
+
+
+class DataBuildE2ETestBase(unittest.TestCase):
+    """Base class for DataBuild end-to-end tests."""
+
+    def setUp(self):
+        """Set up test environment."""
+        self.output_dir = Path("/tmp/data/color_votes_1w/2025-09-01/red")
+        self.output_file = self.output_dir / "data.json"
+        self.partition_ref = "color_votes_1w/2025-09-01/red"
+
+        # Clean up any existing test data
+        if self.output_dir.exists():
+            shutil.rmtree(self.output_dir)
+
+    def tearDown(self):
+        """Clean up test environment."""
+        if self.output_dir.exists():
+            shutil.rmtree(self.output_dir)
+
+    def find_graph_build_binary(self, possible_paths: List[str]) -> str:
+        """Find the graph.build binary from a list of possible paths."""
+        graph_build_path = None
+        for path in possible_paths:
+            if os.path.exists(path):
+                graph_build_path = path
+                break
+
+        self.assertIsNotNone(graph_build_path,
+                             f"Graph build binary not found in any of: {possible_paths}")
+        return graph_build_path
+
+    def execute_and_verify_graph_build(self, graph_build_path: str) -> None:
+        """Execute the graph build and verify the results."""
+        # Record start time for file modification check
+        start_time = time.time()
+
+        # Execute the graph build (shell script)
+        result = subprocess.run(
+            ["bash", graph_build_path, self.partition_ref],
+            capture_output=True,
+            text=True
+        )
+
+        # Verify execution succeeded
+        self.assertEqual(result.returncode, 0,
+                         f"Graph build failed with stderr: {result.stderr}")
+
+        # Verify output file was created
+        self.assertTrue(self.output_file.exists(),
+                        f"Output file {self.output_file} was not created")
+
+        # Verify file was created recently (within 60 seconds)
+        file_mtime = os.path.getmtime(self.output_file)
+        time_diff = file_mtime - start_time
+        self.assertGreaterEqual(time_diff, -1,  # Allow 1 second clock skew
+                                f"File appears to be too old: {time_diff} seconds")
+        self.assertLessEqual(time_diff, 60,
+                             f"File creation took too long: {time_diff} seconds")
+
+        # Verify file contains valid JSON
+        with open(self.output_file, 'r') as f:
+            content = f.read()
+
+        try:
+            data = json.loads(content)
+        except json.JSONDecodeError as e:
+            self.fail(f"Output file does not contain valid JSON: {e}")
+
+        # Basic sanity check on JSON structure
+        self.assertIsInstance(data, (dict, list),
+                              "JSON should be an object or array")
+
+    def get_standard_runfiles_paths(self, relative_path: str) -> List[str]:
+        """Get standard list of possible runfiles paths for a binary."""
+        runfiles_dir = os.environ.get("RUNFILES_DIR")
+        test_srcdir = os.environ.get("TEST_SRCDIR")
+
+        possible_paths = []
+        if runfiles_dir:
+            possible_paths.append(os.path.join(runfiles_dir, '_main', relative_path))
+            possible_paths.append(os.path.join(runfiles_dir, relative_path))
+
+        if test_srcdir:
+            possible_paths.append(os.path.join(test_srcdir, '_main', relative_path))
+            possible_paths.append(os.path.join(test_srcdir, relative_path))
+
+        return possible_paths