diff --git a/databuild/test/app/bazel/BUILD.bazel b/databuild/test/app/bazel/BUILD.bazel
index 6a75e5c..ca87f55 100644
--- a/databuild/test/app/bazel/BUILD.bazel
+++ b/databuild/test/app/bazel/BUILD.bazel
@@ -65,6 +65,14 @@ py_test(
     ],
 )
 
+py_test(
+    name = "test_e2e",
+    srcs = ["test_e2e.py"],
+    data = [":bazel_graph.build"],
+    main = "test_e2e.py",
+    deps = [":job_src"],
+)
+
 # Bazel-defined
 ## Graph
 databuild_graph(
diff --git a/databuild/test/app/bazel/graph/lookup.py b/databuild/test/app/bazel/graph/lookup.py
index 2e5ee06..5c548e9 100644
--- a/databuild/test/app/bazel/graph/lookup.py
+++ b/databuild/test/app/bazel/graph/lookup.py
@@ -4,7 +4,7 @@ from collections import defaultdict
 import sys
 import json
 
-LABEL_BASE = "//databuild/test/app"
+LABEL_BASE = "//databuild/test/app/bazel"
 
 
 def lookup(raw_ref: str):
diff --git a/databuild/test/app/bazel/test_e2e.py b/databuild/test/app/bazel/test_e2e.py
new file mode 100644
index 0000000..b9e2688
--- /dev/null
+++ b/databuild/test/app/bazel/test_e2e.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+"""
+End-to-end test for the bazel-defined test app.
+
+Tests the full pipeline: build execution -> output verification -> JSON validation.
+"""
+
+import json
+import os
+import shutil
+import subprocess
+import time
+import unittest
+from pathlib import Path
+
+# Location of the graph build script relative to a runfiles root.
+GRAPH_BUILD_RELPATH = os.path.join(
+    "databuild", "test", "app", "bazel", "bazel_graph.build"
+)
+
+
+class BazelE2ETest(unittest.TestCase):
+    """End-to-end test for the bazel test app."""
+
+    def setUp(self):
+        """Set up test environment."""
+        self.output_dir = Path("/tmp/data/color_votes_1w/2025-09-01/red")
+        self.output_file = self.output_dir / "data.json"
+        self.partition_ref = "color_votes_1w/2025-09-01/red"
+
+        # Start from a clean slate so a stale file cannot satisfy the test.
+        self._remove_output_dir()
+
+    def tearDown(self):
+        """Clean up test environment."""
+        self._remove_output_dir()
+
+    def _remove_output_dir(self):
+        """Delete the partition output directory if it exists."""
+        if self.output_dir.exists():
+            shutil.rmtree(self.output_dir)
+
+    @staticmethod
+    def _candidate_graph_build_paths():
+        """Return the places the graph build script may live, in search order."""
+        candidates = []
+        # RUNFILES_DIR is consulted before TEST_SRCDIR; each root is tried
+        # with and without the "_main" directory component.
+        for env_var in ("RUNFILES_DIR", "TEST_SRCDIR"):
+            root = os.environ.get(env_var)
+            if root:
+                candidates.append(os.path.join(root, "_main", GRAPH_BUILD_RELPATH))
+                candidates.append(os.path.join(root, GRAPH_BUILD_RELPATH))
+
+        # Fallback for local testing
+        candidates.extend([
+            "bazel-bin/databuild/test/app/bazel/bazel_graph.build",
+            "./bazel_graph.build",
+        ])
+        return candidates
+
+    def test_end_to_end_execution(self):
+        """Test full end-to-end execution of the bazel graph."""
+        # Find the graph.build binary (following pattern from graph_test.py)
+        possible_paths = self._candidate_graph_build_paths()
+        graph_build_path = next(
+            (path for path in possible_paths if os.path.exists(path)), None
+        )
+        self.assertIsNotNone(
+            graph_build_path,
+            f"Graph build binary not found in any of: {possible_paths}",
+        )
+
+        # Record start time for file modification check
+        start_time = time.time()
+
+        # Execute the graph build (shell script)
+        result = subprocess.run(
+            ["bash", graph_build_path, self.partition_ref],
+            capture_output=True,
+            text=True,
+        )
+
+        # Verify execution succeeded; report both streams so a failure is
+        # diagnosable even when the script logs to stdout.
+        self.assertEqual(
+            result.returncode,
+            0,
+            f"Graph build failed with stderr: {result.stderr}\n"
+            f"stdout: {result.stdout}",
+        )
+
+        # Verify output file was created
+        self.assertTrue(
+            self.output_file.exists(),
+            f"Output file {self.output_file} was not created",
+        )
+
+        # Verify file was created recently (within 60 seconds)
+        file_mtime = os.path.getmtime(self.output_file)
+        time_diff = file_mtime - start_time
+        self.assertGreaterEqual(
+            time_diff,
+            -1,  # Allow 1 second clock skew
+            f"File appears to be too old: {time_diff} seconds",
+        )
+        self.assertLessEqual(
+            time_diff,
+            60,
+            f"File creation took too long: {time_diff} seconds",
+        )
+
+        # Verify file contains valid JSON. Decode explicitly as UTF-8 (the
+        # JSON interchange encoding) rather than relying on the locale default.
+        with open(self.output_file, encoding="utf-8") as f:
+            content = f.read()
+
+        try:
+            data = json.loads(content)
+        except json.JSONDecodeError as e:
+            self.fail(f"Output file does not contain valid JSON: {e}")
+
+        # Basic sanity check on JSON structure
+        self.assertIsInstance(
+            data, (dict, list), "JSON should be an object or array"
+        )
+
+
+if __name__ == '__main__':
+    unittest.main()