Detect out of date generated source
This commit is contained in:
parent
952366ab66
commit
07d2a9faec
4 changed files with 157 additions and 1 deletions
|
|
@ -120,7 +120,7 @@ class DataBuildGraph:
|
|||
import os
|
||||
|
||||
# Get job classes from the lookup table
|
||||
job_classes = list(set(self.lookup.values()))
|
||||
job_classes = sorted(set(self.lookup.values()), key=lambda cls: cls.__name__)
|
||||
|
||||
# Format deps for BUILD.bazel
|
||||
if deps:
|
||||
|
|
@ -172,6 +172,15 @@ databuild_graph(
|
|||
lookup = ":{name}_job_lookup",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
# Create tar archive of generated files for testing
|
||||
genrule(
|
||||
name = "existing_generated",
|
||||
srcs = glob(["*.py", "BUILD.bazel"]),
|
||||
outs = ["existing_generated.tar"],
|
||||
cmd = "mkdir -p temp && cp $(SRCS) temp/ && find temp -exec touch -t 197001010000 {{}} + && tar -cf $@ -C temp .",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
'''
|
||||
|
||||
with open(os.path.join(output_dir, "BUILD.bazel"), "w") as f:
|
||||
|
|
|
|||
|
|
@ -22,3 +22,33 @@ databuild_dsl_generator(
|
|||
deps = [":dsl_src"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
# Generate fresh DSL output for comparison testing.
#
# Produces a deterministic tar (mtimes normalized to the epoch via
# `touch -t 197001010000`) so that a byte-level hash comparison against the
# checked-in archive is meaningful.
#
# NOTE(review): BUILD_WORKSPACE_DIRECTORY is set to a *relative* path
# ("temp_workspace"); confirm that :graph.generate resolves it against the
# genrule's working directory rather than requiring an absolute workspace root.
genrule(
    name = "generate_fresh_dsl",
    outs = ["generated_fresh.tar"],
    cmd_bash = """
# Create temporary directory for generation
mkdir -p temp_workspace/databuild/test/app/dsl

# Set environment to generate to temp directory
export BUILD_WORKSPACE_DIRECTORY="temp_workspace"

# Run the generator
$(location :graph.generate)

# Create tar archive of generated files
if [ -d "temp_workspace/databuild/test/app/dsl/generated" ]; then
    find temp_workspace/databuild/test/app/dsl/generated -exec touch -t 197001010000 {} +
    tar -cf $@ -C temp_workspace/databuild/test/app/dsl/generated .
else
    # Create empty tar if no files generated
    tar -cf $@ -T /dev/null
fi

# Clean up
rm -rf temp_workspace
""",
    tools = [":graph.generate"],
    visibility = ["//visibility:public"],
)
|
||||
|
|
|
|||
|
|
@ -73,3 +73,15 @@ py_test(
|
|||
"//databuild/test/app/dsl:dsl_src",
|
||||
],
|
||||
)
|
||||
|
||||
# DSL generation consistency test: compares the checked-in generated archive
# against a freshly generated one. Both tars arrive via `data` deps; the test
# locates them through RUNFILES_DIR at runtime, hence the empty `deps`.
py_test(
    name = "test_dsl_generation_consistency",
    srcs = ["test_dsl_generation_consistency.py"],
    main = "test_dsl_generation_consistency.py",
    data = [
        "//databuild/test/app/dsl:generate_fresh_dsl",
        "//databuild/test/app/dsl/generated:existing_generated",
    ],
    deps = [],
)
|
||||
|
|
|
|||
105
databuild/test/app/dsl/test/test_dsl_generation_consistency.py
Normal file
105
databuild/test/app/dsl/test/test_dsl_generation_consistency.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test that verifies the generated DSL code is up-to-date.
|
||||
|
||||
This test ensures that the checked-in generated directory contents match
|
||||
exactly what would be produced by a fresh run of graph.generate.
|
||||
"""
|
||||
|
||||
import hashlib
import os
import subprocess
import tarfile
import tempfile
import unittest
from pathlib import Path
|
||||
|
||||
|
||||
class TestDSLGenerationConsistency(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Find the test runfiles directory to locate tar files
|
||||
runfiles_dir = os.environ.get("RUNFILES_DIR")
|
||||
if runfiles_dir:
|
||||
self.runfiles_root = Path(runfiles_dir) / "_main"
|
||||
else:
|
||||
# Fallback for development - not expected to work in this case
|
||||
self.fail("RUNFILES_DIR not set - test must be run via bazel test")
|
||||
|
||||
def _compute_tar_hash(self, tar_path: Path) -> str:
|
||||
"""Compute MD5 hash of a tar file's contents."""
|
||||
if not tar_path.exists():
|
||||
self.fail(f"Tar file not found: {tar_path}")
|
||||
|
||||
with open(tar_path, "rb") as f:
|
||||
content = f.read()
|
||||
return hashlib.md5(content).hexdigest()
|
||||
|
||||
def _extract_and_list_tar(self, tar_path: Path) -> set:
|
||||
"""Extract tar file and return set of file paths and their content hashes."""
|
||||
if not tar_path.exists():
|
||||
return set()
|
||||
|
||||
result = subprocess.run([
|
||||
"tar", "-tf", str(tar_path)
|
||||
], capture_output=True, text=True)
|
||||
|
||||
if result.returncode != 0:
|
||||
self.fail(f"Failed to list tar contents: {result.stderr}")
|
||||
|
||||
return set(result.stdout.strip().split('\n')) if result.stdout.strip() else set()
|
||||
|
||||
def test_generated_code_is_up_to_date(self):
|
||||
"""Test that the existing generated tar matches the fresh generated tar."""
|
||||
|
||||
# Find the tar files from data dependencies
|
||||
existing_tar = self.runfiles_root / "databuild/test/app/dsl/generated/existing_generated.tar"
|
||||
fresh_tar = self.runfiles_root / "databuild/test/app/dsl/generated_fresh.tar"
|
||||
|
||||
# Compute hashes of both tar files
|
||||
existing_hash = self._compute_tar_hash(existing_tar)
|
||||
fresh_hash = self._compute_tar_hash(fresh_tar)
|
||||
|
||||
# Compare hashes
|
||||
if existing_hash != fresh_hash:
|
||||
# Provide detailed diff information
|
||||
existing_files = self._extract_and_list_tar(existing_tar)
|
||||
fresh_files = self._extract_and_list_tar(fresh_tar)
|
||||
|
||||
only_in_existing = existing_files - fresh_files
|
||||
only_in_fresh = fresh_files - existing_files
|
||||
|
||||
error_msg = [
|
||||
"Generated DSL code is out of date!",
|
||||
f"Existing tar hash: {existing_hash}",
|
||||
f"Fresh tar hash: {fresh_hash}",
|
||||
"",
|
||||
"To fix this, run:",
|
||||
" bazel run //databuild/test/app/dsl:graph.generate",
|
||||
""
|
||||
]
|
||||
|
||||
if only_in_existing:
|
||||
error_msg.extend([
|
||||
"Files only in existing generated code:",
|
||||
*[f" - {f}" for f in sorted(only_in_existing)],
|
||||
""
|
||||
])
|
||||
|
||||
if only_in_fresh:
|
||||
error_msg.extend([
|
||||
"Files only in fresh generated code:",
|
||||
*[f" + {f}" for f in sorted(only_in_fresh)],
|
||||
""
|
||||
])
|
||||
|
||||
common_files = existing_files & fresh_files
|
||||
if common_files:
|
||||
error_msg.extend([
|
||||
f"Common files: {len(common_files)}",
|
||||
"This suggests files have different contents.",
|
||||
])
|
||||
|
||||
self.fail("\n".join(error_msg))
|
||||
|
||||
|
||||
# Allow running this module directly (e.g. `python test_dsl_generation_consistency.py`),
# though setUp will fail fast unless RUNFILES_DIR is provided by bazel.
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Reference in a new issue