Detect out of date generated source
This commit is contained in:
parent
952366ab66
commit
07d2a9faec
4 changed files with 157 additions and 1 deletions
|
|
@ -120,7 +120,7 @@ class DataBuildGraph:
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# Get job classes from the lookup table
|
# Get job classes from the lookup table
|
||||||
job_classes = list(set(self.lookup.values()))
|
job_classes = sorted(set(self.lookup.values()), key=lambda cls: cls.__name__)
|
||||||
|
|
||||||
# Format deps for BUILD.bazel
|
# Format deps for BUILD.bazel
|
||||||
if deps:
|
if deps:
|
||||||
|
|
@ -172,6 +172,15 @@ databuild_graph(
|
||||||
lookup = ":{name}_job_lookup",
|
lookup = ":{name}_job_lookup",
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Create tar archive of generated files for testing
|
||||||
|
genrule(
|
||||||
|
name = "existing_generated",
|
||||||
|
srcs = glob(["*.py", "BUILD.bazel"]),
|
||||||
|
outs = ["existing_generated.tar"],
|
||||||
|
cmd = "mkdir -p temp && cp $(SRCS) temp/ && find temp -exec touch -t 197001010000 {{}} + && tar -cf $@ -C temp .",
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
with open(os.path.join(output_dir, "BUILD.bazel"), "w") as f:
|
with open(os.path.join(output_dir, "BUILD.bazel"), "w") as f:
|
||||||
|
|
|
||||||
|
|
@ -22,3 +22,33 @@ databuild_dsl_generator(
|
||||||
deps = [":dsl_src"],
|
deps = [":dsl_src"],
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Generate fresh DSL output for comparison testing
genrule(
    name = "generate_fresh_dsl",
    outs = ["generated_fresh.tar"],
    cmd_bash = """
# Create temporary directory for generation
mkdir -p temp_workspace/databuild/test/app/dsl

# Set environment to generate to temp directory
export BUILD_WORKSPACE_DIRECTORY="temp_workspace"

# Run the generator
$(location :graph.generate)

# Create tar archive of generated files
if [ -d "temp_workspace/databuild/test/app/dsl/generated" ]; then
find temp_workspace/databuild/test/app/dsl/generated -exec touch -t 197001010000 {} +
tar -cf $@ -C temp_workspace/databuild/test/app/dsl/generated .
else
# Create empty tar if no files generated
tar -cf $@ -T /dev/null
fi

# Clean up
rm -rf temp_workspace
""",
    tools = [":graph.generate"],
    visibility = ["//visibility:public"],
)
|
||||||
|
|
|
||||||
|
|
@ -73,3 +73,15 @@ py_test(
|
||||||
"//databuild/test/app/dsl:dsl_src",
|
"//databuild/test/app/dsl:dsl_src",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# DSL generation consistency test
py_test(
    name = "test_dsl_generation_consistency",
    srcs = ["test_dsl_generation_consistency.py"],
    main = "test_dsl_generation_consistency.py",
    data = [
        "//databuild/test/app/dsl:generate_fresh_dsl",
        "//databuild/test/app/dsl/generated:existing_generated",
    ],
    deps = [],
)
|
||||||
|
|
|
||||||
105
databuild/test/app/dsl/test/test_dsl_generation_consistency.py
Normal file
105
databuild/test/app/dsl/test/test_dsl_generation_consistency.py
Normal file
|
|
@ -0,0 +1,105 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test that verifies the generated DSL code is up-to-date.
|
||||||
|
|
||||||
|
This test ensures that the checked-in generated directory contents match
|
||||||
|
exactly what would be produced by a fresh run of graph.generate.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class TestDSLGenerationConsistency(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
# Find the test runfiles directory to locate tar files
|
||||||
|
runfiles_dir = os.environ.get("RUNFILES_DIR")
|
||||||
|
if runfiles_dir:
|
||||||
|
self.runfiles_root = Path(runfiles_dir) / "_main"
|
||||||
|
else:
|
||||||
|
# Fallback for development - not expected to work in this case
|
||||||
|
self.fail("RUNFILES_DIR not set - test must be run via bazel test")
|
||||||
|
|
||||||
|
def _compute_tar_hash(self, tar_path: Path) -> str:
|
||||||
|
"""Compute MD5 hash of a tar file's contents."""
|
||||||
|
if not tar_path.exists():
|
||||||
|
self.fail(f"Tar file not found: {tar_path}")
|
||||||
|
|
||||||
|
with open(tar_path, "rb") as f:
|
||||||
|
content = f.read()
|
||||||
|
return hashlib.md5(content).hexdigest()
|
||||||
|
|
||||||
|
def _extract_and_list_tar(self, tar_path: Path) -> set:
|
||||||
|
"""Extract tar file and return set of file paths and their content hashes."""
|
||||||
|
if not tar_path.exists():
|
||||||
|
return set()
|
||||||
|
|
||||||
|
result = subprocess.run([
|
||||||
|
"tar", "-tf", str(tar_path)
|
||||||
|
], capture_output=True, text=True)
|
||||||
|
|
||||||
|
if result.returncode != 0:
|
||||||
|
self.fail(f"Failed to list tar contents: {result.stderr}")
|
||||||
|
|
||||||
|
return set(result.stdout.strip().split('\n')) if result.stdout.strip() else set()
|
||||||
|
|
||||||
|
def test_generated_code_is_up_to_date(self):
|
||||||
|
"""Test that the existing generated tar matches the fresh generated tar."""
|
||||||
|
|
||||||
|
# Find the tar files from data dependencies
|
||||||
|
existing_tar = self.runfiles_root / "databuild/test/app/dsl/generated/existing_generated.tar"
|
||||||
|
fresh_tar = self.runfiles_root / "databuild/test/app/dsl/generated_fresh.tar"
|
||||||
|
|
||||||
|
# Compute hashes of both tar files
|
||||||
|
existing_hash = self._compute_tar_hash(existing_tar)
|
||||||
|
fresh_hash = self._compute_tar_hash(fresh_tar)
|
||||||
|
|
||||||
|
# Compare hashes
|
||||||
|
if existing_hash != fresh_hash:
|
||||||
|
# Provide detailed diff information
|
||||||
|
existing_files = self._extract_and_list_tar(existing_tar)
|
||||||
|
fresh_files = self._extract_and_list_tar(fresh_tar)
|
||||||
|
|
||||||
|
only_in_existing = existing_files - fresh_files
|
||||||
|
only_in_fresh = fresh_files - existing_files
|
||||||
|
|
||||||
|
error_msg = [
|
||||||
|
"Generated DSL code is out of date!",
|
||||||
|
f"Existing tar hash: {existing_hash}",
|
||||||
|
f"Fresh tar hash: {fresh_hash}",
|
||||||
|
"",
|
||||||
|
"To fix this, run:",
|
||||||
|
" bazel run //databuild/test/app/dsl:graph.generate",
|
||||||
|
""
|
||||||
|
]
|
||||||
|
|
||||||
|
if only_in_existing:
|
||||||
|
error_msg.extend([
|
||||||
|
"Files only in existing generated code:",
|
||||||
|
*[f" - {f}" for f in sorted(only_in_existing)],
|
||||||
|
""
|
||||||
|
])
|
||||||
|
|
||||||
|
if only_in_fresh:
|
||||||
|
error_msg.extend([
|
||||||
|
"Files only in fresh generated code:",
|
||||||
|
*[f" + {f}" for f in sorted(only_in_fresh)],
|
||||||
|
""
|
||||||
|
])
|
||||||
|
|
||||||
|
common_files = existing_files & fresh_files
|
||||||
|
if common_files:
|
||||||
|
error_msg.extend([
|
||||||
|
f"Common files: {len(common_files)}",
|
||||||
|
"This suggests files have different contents.",
|
||||||
|
])
|
||||||
|
|
||||||
|
self.fail("\n".join(error_msg))
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running the test module directly (outside the bazel py_test wrapper).
if __name__ == "__main__":
    unittest.main()
|
||||||
Loading…
Reference in a new issue