databuild/tests/end_to_end/podcast_reviews_test.sh

414 lines
No EOL
15 KiB
Bash
Executable file

#!/bin/bash
# End-to-end tests for the podcast_reviews example.
# Tests CLI vs Service build consistency for complex pipelines.
#
# Usage: podcast_reviews_test.sh <cli_build_binary> <service_binary>
#   Both arguments are required and must point at executable files; they are
#   validated right after the utilities are sourced.
#
# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Get the directory of this script as an absolute path, so the helper
# libraries can be located regardless of the caller's working directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Source utilities. The logging, assertion, database and service helpers used
# throughout this script are not defined in this file, so they presumably come
# from these three libraries.
source "$SCRIPT_DIR/lib/test_utils.sh"
source "$SCRIPT_DIR/lib/db_utils.sh"
source "$SCRIPT_DIR/lib/service_utils.sh"
# Test configuration: the suite name plus the two binaries under test, taken
# from the first and second positional arguments (empty when omitted).
TEST_NAME="podcast_reviews_e2e"
CLI_BUILD_BINARY="${1:-}"
SERVICE_BINARY="${2:-}"
# Validate inputs as guard clauses: both paths must be given, then both must
# be executable. test_fail is presumably fatal (defined in a sourced library).
[[ -n "$CLI_BUILD_BINARY" ]] || test_fail "CLI build binary path required as first argument"
[[ -n "$SERVICE_BINARY" ]] || test_fail "Service binary path required as second argument"
[[ -x "$CLI_BUILD_BINARY" ]] || test_fail "CLI build binary not found or not executable: $CLI_BUILD_BINARY"
[[ -x "$SERVICE_BINARY" ]] || test_fail "Service binary not found or not executable: $SERVICE_BINARY"
# Cleanup function
# Stops the test service when SERVICE_INFO is set and removes the test
# directory when TEST_DIR is set. Both steps are best-effort (failures are
# ignored), and both are skipped while their variable is unset or empty so the
# handler is safe to run at any point, including under `set -u`.
cleanup() {
  if [[ -n "${SERVICE_INFO:-}" ]]; then
    stop_test_service "$SERVICE_INFO" || true
  fi
  if [[ -n "${TEST_DIR:-}" ]]; then
    cleanup_test_dir "$TEST_DIR" || true
  fi
}
# Register the handler before anything is created: if one of the setup calls
# below fails, `set -e` ends the script and whatever was already created is
# still cleaned up.
trap cleanup EXIT
# Setup test environment
TEST_DIR=$(setup_test_env "$TEST_NAME")
CLI_DB_PATH=$(create_test_database "cli_test_db")
SERVICE_DB_PATH=$(create_test_database "service_test_db")
# Announce the run and record the resolved configuration in the log
log_info "Starting Podcast Reviews end-to-end tests"
log_info "CLI Binary: ${CLI_BUILD_BINARY}"
log_info "Service Binary: ${SERVICE_BINARY}"
log_info "Test Directory: ${TEST_DIR}"
# Test 1: Simple Pipeline - Raw Reviews Extraction
#
# Builds the partition reviews/date=2020-01-01 once through the CLI binary and
# once through a freshly started service, then requires that each build left
# events in its own database and that both databases report the partition as
# built.
#
# Globals:
#   TEST_DIR, CLI_BUILD_BINARY, SERVICE_BINARY, CLI_DB_PATH, SERVICE_DB_PATH
#     (read)
#   DATABUILD_BUILD_EVENT_LOG (exported, pointing at the CLI database)
#   SERVICE_INFO (set while the service runs; unset after it is stopped, left
#     set on a service build failure so the EXIT cleanup can stop it)
# Returns:
#   The status of test_pass on success; 1 at the first failed step.
test_raw_reviews_extraction() {
  log_info "=== Test 1: Simple Pipeline - Raw Reviews Extraction ==="
  local partition="reviews/date=2020-01-01"
  local cli_output="$TEST_DIR/cli_raw_reviews.out"

  # CLI Build
  log_info "Running CLI build for partition: $partition"
  export DATABUILD_BUILD_EVENT_LOG="sqlite:///$CLI_DB_PATH"
  if ! run_with_timeout 120 "$CLI_BUILD_BINARY" "$partition" > "$cli_output" 2>&1; then
    log_error "CLI build failed for partition: $partition"
    cat "$cli_output"
    return 1
  fi

  # Service Build
  log_info "Running Service build for partition: $partition"
  SERVICE_INFO=$(start_test_service "$SERVICE_BINARY" "$SERVICE_DB_PATH")
  if ! execute_service_build "$SERVICE_INFO" "[\"$partition\"]" 120; then
    log_error "Service build failed for partition: $partition"
    return 1
  fi
  stop_test_service "$SERVICE_INFO"
  unset SERVICE_INFO

  # Compare results
  log_info "Comparing CLI and Service build results"

  # Check that both databases have events. Declaration and assignment are
  # kept separate: `local var=$(cmd)` would report the status of `local` and
  # hide a failing counter.
  local cli_events service_events
  if ! cli_events=$(count_build_events "$CLI_DB_PATH"); then
    log_error "Failed to count CLI build events"
    return 1
  fi
  if ! service_events=$(count_build_events "$SERVICE_DB_PATH"); then
    log_error "Failed to count Service build events"
    return 1
  fi
  if [[ "$cli_events" -eq 0 ]]; then
    log_error "No CLI build events found"
    return 1
  fi
  if [[ "$service_events" -eq 0 ]]; then
    log_error "No Service build events found"
    return 1
  fi

  # Check that partition was built in both
  if ! is_partition_built "$CLI_DB_PATH" "$partition"; then
    log_error "Partition $partition was not built via CLI"
    return 1
  fi
  if ! is_partition_built "$SERVICE_DB_PATH" "$partition"; then
    log_error "Partition $partition was not built via Service"
    return 1
  fi
  test_pass "Raw reviews extraction test"
}
# Test 2: Complex Pipeline - Daily Summary with Dependencies
#
# Clears both event databases, builds
# daily_summaries/category=Technology/date=2020-01-01 through the CLI and
# through the service (300 second budget each), and requires both databases to
# report the target partition as built. Upstream partitions are checked too,
# but a missing one only produces a warning.
#
# Globals:
#   TEST_DIR, CLI_BUILD_BINARY, SERVICE_BINARY, CLI_DB_PATH, SERVICE_DB_PATH
#     (read)
#   DATABUILD_BUILD_EVENT_LOG (exported, pointing at the CLI database)
#   SERVICE_INFO (set while the service runs; unset after it is stopped)
# Returns:
#   The status of test_pass on success; 1 at the first failed step.
test_daily_summary_pipeline() {
  log_info "=== Test 2: Complex Pipeline - Daily Summary with Dependencies ==="
  local partition="daily_summaries/category=Technology/date=2020-01-01"
  local cli_output="$TEST_DIR/cli_daily_summary.out"

  # Clear previous events
  clear_build_events "$CLI_DB_PATH"
  clear_build_events "$SERVICE_DB_PATH"

  # CLI Build
  log_info "Running CLI build for complex partition: $partition"
  export DATABUILD_BUILD_EVENT_LOG="sqlite:///$CLI_DB_PATH"
  if ! run_with_timeout 300 "$CLI_BUILD_BINARY" "$partition" > "$cli_output" 2>&1; then
    log_error "CLI build failed for complex partition: $partition"
    cat "$cli_output"
    return 1
  fi

  # Service Build
  log_info "Running Service build for complex partition"
  SERVICE_INFO=$(start_test_service "$SERVICE_BINARY" "$SERVICE_DB_PATH")
  if ! execute_service_build "$SERVICE_INFO" "[\"$partition\"]" 300; then
    log_error "Service build failed for complex partition"
    return 1
  fi
  stop_test_service "$SERVICE_INFO"
  unset SERVICE_INFO

  # Compare results
  log_info "Comparing CLI and Service build results for complex pipeline"

  # Check that target partition was built
  if ! is_partition_built "$CLI_DB_PATH" "$partition"; then
    log_error "Complex partition $partition was not built via CLI"
    return 1
  fi
  if ! is_partition_built "$SERVICE_DB_PATH" "$partition"; then
    log_error "Complex partition $partition was not built via Service"
    return 1
  fi

  # Check that expected dependencies were built. These are advisory only: a
  # missing dependency is logged as a warning and does not fail the test.
  local expected_dependencies=(
    "reviews/date=2020-01-01"
    "categorized_reviews/category=Technology/date=2020-01-01"
  )
  # Keep the loop variable function-scoped so it does not leak into the
  # caller's (global) namespace.
  local dep
  for dep in "${expected_dependencies[@]}"; do
    if ! is_partition_built "$CLI_DB_PATH" "$dep"; then
      log_warn "Expected dependency $dep was not found in CLI build (may not be required)"
    fi
    if ! is_partition_built "$SERVICE_DB_PATH" "$dep"; then
      log_warn "Expected dependency $dep was not found in Service build (may not be required)"
    fi
  done
  test_pass "Daily summary pipeline test"
}
# Test 3: Podcasts Metadata Extraction
#
# Resets both event databases, builds the podcasts/all partition through the
# CLI and through the service (180 second budget each), and requires both
# databases to report the partition as built. Returns 1 at the first failed
# step, otherwise the status of test_pass.
test_podcasts_metadata() {
  log_info "=== Test 3: Podcasts Metadata Extraction ==="
  local target_partition="podcasts/all"
  local cli_log="$TEST_DIR/cli_podcasts.out"
  local timeout_secs=180

  # Start from empty event logs
  clear_build_events "$CLI_DB_PATH"
  clear_build_events "$SERVICE_DB_PATH"

  # CLI side: output is captured to a file and only shown on failure
  log_info "Running CLI build for podcasts metadata: $target_partition"
  DATABUILD_BUILD_EVENT_LOG="sqlite:///$CLI_DB_PATH"
  export DATABUILD_BUILD_EVENT_LOG
  run_with_timeout "$timeout_secs" "$CLI_BUILD_BINARY" "$target_partition" > "$cli_log" 2>&1 || {
    log_error "CLI build failed for podcasts metadata: $target_partition"
    cat "$cli_log"
    return 1
  }

  # Service side: the request is a JSON array holding the single partition
  log_info "Running Service build for podcasts metadata"
  local request_json="[\"$target_partition\"]"
  SERVICE_INFO=$(start_test_service "$SERVICE_BINARY" "$SERVICE_DB_PATH")
  execute_service_build "$SERVICE_INFO" "$request_json" "$timeout_secs" || {
    log_error "Service build failed for podcasts metadata"
    return 1
  }
  stop_test_service "$SERVICE_INFO"
  unset SERVICE_INFO

  # Both event logs must show the partition as built
  log_info "Comparing CLI and Service build results for podcasts metadata"
  is_partition_built "$CLI_DB_PATH" "$target_partition" || {
    log_error "Podcasts metadata partition $target_partition was not built via CLI"
    return 1
  }
  is_partition_built "$SERVICE_DB_PATH" "$target_partition" || {
    log_error "Podcasts metadata partition $target_partition was not built via Service"
    return 1
  }
  test_pass "Podcasts metadata extraction test"
}
# Test 4: Multiple Partitions with Mixed Dependencies
#
# Clears both event databases, then requests three partitions in a single
# build (two review dates plus podcasts/all), once through the CLI and once
# through the service (240 second budget each). Every requested partition
# must be reported as built in both databases, and each database must hold at
# least three events.
#
# Globals:
#   TEST_DIR, CLI_BUILD_BINARY, SERVICE_BINARY, CLI_DB_PATH, SERVICE_DB_PATH
#     (read)
#   DATABUILD_BUILD_EVENT_LOG (exported, pointing at the CLI database)
#   SERVICE_INFO (set while the service runs; unset after it is stopped)
# Returns:
#   The status of test_pass on success; 1 at the first failed step.
test_multiple_mixed_partitions() {
  log_info "=== Test 4: Multiple Partitions with Mixed Dependencies ==="
  local partitions=(
    "reviews/date=2020-01-01"
    "reviews/date=2020-01-02"
    "podcasts/all"
  )
  # Derive the service request from the array so the CLI and the service are
  # always asked for the same partitions. Produces `["a", "b", "c"]`; no JSON
  # escaping is applied, which is sufficient for these plain partition names.
  local partitions_json
  printf -v partitions_json '"%s", ' "${partitions[@]}"
  partitions_json="[${partitions_json%, }]"
  local cli_output="$TEST_DIR/cli_mixed.out"
  # Loop variable, kept function-scoped so it does not leak globally
  local partition

  # Clear previous events
  clear_build_events "$CLI_DB_PATH"
  clear_build_events "$SERVICE_DB_PATH"

  # CLI Build
  log_info "Running CLI build for multiple mixed partitions: ${partitions[*]}"
  export DATABUILD_BUILD_EVENT_LOG="sqlite:///$CLI_DB_PATH"
  if ! run_with_timeout 240 "$CLI_BUILD_BINARY" "${partitions[@]}" > "$cli_output" 2>&1; then
    log_error "CLI build failed for multiple mixed partitions"
    cat "$cli_output"
    return 1
  fi

  # Service Build
  log_info "Running Service build for multiple mixed partitions"
  SERVICE_INFO=$(start_test_service "$SERVICE_BINARY" "$SERVICE_DB_PATH")
  if ! execute_service_build "$SERVICE_INFO" "$partitions_json" 240; then
    log_error "Service build failed for multiple mixed partitions"
    return 1
  fi
  stop_test_service "$SERVICE_INFO"
  unset SERVICE_INFO

  # Compare results
  log_info "Comparing CLI and Service build results for multiple mixed partitions"

  # Check that all target partitions were built
  for partition in "${partitions[@]}"; do
    if ! is_partition_built "$CLI_DB_PATH" "$partition"; then
      log_error "Partition $partition was not built via CLI"
      return 1
    fi
    if ! is_partition_built "$SERVICE_DB_PATH" "$partition"; then
      log_error "Partition $partition was not built via Service"
      return 1
    fi
  done

  # Check event counts are reasonable. Declaration and assignment are kept
  # separate so a failing counter is not hidden behind the status of `local`.
  local cli_events service_events
  if ! cli_events=$(count_build_events "$CLI_DB_PATH"); then
    log_error "Failed to count CLI build events"
    return 1
  fi
  if ! service_events=$(count_build_events "$SERVICE_DB_PATH"); then
    log_error "Failed to count Service build events"
    return 1
  fi
  if [[ "$cli_events" -lt 3 ]]; then
    log_error "Too few CLI events: $cli_events (expected at least 3)"
    return 1
  fi
  if [[ "$service_events" -lt 3 ]]; then
    log_error "Too few Service events: $service_events (expected at least 3)"
    return 1
  fi
  test_pass "Multiple mixed partitions test"
}
# Private helper for Test 5: print how many rows in the build_events table of
# the SQLite database $1 have event_type equal to $2. Deliberately
# best-effort: when the sqlite3 invocation fails, "0" is printed instead
# (same approach as simple_test.sh). $2 is interpolated into the SQL text and
# must therefore be a trusted literal.
_count_events_of_type() {
  sqlite3 "$1" "SELECT COUNT(*) FROM build_events WHERE event_type = '$2';" 2>/dev/null || echo "0"
}

# Test 5: Event and Output Consistency
#
# Builds reviews/date=2020-01-01 through the CLI and through the service into
# two fresh databases and compares the recorded events:
#   - both sides must have at least one event for the partition (a difference
#     of more than 5 between the two counts is only logged as a warning);
#   - the job, partition and build_request event counts must match exactly;
#   - the total event counts must match exactly.
#
# Globals:
#   TEST_DIR, CLI_BUILD_BINARY, SERVICE_BINARY (read)
#   DATABUILD_BUILD_EVENT_LOG (exported, pointing at the fresh CLI database)
#   SERVICE_INFO (set while the service runs; unset after it is stopped)
# Returns:
#   The status of test_pass on success; 1 at the first failed step.
test_consistency_validation() {
  log_info "=== Test 5: Event and Output Consistency Validation ==="

  # Use fresh databases for this test. Declarations are kept separate from
  # the command substitutions so helper failures are not masked by `local`.
  local cli_db_consistency service_db_consistency
  if ! cli_db_consistency=$(create_test_database "cli_consistency_test") ||
     ! service_db_consistency=$(create_test_database "service_consistency_test"); then
    log_error "Failed to create databases for consistency test"
    return 1
  fi
  local partition="reviews/date=2020-01-01"
  local cli_output="$TEST_DIR/cli_consistency.out"

  # CLI Build
  export DATABUILD_BUILD_EVENT_LOG="sqlite:///$cli_db_consistency"
  if ! run_with_timeout 120 "$CLI_BUILD_BINARY" "$partition" > "$cli_output" 2>&1; then
    log_error "CLI build failed for consistency test"
    # Show the captured build output, as the other tests do on CLI failure
    cat "$cli_output"
    return 1
  fi

  # Service Build
  SERVICE_INFO=$(start_test_service "$SERVICE_BINARY" "$service_db_consistency")
  if ! execute_service_build "$SERVICE_INFO" "[\"$partition\"]" 120; then
    log_error "Service build failed for consistency test"
    return 1
  fi
  stop_test_service "$SERVICE_INFO"
  unset SERVICE_INFO

  # Extract and compare events
  local cli_events_file="$TEST_DIR/cli_consistency_events.json"
  local service_events_file="$TEST_DIR/service_consistency_events.json"
  get_partition_events "$cli_db_consistency" "$partition" "$cli_events_file"
  get_partition_events "$service_db_consistency" "$partition" "$service_events_file"

  # Validate event consistency
  local cli_event_count service_event_count
  if ! cli_event_count=$(count_lines "$cli_events_file") ||
     ! service_event_count=$(count_lines "$service_events_file"); then
    log_error "Failed to count extracted partition events for consistency test"
    return 1
  fi
  if [[ "$cli_event_count" -eq 0 ]]; then
    log_error "No CLI events found for consistency test"
    return 1
  fi
  if [[ "$service_event_count" -eq 0 ]]; then
    log_error "No Service events found for consistency test"
    return 1
  fi

  # Events should be reasonably similar in count
  local event_diff=$((cli_event_count - service_event_count))
  if [[ ${event_diff#-} -gt 5 ]]; then # Absolute value > 5
    log_warn "Event counts differ significantly: CLI=$cli_event_count, Service=$service_event_count"
  else
    log_info "Event counts are consistent: CLI=$cli_event_count, Service=$service_event_count"
  fi

  # Detailed event count validation (matching simple_test.sh approach)
  log_info "Performing detailed event count validation..."
  local cli_total_events service_total_events
  if ! cli_total_events=$(count_build_events "$cli_db_consistency") ||
     ! service_total_events=$(count_build_events "$service_db_consistency"); then
    log_error "Failed to count total build events for consistency test"
    return 1
  fi
  log_info "Total events: CLI=$cli_total_events, Service=$service_total_events"

  # Count events by type using the same approach as simple_test.sh
  local cli_job_events cli_partition_events cli_request_events
  local service_job_events service_partition_events service_request_events
  cli_job_events=$(_count_events_of_type "$cli_db_consistency" "job")
  cli_partition_events=$(_count_events_of_type "$cli_db_consistency" "partition")
  cli_request_events=$(_count_events_of_type "$cli_db_consistency" "build_request")
  service_job_events=$(_count_events_of_type "$service_db_consistency" "job")
  service_partition_events=$(_count_events_of_type "$service_db_consistency" "partition")
  service_request_events=$(_count_events_of_type "$service_db_consistency" "build_request")
  log_info "Event breakdown:"
  log_info " Job events: CLI=$cli_job_events, Service=$service_job_events"
  log_info " Partition events: CLI=$cli_partition_events, Service=$service_partition_events"
  log_info " Request events: CLI=$cli_request_events, Service=$service_request_events"

  # Validate core events are identical (job, partition, and request events should all match now)
  if [[ "$cli_job_events" -eq "$service_job_events" ]] && [[ "$cli_partition_events" -eq "$service_partition_events" ]] && [[ "$cli_request_events" -eq "$service_request_events" ]]; then
    log_info "✅ All build events (job, partition, and request) are identical"
  else
    log_error "❌ Build events differ between CLI and Service - this indicates a problem"
    log_error "Expected CLI and Service to emit identical event counts after alignment"
    return 1
  fi

  # Validate total event counts are identical
  if [[ "$cli_total_events" -eq "$service_total_events" ]]; then
    log_info "✅ Total event counts are identical: $cli_total_events events each"
  else
    log_error "❌ Total event counts differ: CLI=$cli_total_events, Service=$service_total_events"
    return 1
  fi
  test_pass "Consistency validation test"
}
# Run all tests
# Logs a start message, invokes the five test functions in their fixed order,
# then logs the completion message. Arguments are accepted but not used.
main() {
  log_info "Starting Podcast Reviews End-to-End Tests"
  local suite=(
    test_raw_reviews_extraction
    test_daily_summary_pipeline
    test_podcasts_metadata
    test_multiple_mixed_partitions
    test_consistency_validation
  )
  local test_case
  for test_case in "${suite[@]}"; do
    "$test_case"
  done
  log_info "All Podcast Reviews tests completed successfully!"
}
# Execute main function, forwarding the script's arguments. main itself does
# not read them; the two binary paths were already captured from $1 and $2
# into CLI_BUILD_BINARY and SERVICE_BINARY during configuration.
main "$@"