#!/bin/bash
# End-to-end tests for podcast_reviews example
# Tests CLI vs Service build consistency for complex pipelines
#
# Usage: <this script> CLI_BUILD_BINARY SERVICE_BINARY
#   CLI_BUILD_BINARY  path to the executable CLI build binary
#   SERVICE_BINARY    path to the executable service binary
#
# Helper functions such as log_info, test_fail, run_with_timeout,
# start_test_service and is_partition_built come from the sourced lib/*.sh
# files; their exact behavior is not visible from this script.

set -euo pipefail

# Get the directory of this script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Source utilities
source "$SCRIPT_DIR/lib/test_utils.sh"
source "$SCRIPT_DIR/lib/db_utils.sh"
source "$SCRIPT_DIR/lib/service_utils.sh"

# Test configuration
TEST_NAME="podcast_reviews_e2e"
CLI_BUILD_BINARY="${1:-}"
SERVICE_BINARY="${2:-}"

# Validate inputs
# NOTE(review): test_fail is presumably fatal (exits the script) — confirm in
# lib/test_utils.sh.
if [[ -z "$CLI_BUILD_BINARY" ]]; then
  test_fail "CLI build binary path required as first argument"
fi

if [[ -z "$SERVICE_BINARY" ]]; then
  test_fail "Service binary path required as second argument"
fi

if [[ ! -x "$CLI_BUILD_BINARY" ]]; then
  test_fail "CLI build binary not found or not executable: $CLI_BUILD_BINARY"
fi

if [[ ! -x "$SERVICE_BINARY" ]]; then
  test_fail "Service binary not found or not executable: $SERVICE_BINARY"
fi

# Setup test environment
TEST_DIR=$(setup_test_env "$TEST_NAME")
CLI_DB_PATH=$(create_test_database "cli_test_db")
SERVICE_DB_PATH=$(create_test_database "service_test_db")

#######################################
# EXIT trap: stops a still-running test service (if SERVICE_INFO is set) and
# removes the test directory. Both steps are best-effort so that cleanup
# never changes the script's exit status.
# Globals: SERVICE_INFO (read), TEST_DIR (read)
#######################################
cleanup() {
  if [[ -n "${SERVICE_INFO:-}" ]]; then
    stop_test_service "$SERVICE_INFO" || true
  fi
  cleanup_test_dir "$TEST_DIR" || true
}

trap cleanup EXIT

log_info "Starting Podcast Reviews end-to-end tests"
log_info "CLI Binary: $CLI_BUILD_BINARY"
log_info "Service Binary: $SERVICE_BINARY"
log_info "Test Directory: $TEST_DIR"

#######################################
# Renders partition names as a JSON array literal, e.g. ["a", "b"].
# Names are inserted verbatim (no JSON escaping), so they must not contain
# double quotes or backslashes.
# Arguments: $@ - partition names
# Outputs:   the JSON array on stdout (no trailing newline)
#######################################
partitions_to_json() {
  local json="" item
  for item in "$@"; do
    json+="${json:+, }\"$item\""
  done
  printf '[%s]' "$json"
}

#######################################
# Runs the CLI build binary for the given partitions with its event log
# pointed at a SQLite database. On failure the captured output is dumped.
# Every failure path is checked explicitly because callers invoke this as
# `run_cli_build ... || return 1`, a context in which `set -e` is inactive.
# Globals:   CLI_BUILD_BINARY (read), DATABUILD_BUILD_EVENT_LOG (exported)
# Arguments: $1 - SQLite database path for the build event log
#            $2 - file receiving the build's stdout and stderr
#            $3 - timeout passed to run_with_timeout
#            $4 - label used in the failure message
#            $5.. - partitions to build
# Returns:   0 on success, 1 if the build fails
#######################################
run_cli_build() {
  local db_path=$1 output_file=$2 timeout_secs=$3 label=$4
  shift 4

  export DATABUILD_BUILD_EVENT_LOG="sqlite:///$db_path"
  if ! run_with_timeout "$timeout_secs" "$CLI_BUILD_BINARY" "$@" \
    > "$output_file" 2>&1; then
    log_error "CLI build failed for $label"
    cat "$output_file"
    return 1
  fi
}

#######################################
# Starts the test service, requests a build of the given partitions and
# stops the service again. SERVICE_INFO stays set while the service is
# running (and after any failure) so the EXIT trap can still stop it.
# Every failure path is checked explicitly because callers invoke this as
# `run_service_build ... || return 1`, a context in which `set -e` is
# inactive.
# Globals:   SERVICE_BINARY (read), SERVICE_INFO (written, unset on success)
# Arguments: $1 - SQLite database path handed to start_test_service
#            $2 - JSON array of partitions to build
#            $3 - timeout passed to execute_service_build
#            $4 - label used in failure messages
# Returns:   0 on success, 1 on any failure
#######################################
run_service_build() {
  local db_path=$1 partitions_json=$2 timeout_secs=$3 label=$4

  if ! SERVICE_INFO=$(start_test_service "$SERVICE_BINARY" "$db_path"); then
    log_error "Failed to start test service for $label"
    return 1
  fi

  if ! execute_service_build "$SERVICE_INFO" "$partitions_json" \
    "$timeout_secs"; then
    log_error "Service build failed for $label"
    return 1
  fi

  if ! stop_test_service "$SERVICE_INFO"; then
    log_error "Failed to stop test service for $label"
    return 1
  fi
  unset SERVICE_INFO
}

#######################################
# Counts rows of one event type in a build event database.
# Deliberately best-effort, as in the original inline queries: sqlite3
# errors are discarded and a failed query yields 0.
# NOTE(review): a query failing on both databases makes the per-type
# equality check pass with 0 == 0 — consider failing hard instead.
# Arguments: $1 - SQLite database path
#            $2 - event_type value to count
# Outputs:   the count on stdout
#######################################
count_events_of_type() {
  local db_path=$1 event_type=$2
  sqlite3 "$db_path" \
    "SELECT COUNT(*) FROM build_events WHERE event_type = '$event_type';" \
    2>/dev/null || echo "0"
}

# Test 1: Simple Pipeline - Raw Reviews Extraction
# Builds one reviews partition via CLI and via the service, then checks that
# both databases contain events and report the partition as built.
test_raw_reviews_extraction() {
  log_info "=== Test 1: Simple Pipeline - Raw Reviews Extraction ==="

  local partition="reviews/date=2020-01-01"
  local cli_output="$TEST_DIR/cli_raw_reviews.out"

  # CLI Build
  log_info "Running CLI build for partition: $partition"
  run_cli_build "$CLI_DB_PATH" "$cli_output" 120 \
    "partition: $partition" "$partition" || return 1

  # Service Build
  log_info "Running Service build for partition: $partition"
  run_service_build "$SERVICE_DB_PATH" "$(partitions_to_json "$partition")" \
    120 "partition: $partition" || return 1

  # Compare results
  log_info "Comparing CLI and Service build results"

  # Check that both databases have events. Declaration and assignment are
  # separate so a failing count_build_events is not masked by `local`.
  local cli_events service_events
  cli_events=$(count_build_events "$CLI_DB_PATH") \
    || { log_error "Failed to count CLI build events"; return 1; }
  service_events=$(count_build_events "$SERVICE_DB_PATH") \
    || { log_error "Failed to count Service build events"; return 1; }

  if [[ "$cli_events" -eq 0 ]]; then
    log_error "No CLI build events found"
    return 1
  fi

  if [[ "$service_events" -eq 0 ]]; then
    log_error "No Service build events found"
    return 1
  fi

  # Check that partition was built in both
  if ! is_partition_built "$CLI_DB_PATH" "$partition"; then
    log_error "Partition $partition was not built via CLI"
    return 1
  fi

  if ! is_partition_built "$SERVICE_DB_PATH" "$partition"; then
    log_error "Partition $partition was not built via Service"
    return 1
  fi

  test_pass "Raw reviews extraction test"
}

# Test 2: Complex Pipeline - Daily Summary with Dependencies
# Builds a daily summary partition via both paths; a missing upstream
# dependency only produces a warning, not a failure.
test_daily_summary_pipeline() {
  log_info "=== Test 2: Complex Pipeline - Daily Summary with Dependencies ==="

  local partition="daily_summaries/category=Technology/date=2020-01-01"
  local cli_output="$TEST_DIR/cli_daily_summary.out"

  # Clear previous events
  clear_build_events "$CLI_DB_PATH"
  clear_build_events "$SERVICE_DB_PATH"

  # CLI Build
  log_info "Running CLI build for complex partition: $partition"
  run_cli_build "$CLI_DB_PATH" "$cli_output" 300 \
    "complex partition: $partition" "$partition" || return 1

  # Service Build
  log_info "Running Service build for complex partition"
  run_service_build "$SERVICE_DB_PATH" "$(partitions_to_json "$partition")" \
    300 "complex partition" || return 1

  # Compare results
  log_info "Comparing CLI and Service build results for complex pipeline"

  # Check that target partition was built
  if ! is_partition_built "$CLI_DB_PATH" "$partition"; then
    log_error "Complex partition $partition was not built via CLI"
    return 1
  fi

  if ! is_partition_built "$SERVICE_DB_PATH" "$partition"; then
    log_error "Complex partition $partition was not built via Service"
    return 1
  fi

  # Check that expected dependencies were built
  local expected_dependencies=(
    "reviews/date=2020-01-01"
    "categorized_reviews/category=Technology/date=2020-01-01"
  )

  local dep
  for dep in "${expected_dependencies[@]}"; do
    if ! is_partition_built "$CLI_DB_PATH" "$dep"; then
      log_warn "Expected dependency $dep was not found in CLI build (may not be required)"
    fi

    if ! is_partition_built "$SERVICE_DB_PATH" "$dep"; then
      log_warn "Expected dependency $dep was not found in Service build (may not be required)"
    fi
  done

  test_pass "Daily summary pipeline test"
}

# Test 3: Podcasts Metadata Extraction
# Builds the podcasts/all partition via both paths and checks it is reported
# as built in both databases.
test_podcasts_metadata() {
  log_info "=== Test 3: Podcasts Metadata Extraction ==="

  local partition="podcasts/all"
  local cli_output="$TEST_DIR/cli_podcasts.out"

  # Clear previous events
  clear_build_events "$CLI_DB_PATH"
  clear_build_events "$SERVICE_DB_PATH"

  # CLI Build
  log_info "Running CLI build for podcasts metadata: $partition"
  run_cli_build "$CLI_DB_PATH" "$cli_output" 180 \
    "podcasts metadata: $partition" "$partition" || return 1

  # Service Build
  log_info "Running Service build for podcasts metadata"
  run_service_build "$SERVICE_DB_PATH" "$(partitions_to_json "$partition")" \
    180 "podcasts metadata" || return 1

  # Compare results
  log_info "Comparing CLI and Service build results for podcasts metadata"

  # Check that partition was built
  if ! is_partition_built "$CLI_DB_PATH" "$partition"; then
    log_error "Podcasts metadata partition $partition was not built via CLI"
    return 1
  fi

  if ! is_partition_built "$SERVICE_DB_PATH" "$partition"; then
    log_error "Podcasts metadata partition $partition was not built via Service"
    return 1
  fi

  test_pass "Podcasts metadata extraction test"
}

# Test 4: Multiple Partitions with Mixed Dependencies
# Builds three partitions in a single request via both paths, then checks
# each partition and a minimum event count of 3 per database.
test_multiple_mixed_partitions() {
  log_info "=== Test 4: Multiple Partitions with Mixed Dependencies ==="

  local partitions=(
    "reviews/date=2020-01-01"
    "reviews/date=2020-01-02"
    "podcasts/all"
  )
  # Derived from the array so the CLI and service requests cannot drift apart.
  local partitions_json
  partitions_json=$(partitions_to_json "${partitions[@]}")
  local cli_output="$TEST_DIR/cli_mixed.out"

  # Clear previous events
  clear_build_events "$CLI_DB_PATH"
  clear_build_events "$SERVICE_DB_PATH"

  # CLI Build
  log_info "Running CLI build for multiple mixed partitions: ${partitions[*]}"
  run_cli_build "$CLI_DB_PATH" "$cli_output" 240 \
    "multiple mixed partitions" "${partitions[@]}" || return 1

  # Service Build
  log_info "Running Service build for multiple mixed partitions"
  run_service_build "$SERVICE_DB_PATH" "$partitions_json" 240 \
    "multiple mixed partitions" || return 1

  # Compare results
  log_info "Comparing CLI and Service build results for multiple mixed partitions"

  # Check that all target partitions were built
  local partition
  for partition in "${partitions[@]}"; do
    if ! is_partition_built "$CLI_DB_PATH" "$partition"; then
      log_error "Partition $partition was not built via CLI"
      return 1
    fi

    if ! is_partition_built "$SERVICE_DB_PATH" "$partition"; then
      log_error "Partition $partition was not built via Service"
      return 1
    fi
  done

  # Check event counts are reasonable
  local cli_events service_events
  cli_events=$(count_build_events "$CLI_DB_PATH") \
    || { log_error "Failed to count CLI build events"; return 1; }
  service_events=$(count_build_events "$SERVICE_DB_PATH") \
    || { log_error "Failed to count Service build events"; return 1; }

  if [[ "$cli_events" -lt 3 ]]; then
    log_error "Too few CLI events: $cli_events (expected at least 3)"
    return 1
  fi

  if [[ "$service_events" -lt 3 ]]; then
    log_error "Too few Service events: $service_events (expected at least 3)"
    return 1
  fi

  test_pass "Multiple mixed partitions test"
}

# Test 5: Event and Output Consistency
# Builds one partition into two fresh databases and requires the per-type
# and total event counts to be identical between CLI and service.
test_consistency_validation() {
  log_info "=== Test 5: Event and Output Consistency Validation ==="

  # Use fresh databases for this test
  local cli_db_consistency service_db_consistency
  cli_db_consistency=$(create_test_database "cli_consistency_test") \
    || { log_error "Failed to create CLI consistency database"; return 1; }
  service_db_consistency=$(create_test_database "service_consistency_test") \
    || { log_error "Failed to create Service consistency database"; return 1; }

  local partition="reviews/date=2020-01-01"
  local cli_output="$TEST_DIR/cli_consistency.out"

  # CLI Build (the shared helper also dumps the CLI output on failure, as the
  # other tests do)
  run_cli_build "$cli_db_consistency" "$cli_output" 120 \
    "consistency test" "$partition" || return 1

  # Service Build
  run_service_build "$service_db_consistency" \
    "$(partitions_to_json "$partition")" 120 "consistency test" || return 1

  # Extract and compare events
  local cli_events_file="$TEST_DIR/cli_consistency_events.json"
  local service_events_file="$TEST_DIR/service_consistency_events.json"

  get_partition_events "$cli_db_consistency" "$partition" "$cli_events_file"
  get_partition_events "$service_db_consistency" "$partition" "$service_events_file"

  # Validate event consistency
  local cli_event_count service_event_count
  cli_event_count=$(count_lines "$cli_events_file") \
    || { log_error "Failed to count lines in $cli_events_file"; return 1; }
  service_event_count=$(count_lines "$service_events_file") \
    || { log_error "Failed to count lines in $service_events_file"; return 1; }

  if [[ "$cli_event_count" -eq 0 ]]; then
    log_error "No CLI events found for consistency test"
    return 1
  fi

  if [[ "$service_event_count" -eq 0 ]]; then
    log_error "No Service events found for consistency test"
    return 1
  fi

  # Events should be reasonably similar in count (absolute difference <= 5);
  # a larger gap is only a warning here, the strict checks follow below.
  local event_diff=$((cli_event_count - service_event_count))
  if ((event_diff > 5 || event_diff < -5)); then
    log_warn "Event counts differ significantly: CLI=$cli_event_count, Service=$service_event_count"
  else
    log_info "Event counts are consistent: CLI=$cli_event_count, Service=$service_event_count"
  fi

  # Detailed event count validation (matching simple_test.sh approach)
  log_info "Performing detailed event count validation..."

  local cli_total_events service_total_events
  cli_total_events=$(count_build_events "$cli_db_consistency") \
    || { log_error "Failed to count CLI build events"; return 1; }
  service_total_events=$(count_build_events "$service_db_consistency") \
    || { log_error "Failed to count Service build events"; return 1; }

  log_info "Total events: CLI=$cli_total_events, Service=$service_total_events"

  # Count events by type using the same approach as simple_test.sh
  local cli_job_events cli_partition_events cli_request_events
  cli_job_events=$(count_events_of_type "$cli_db_consistency" "job")
  cli_partition_events=$(count_events_of_type "$cli_db_consistency" "partition")
  cli_request_events=$(count_events_of_type "$cli_db_consistency" "build_request")

  local service_job_events service_partition_events service_request_events
  service_job_events=$(count_events_of_type "$service_db_consistency" "job")
  service_partition_events=$(count_events_of_type "$service_db_consistency" "partition")
  service_request_events=$(count_events_of_type "$service_db_consistency" "build_request")

  log_info "Event breakdown:"
  log_info " Job events: CLI=$cli_job_events, Service=$service_job_events"
  log_info " Partition events: CLI=$cli_partition_events, Service=$service_partition_events"
  log_info " Request events: CLI=$cli_request_events, Service=$service_request_events"

  # Validate core events are identical (job, partition, and request events
  # should all match now)
  if [[ "$cli_job_events" -eq "$service_job_events" ]] \
    && [[ "$cli_partition_events" -eq "$service_partition_events" ]] \
    && [[ "$cli_request_events" -eq "$service_request_events" ]]; then
    log_info "✅ All build events (job, partition, and request) are identical"
  else
    log_error "❌ Build events differ between CLI and Service - this indicates a problem"
    log_error "Expected CLI and Service to emit identical event counts after alignment"
    return 1
  fi

  # Validate total event counts are identical
  if [[ "$cli_total_events" -eq "$service_total_events" ]]; then
    log_info "✅ Total event counts are identical: $cli_total_events events each"
  else
    log_error "❌ Total event counts differ: CLI=$cli_total_events, Service=$service_total_events"
    return 1
  fi

  test_pass "Consistency validation test"
}

# Run all tests
# Each test is invoked as a plain statement so that, under `set -e`, the
# first test returning non-zero aborts the script (and fires the EXIT trap).
main() {
  log_info "Starting Podcast Reviews End-to-End Tests"

  test_raw_reviews_extraction
  test_daily_summary_pipeline
  test_podcasts_metadata
  test_multiple_mixed_partitions
  test_consistency_validation

  log_info "All Podcast Reviews tests completed successfully!"
}

# Execute main function
main "$@"