diff --git a/databuild/cli_main.rs b/databuild/cli_main.rs index a285c21..f9b055f 100644 --- a/databuild/cli_main.rs +++ b/databuild/cli_main.rs @@ -232,6 +232,39 @@ async fn cmd_serve(port: u16, database: &str, config_path: &str, config: &Databu // Initialize logging tracing_subscriber::fmt::init(); + // Acquire and hold the server lock for the duration of the server + let mut server_lock = ServerLock::new(&config.graph_label).unwrap_or_else(|e| { + eprintln!("Failed to create server lock: {}", e); + std::process::exit(1); + }); + + // Try to acquire exclusive lock + match server_lock.try_lock() { + Ok(true) => { + // Write our state + let config_hash = ServerLock::hash_config(Path::new(config_path)).unwrap_or_default(); + let state = lib::server_lock::ServerLockState { + pid: std::process::id(), + port, + started_at: ServerLock::now_millis(), + config_hash, + }; + if let Err(e) = server_lock.write_state(&state) { + eprintln!("Failed to write server state: {}", e); + } + } + Ok(false) => { + // Another server is holding the lock - this shouldn't happen in daemon mode + // but could happen if user manually runs serve while another server is running + eprintln!("Another server is already running for graph '{}'. Use 'databuild stop' first.", config.graph_label); + std::process::exit(1); + } + Err(e) => { + eprintln!("Failed to acquire server lock: {}", e); + std::process::exit(1); + } + } + println!("Loaded configuration from: {}", config_path); println!(" Graph: {}", config.graph_label); println!(" Jobs: {}", config.jobs.len()); diff --git a/databuild/config.rs b/databuild/config.rs index dc5a7e0..b6544a1 100644 --- a/databuild/config.rs +++ b/databuild/config.rs @@ -78,9 +78,9 @@ impl DatabuildConfig { /// If `bel_uri` is not set, returns the default path `.databuild/${graph_label}/bel.sqlite`. /// Relative paths are not resolved here - that's the caller's responsibility. pub fn effective_bel_uri(&self) -> String { - self.bel_uri.clone().unwrap_or_else(|| { - format!(".databuild/{}/bel.sqlite", self.graph_label) - }) + self.bel_uri + .clone() + .unwrap_or_else(|| format!(".databuild/{}/bel.sqlite", self.graph_label)) } } @@ -134,7 +134,10 @@ mod tests { assert_eq!(config.effective_bel_uri(), ".databuild/my_graph/bel.sqlite"); // Custom: uses provided value - let config = DatabuildConfig::from_json(r#"{ "graph_label": "my_graph", "bel_uri": "postgresql://localhost/db" }"#).unwrap(); + let config = DatabuildConfig::from_json( + r#"{ "graph_label": "my_graph", "bel_uri": "postgresql://localhost/db" }"#, + ) + .unwrap(); assert_eq!(config.effective_bel_uri(), "postgresql://localhost/db"); } diff --git a/databuild/daemon.rs b/databuild/daemon.rs index 1ac1ab8..045c668 100644 --- a/databuild/daemon.rs +++ b/databuild/daemon.rs @@ -2,7 +2,7 @@ //! //! Implements the classic double-fork pattern to create a proper Unix daemon. -use crate::server_lock::{ServerLock, ServerLockState}; +use crate::server_lock::ServerLock; use crate::util::DatabuildError; use std::fs::OpenOptions; use std::path::Path; @@ -108,68 +108,51 @@ pub fn ensure_server_running( graph_label: &str, config_hash: &str, ) -> Result { - let mut lock = ServerLock::new(graph_label)?; + let lock = ServerLock::new(graph_label)?; - // Try to acquire the lock - if lock.try_lock()? { - // We got the lock, so no server is running - // Find an available port - let port = find_available_port(3538)?; - - // Write initial state (we'll update after server starts) - let state = ServerLockState { - pid: std::process::id(), - port, - started_at: ServerLock::now_millis(), - config_hash: config_hash.to_string(), - }; - lock.write_state(&state)?; - - // Spawn the daemon - let log_path = lock.log_path(); - let mut child = spawn_daemon(config_path, port, &log_path)?; - - // Update state with actual PID - let pid = child.id(); - let state = ServerLockState { - pid, - port, - started_at: ServerLock::now_millis(), - config_hash: config_hash.to_string(), - }; - lock.write_state(&state)?; - - // Release the lock so the daemon can grab it - drop(lock); - - // Wait for server to become healthy - wait_for_health(port, 10000)?; - - Ok(DaemonizeResult::Started { port }) - } else { - // Server is already running, read the port - let state = lock - .read_state()? - .ok_or_else(|| DatabuildError::from("Lock held but no state found"))?; - - // Verify server is actually healthy - if !health_check(state.port) { + // First, check if there's already a running server by reading existing state + if let Some(state) = lock.read_state()? { + // Check if that process is still running + if ServerLock::is_process_running(state.pid) { + // Verify server is actually healthy + if health_check(state.port) { + // Check if config has changed + if state.config_hash != config_hash { + eprintln!( + "Warning: Config has changed since server started.\n\ + Run 'databuild stop && databuild serve' to apply changes." + ); + } + return Ok(DaemonizeResult::AlreadyRunning { port: state.port }); + } + // Process exists but not healthy - might still be starting up + // Wait a bit and check again + if wait_for_health(state.port, 5000).is_ok() { + return Ok(DaemonizeResult::AlreadyRunning { port: state.port }); + } + // Still unhealthy, will need to be stopped manually return Err(DatabuildError::from(format!( "Server at port {} appears unhealthy. Try 'databuild stop' and retry.", state.port ))); + } else { + // Stale lock file - process is gone, clean up + lock.remove_stale_lock()?; } - - // Check if config has changed - if state.config_hash != config_hash { - eprintln!( - "Warning: Config has changed since server started.\n\ - Run 'databuild stop && databuild serve' to apply changes." - ); - } - - Ok(DaemonizeResult::AlreadyRunning { port: state.port }) } + + // No server running - start one + // Find an available port + let port = find_available_port(3538)?; + + // Spawn the daemon - it will acquire its own lock + let log_path = lock.log_path(); + let _child = spawn_daemon(config_path, port, &log_path)?; + + // Wait for server to become healthy (which implies it has acquired the lock) + wait_for_health(port, 10000)?; + + Ok(DaemonizeResult::Started { port }) } /// Stop a running server. diff --git a/databuild/server_lock.rs b/databuild/server_lock.rs index b6bec71..5d1d679 100644 --- a/databuild/server_lock.rs +++ b/databuild/server_lock.rs @@ -45,8 +45,9 @@ impl ServerLock { /// Creates the .databuild/${graph_label}/ directory if it doesn't exist. pub fn new_in_dir(base_dir: &Path, graph_label: &str) -> Result { let graph_dir = base_dir.join(".databuild").join(graph_label); - fs::create_dir_all(&graph_dir) - .map_err(|e| DatabuildError::from(format!("Failed to create graph directory: {}", e)))?; + fs::create_dir_all(&graph_dir).map_err(|e| { + DatabuildError::from(format!("Failed to create graph directory: {}", e)) + })?; let lock_path = graph_dir.join("server.lock"); @@ -88,7 +89,10 @@ impl ServerLock { // Lock is held by another process Ok(false) } - Err(e) => Err(DatabuildError::from(format!("Failed to acquire lock: {}", e))), + Err(e) => Err(DatabuildError::from(format!( + "Failed to acquire lock: {}", + e + ))), } } @@ -151,9 +155,7 @@ impl ServerLock { #[cfg(unix)] { use std::os::unix::process::CommandExt; - unsafe { - libc::kill(pid as i32, 0) == 0 - } + unsafe { libc::kill(pid as i32, 0) == 0 } } #[cfg(not(unix))] { @@ -278,7 +280,10 @@ mod tests { let temp = tempdir().unwrap(); let lock = ServerLock::new_in_dir(temp.path(), "nonexistent_graph").unwrap(); let state = lock.read_state().unwrap(); - assert!(state.is_none(), "Reading nonexistent lock file should return None"); + assert!( + state.is_none(), + "Reading nonexistent lock file should return None" + ); } #[test] @@ -295,6 +300,9 @@ mod tests { .unwrap() .as_millis() as u64; - assert!(now >= before && now <= after, "now_millis should be between before and after"); + assert!( + now >= before && now <= after, + "now_millis should be between before and after" + ); } } diff --git a/scripts/run_multihop_example.sh b/scripts/run_multihop_example.sh index d3ee847..dd27226 100755 --- a/scripts/run_multihop_example.sh +++ b/scripts/run_multihop_example.sh @@ -4,71 +4,55 @@ set -euo pipefail # Navigate to repository root cd "$(dirname "$0")/.." -# Configuration -PORT=3050 -DB_PATH="/tmp/databuild_multihop.db" +CONFIG="examples/multihop/config.json" FLAG_FILE="/tmp/databuild_multihop_alpha_complete" -PID_FILE="/tmp/databuild_multihop.pid" echo "=== DataBuild Multi-Hop Example ===" echo # Clean up previous state echo "Cleaning up previous state..." -rm -f "$DB_PATH" "$FLAG_FILE" "$PID_FILE" -pkill -f "databuild.*serve.*port $PORT" || true -sleep 1 +rm -f "$FLAG_FILE" +rm -rf .databuild/multihop/ +./bazel-bin/databuild/databuild --config "$CONFIG" stop 2>/dev/null || true # Build the binary echo "Building databuild CLI..." bazel build //databuild:databuild -# Start the server in background -echo "Starting databuild server on port $PORT..." -./bazel-bin/databuild/databuild serve \ - --port $PORT \ - --database "$DB_PATH" \ - --config examples/multihop/config.json & +echo +echo "=== Starting server ===" +echo -SERVER_PID=$! -echo $SERVER_PID > "$PID_FILE" -echo "Server started with PID $SERVER_PID" +# Start the server by making a request (triggers auto-start) +OUTPUT=$(./bazel-bin/databuild/databuild --config "$CONFIG" wants list 2>&1) -# Wait for server to be ready -echo "Waiting for server to start..." -sleep 2 - -# Test server health -if curl -s http://localhost:$PORT/health > /dev/null 2>&1; then - echo "Server is ready!" -else - echo "WARNING: Server health check failed, but continuing..." -fi +# Extract port from status +PORT=$(./bazel-bin/databuild/databuild --config "$CONFIG" status 2>&1 | grep "Port:" | awk '{print $2}') echo -echo "=== Server is running ===" +echo "Server running at: http://127.0.0.1:${PORT}" echo -echo "You can now interact with the server:" +echo "=== Ready to run example ===" +echo +echo "Try the following commands:" +echo +echo " # Check server status" +echo " ./bazel-bin/databuild/databuild --config $CONFIG status" echo echo " # Create a want for data/beta (triggers dependency chain)" -echo " ./bazel-bin/databuild/databuild --server http://localhost:$PORT want data/beta" +echo " ./bazel-bin/databuild/databuild --config $CONFIG want data/beta" echo echo " # Monitor wants" -echo " ./bazel-bin/databuild/databuild --server http://localhost:$PORT wants list" +echo " ./bazel-bin/databuild/databuild --config $CONFIG wants list" echo echo " # Monitor job runs" -echo " ./bazel-bin/databuild/databuild --server http://localhost:$PORT job-runs list" +echo " ./bazel-bin/databuild/databuild --config $CONFIG job-runs list" echo echo " # Monitor partitions" -echo " ./bazel-bin/databuild/databuild --server http://localhost:$PORT partitions list" +echo " ./bazel-bin/databuild/databuild --config $CONFIG partitions list" echo -echo "To stop the server:" -echo " kill $SERVER_PID" -echo " # or: pkill -f 'databuild.*serve.*port $PORT'" +echo " # Stop the server when done" +echo " ./bazel-bin/databuild/databuild --config $CONFIG stop" echo -echo "Server logs will appear below. Press Ctrl+C to stop." -echo "==========================================" -echo - -# Wait for the server process -wait $SERVER_PID +echo "==========================================="