server refactor

parent 7134b5e480, commit f71be8482f
6 changed files with 93 additions and 152 deletions
BUILD (Bazel build file)

@@ -18,7 +18,7 @@ rust_binary(
 # DataBuild library using generated prost code
 rust_library(
-    name = "databuild",
+    name = "lib",
     srcs = glob(["**/*.rs"]) + [
         ":generate_databuild_rust",
     ],
@@ -44,34 +44,19 @@ rust_library(
 rust_test(
     name = "databuild_test",
-    crate = ":databuild",
+    crate = ":lib",
     data = ["//databuild/test:test_job_helper"],
     env = {"RUST_BACKTRACE": "1"},
 )
 
-# DataBuild HTTP server binary
-rust_binary(
-    name = "databuild_server",
-    srcs = ["server_main.rs"],
-    edition = "2021",
-    visibility = ["//visibility:public"],
-    deps = [
-        ":databuild",
-        "@crates//:axum",
-        "@crates//:tokio",
-        "@crates//:tracing",
-        "@crates//:tracing-subscriber",
-    ],
-)
-
 # DataBuild CLI binary
 rust_binary(
-    name = "databuild_cli",
+    name = "databuild",
     srcs = ["cli_main.rs"],
     edition = "2021",
    visibility = ["//visibility:public"],
     deps = [
-        ":databuild",
+        ":lib",
         "@crates//:axum",
         "@crates//:clap",
         "@crates//:reqwest",
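The renames above drive the import changes in the next file: with rules_rust, a `rust_library`'s crate name defaults to its target name, so once the library target becomes `:lib`, dependents import `lib::…` instead of `databuild::…`, and the `databuild` name is freed up for the CLI binary. A minimal illustration (not from the diff) of what dependents see:

```rust
// Before the refactor the library target (and thus the crate) was `databuild`;
// after it, the same modules are reached through `lib`.
//
// use databuild::orchestrator::{Orchestrator, OrchestratorConfig}; // old
use lib::orchestrator::{Orchestrator, OrchestratorConfig}; // new
```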
cli_main.rs

@@ -1,10 +1,13 @@
 use clap::{Parser, Subcommand};
-use databuild::build_event_log::SqliteBELStorage;
-use databuild::http_server::{create_router, AppState};
 use reqwest::blocking::Client;
-use std::sync::Arc;
+use std::sync::{Arc, RwLock};
 use std::time::Duration;
 use tokio::sync::{broadcast, mpsc};
+use lib::build_event_log::SqliteBELStorage;
+use lib::build_state::BuildState;
+use lib::config::DatabuildConfig;
+use lib::http_server::{create_router, AppState};
+use lib::orchestrator::{Orchestrator, OrchestratorConfig};
 
 #[derive(Parser)]
 #[command(name = "databuild")]
@@ -112,14 +115,13 @@ fn main() {
 #[tokio::main]
 async fn cmd_serve(port: u16, database: &str, config_path: Option<&str>) {
-    use databuild::build_event_log::BELStorage;
 
     // Initialize logging
     tracing_subscriber::fmt::init();
 
     // Load configuration if provided
     let jobs = if let Some(path) = config_path {
-        match databuild::config::DatabuildConfig::from_file(path) {
+        match DatabuildConfig::from_file(path) {
             Ok(config) => {
                 println!("Loaded configuration from: {}", path);
                 println!("  Jobs: {}", config.jobs.len());
@@ -142,19 +144,38 @@ async fn cmd_serve(port: u16, database: &str, config_path: Option<&str>) {
     // Create command channel for orchestrator communication
     let (command_tx, command_rx) = mpsc::channel(100);
 
+    // Create event broadcast channel (orchestrator -> HTTP server)
+    let (event_tx, _event_rx) = broadcast::channel(1000);
+
     // Create shutdown broadcast channel
     let (shutdown_tx, _shutdown_rx) = broadcast::channel(1);
 
+    // Create shared mirrored build state for HTTP server
+    let mirrored_state = Arc::new(RwLock::new(BuildState::default()));
+
+    // Spawn state-mirror task to keep HTTP server's build state in sync
+    let mirror_clone = mirrored_state.clone();
+    let mut mirror_rx = event_tx.subscribe();
+    tokio::spawn(async move {
+        while let Ok(event) = mirror_rx.recv().await {
+            let mut state = mirror_clone.write().unwrap();
+            state.handle_event(&event);
+        }
+    });
+
     // Spawn orchestrator in background thread
     // Note: Orchestrator needs its own BEL storage instance for writes
     let orch_bel_storage = SqliteBELStorage::create(database).expect("Failed to create BEL storage");
     let orch_shutdown_rx = shutdown_tx.subscribe();
     let orch_handle = std::thread::spawn(move || {
-        use databuild::orchestrator::{Orchestrator, OrchestratorConfig};
-
-        // Create orchestrator with command channel and jobs from config
+        // Create orchestrator with both channels and jobs from config
         let config = OrchestratorConfig { jobs };
-        let mut orchestrator = Orchestrator::new_with_commands(orch_bel_storage, config, command_rx);
+        let mut orchestrator = Orchestrator::new_with_channels(
+            orch_bel_storage,
+            config,
+            command_rx,
+            event_tx,
+        );
         let mut shutdown_rx = orch_shutdown_rx;
 
         // Run orchestrator loop
@@ -173,8 +194,8 @@ async fn cmd_serve(port: u16, database: &str, config_path: Option<&str>) {
         }
     });
 
-    // Create app state with shared storage, command sender, and shutdown channel
-    let state = AppState::new(bel_storage, command_tx, shutdown_tx.clone());
+    // Create app state with mirrored state, shared storage, command sender, and shutdown channel
+    let state = AppState::new(mirrored_state, bel_storage, command_tx, shutdown_tx.clone());
 
     // Spawn idle timeout checker task
     let idle_state = state.clone();
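The heart of the refactor is visible in `cmd_serve`: the HTTP server no longer shares the orchestrator's `BuildState` directly; it keeps a mirror that is rebuilt from the orchestrator's event broadcast. A self-contained sketch of that mirroring pattern, with toy `Event` and `State` types standing in for databuild's `Event` and `BuildState` (whose `handle_event` is assumed to fold one event into the state):

```rust
use std::sync::{Arc, RwLock};
use tokio::sync::broadcast;

// Toy stand-ins for databuild's Event and BuildState.
#[derive(Clone, Debug)]
enum Event {
    JobStarted(String),
    JobCompleted(String),
}

#[derive(Default, Debug)]
struct State {
    running: Vec<String>,
    completed: Vec<String>,
}

impl State {
    // Fold a single event into the mirrored state.
    fn handle_event(&mut self, event: &Event) {
        match event {
            Event::JobStarted(id) => self.running.push(id.clone()),
            Event::JobCompleted(id) => {
                self.running.retain(|r| r != id);
                self.completed.push(id.clone());
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let (event_tx, _) = broadcast::channel::<Event>(1000);
    let mirrored = Arc::new(RwLock::new(State::default()));

    // Mirror task: every broadcast event is applied to the shared state.
    let mirror = mirrored.clone();
    let mut rx = event_tx.subscribe();
    tokio::spawn(async move {
        // recv() returns Err when all senders drop, which ends the task.
        while let Ok(event) = rx.recv().await {
            let mut state = mirror.write().unwrap();
            state.handle_event(&event);
        }
    });

    // Producer side (the orchestrator, in the real code).
    event_tx.send(Event::JobStarted("job-1".into())).unwrap();
    event_tx.send(Event::JobCompleted("job-1".into())).unwrap();

    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    println!("{:?}", mirrored.read().unwrap());
}
```

One consequence of the `while let Ok(...)` loop is worth noting: `broadcast::Receiver::recv` also returns `Err(RecvError::Lagged)` when a subscriber falls more than the channel capacity behind, so a lagging mirror silently stops updating rather than resynchronizing.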
http_server.rs

@@ -25,9 +25,9 @@ use tower_http::cors::CorsLayer;
 /// Shared application state for HTTP handlers
 #[derive(Clone)]
 pub struct AppState {
-    /// Shared read-only access to build state (maintained by orchestrator)
+    /// Mirrored build state (updated via event broadcast from orchestrator)
     pub build_state: Arc<RwLock<BuildState>>,
-    /// Shared read-only access to BEL storage (for event log queries)
+    /// Shared read-only access to BEL storage (for event log queries if needed)
     pub bel_storage: Arc<dyn BELStorage>,
     /// Command sender for write operations (sends to orchestrator)
     pub command_tx: mpsc::Sender<Command>,
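With the mirrored state in place, `AppState` completes a command/query split: handlers answer reads from the mirror and forward writes to the orchestrator as `Command`s. A minimal sketch of both halves using axum 0.7-style state extraction, with toy `BuildState`/`Command` types and a hypothetical `CreateWant` variant (the real routes are listed in the deleted `server_main.rs` below):

```rust
use std::sync::{Arc, RwLock};
use axum::{extract::State, routing::get, Router};
use tokio::sync::mpsc;

// Toy stand-ins for databuild's types.
#[derive(Default)]
struct BuildState {
    wants: Vec<String>,
}

enum Command {
    CreateWant(String),
}

#[derive(Clone)]
struct AppState {
    build_state: Arc<RwLock<BuildState>>, // mirrored; handlers only read it
    command_tx: mpsc::Sender<Command>,    // writes go to the orchestrator
}

// Reads never touch the orchestrator: they consult the mirrored state.
async fn list_wants(State(state): State<AppState>) -> String {
    let s = state.build_state.read().unwrap();
    format!("{:?}", s.wants)
}

// Writes never touch the state directly: they become commands to the
// orchestrator, which applies them and broadcasts the resulting events
// back to the mirror.
async fn create_want(State(state): State<AppState>) -> &'static str {
    state
        .command_tx
        .send(Command::CreateWant("example".into()))
        .await
        .map(|_| "accepted")
        .unwrap_or("orchestrator unavailable")
}

fn router(state: AppState) -> Router {
    Router::new()
        .route("/api/wants", get(list_wants).post(create_want))
        .with_state(state)
}
```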
crate root (module declarations)

@@ -1,5 +1,5 @@
 pub mod build_event_log;
-mod build_state;
+pub mod build_state;
 pub mod commands;
 pub mod config;
 mod data_deps;
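`build_state` gains `pub` because `cli_main.rs` is a separate binary crate that now constructs and updates a `BuildState` itself; against the private form, the new `use lib::build_state::BuildState;` import would not resolve. A sketch of the two sides (two files, not one runnable source):

```rust
// In the crate root of `lib`:
pub mod build_state; // was: mod build_state; (private to the crate)

// In cli_main.rs (separate crate), this compiles only against the pub form:
use lib::build_state::BuildState;

fn main() {
    let _state = BuildState::default();
}
```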
orchestrator.rs

@@ -7,7 +7,7 @@ use crate::util::DatabuildError;
 use crate::{JobRunBufferEventV1, PartitionRef, WantDetail};
 use std::collections::HashMap;
 use std::fmt::Debug;
-use tokio::sync::mpsc;
+use tokio::sync::{broadcast, mpsc};
 use uuid::Uuid;
 
 /**
@@ -26,6 +26,8 @@ pub struct Orchestrator<S: BELStorage + Debug> {
     pub job_runs: Vec<crate::job_run::JobRunHandle<SubProcessBackend>>,
     /// Optional command receiver for write operations from HTTP server
     pub command_rx: Option<mpsc::Receiver<Command>>,
+    /// Optional event broadcaster for state updates to HTTP server
+    pub event_tx: Option<broadcast::Sender<Event>>,
     /// Environment variables for each job run (keyed by job_run_id)
     pub job_environments: HashMap<Uuid, HashMap<String, String>>,
 }
@@ -37,6 +39,7 @@ impl Default for Orchestrator<MemoryBELStorage> {
             config: Default::default(),
             job_runs: Default::default(),
             command_rx: None,
+            event_tx: None,
             job_environments: HashMap::new(),
         }
     }
@@ -49,6 +52,7 @@ impl Orchestrator<MemoryBELStorage> {
             config: self.config.clone(),
             job_runs: Default::default(),
             command_rx: None,
+            event_tx: None,
             job_environments: HashMap::new(),
         }
     }
@@ -133,6 +137,7 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
             config,
             job_runs: Vec::new(),
             command_rx: None,
+            event_tx: None,
             job_environments: HashMap::new(),
         }
     }
@@ -147,16 +152,44 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
             config,
             job_runs: Vec::new(),
             command_rx: Some(command_rx),
+            event_tx: None,
             job_environments: HashMap::new(),
         }
     }
 
+    pub fn new_with_channels(
+        storage: S,
+        config: OrchestratorConfig,
+        command_rx: mpsc::Receiver<Command>,
+        event_tx: broadcast::Sender<Event>,
+    ) -> Self {
+        Self {
+            bel: BuildEventLog::new(storage, Default::default()),
+            config,
+            job_runs: Vec::new(),
+            command_rx: Some(command_rx),
+            event_tx: Some(event_tx),
+            job_environments: HashMap::new(),
+        }
+    }
+
+    /// Append event to BEL and broadcast to HTTP server (if configured)
+    fn append_and_broadcast(&mut self, event: &Event) -> Result<u64, DatabuildError> {
+        let idx = self.bel.append_event(event)?;
+        if let Some(tx) = &self.event_tx {
+            // Ignore send errors (no receivers is fine)
+            let _ = tx.send(event.clone());
+        }
+        Ok(idx)
+    }
+
     fn schedule_queued_jobs(&mut self) -> Result<(), DatabuildError> {
         use crate::JobRunHeartbeatEventV1;
         use crate::data_build_event::Event;
         use crate::job_run::JobRunHandle;
 
         let mut new_jobs = Vec::new();
+        let mut events_to_append = Vec::new();
         for job in self.job_runs.drain(..) {
             let transitioned = match job {
                 JobRunHandle::NotStarted(not_started) => {
@@ -165,11 +198,11 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
                     let env = self.job_environments.get(&job_run_id).cloned();
                     let running = not_started.run(env)?;
 
-                    // Emit heartbeat event to notify BuildState that job is now running
+                    // Collect heartbeat event to emit after drain completes
                     let heartbeat_event = Event::JobRunHeartbeatV1(JobRunHeartbeatEventV1 {
                         job_run_id: job_run_id.to_string(),
                     });
-                    self.bel.append_event(&heartbeat_event)?;
+                    events_to_append.push(heartbeat_event);
 
                     JobRunHandle::Running(running)
                 }
@@ -178,12 +211,19 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
             new_jobs.push(transitioned);
         }
         self.job_runs = new_jobs;
 
+        // Now append all collected events
+        for event in events_to_append {
+            self.append_and_broadcast(&event)?;
+        }
+
         Ok(())
     }
 
     /// Visits individual job runs, appending resulting events, and moving runs between run status
     /// containers. Either jobs are still running, or they are moved to terminal states.
     fn poll_job_runs(&mut self) -> Result<(), DatabuildError> {
+        use crate::data_build_event::Event;
         use crate::job_run::{JobRunHandle, VisitResult};
 
         self.schedule_queued_jobs()?;
@@ -194,7 +234,9 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
         // Need to decide on heartbeat frequency (e.g., every N polls or based on time elapsed)
 
         // Visit all running jobs using type-safe transitions
+        // Collect events first to avoid borrow checker issues
         let mut new_jobs = Vec::new();
+        let mut events_to_append = Vec::new();
         for job in self.job_runs.drain(..) {
             let transitioned = match job {
                 JobRunHandle::Running(running) => match running.visit()? {
@@ -205,19 +247,19 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
                     VisitResult::Completed(completed) => {
                         println!("Completed job: {:?}", completed.job_run_id);
                         let event = completed.state.to_event(&completed.job_run_id);
-                        self.bel.append_event(&event)?;
+                        events_to_append.push(event);
                        JobRunHandle::Completed(completed)
                     }
                     VisitResult::Failed(failed) => {
                         println!("Failed job: {:?}", failed.job_run_id);
                         let event = failed.state.to_event(&failed.job_run_id);
-                        self.bel.append_event(&event)?;
+                        events_to_append.push(event);
                         JobRunHandle::Failed(failed)
                     }
                     VisitResult::DepMiss(dep_miss) => {
                         println!("Dep miss job: {:?}", dep_miss.job_run_id);
                         let event = dep_miss.state.to_event(&dep_miss.job_run_id);
-                        self.bel.append_event(&event)?;
+                        events_to_append.push(event);
                         JobRunHandle::DepMiss(dep_miss)
                     }
                 },
@@ -227,6 +269,11 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
             }
         self.job_runs = new_jobs;
 
+        // Now append all collected events
+        for event in events_to_append {
+            self.append_and_broadcast(&event)?;
+        }
+
         Ok(())
     }
 
@@ -302,7 +349,7 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
                 .collect(),
             want_attributed_partitions: wg.wants.iter().map(|w| w.into()).collect(),
         });
-        self.bel.append_event(&job_buffer_event)?;
+        self.append_and_broadcast(&job_buffer_event)?;
         self.job_runs.push(job_run);
 
         Ok(())
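Two patterns in this file are worth pulling out. First, `append_and_broadcast` keeps the event log as the source of truth: the durable append happens before the broadcast, and a failed send (meaning no live receivers) is deliberately ignored. Second, `events_to_append` exists because `self.job_runs.drain(..)` holds a mutable borrow of `self` for the whole loop, so another `&mut self` call like `self.append_and_broadcast(...)` cannot happen inside it; events are collected first and appended once the drain ends. A self-contained toy (stand-in `Event` and in-memory log) showing both:

```rust
use tokio::sync::broadcast;

// Toy stand-ins; the real orchestrator appends to the SQLite-backed BEL.
#[derive(Clone, Debug)]
struct Event(String);

struct Orchestrator {
    log: Vec<Event>,
    job_runs: Vec<u32>,
    event_tx: Option<broadcast::Sender<Event>>,
}

impl Orchestrator {
    // Durable write first, best-effort fan-out second: a send error only
    // means no one is subscribed, which must not fail the build step.
    fn append_and_broadcast(&mut self, event: &Event) -> u64 {
        self.log.push(event.clone());
        if let Some(tx) = &self.event_tx {
            let _ = tx.send(event.clone());
        }
        self.log.len() as u64 - 1
    }

    fn poll_job_runs(&mut self) {
        let mut new_jobs = Vec::new();
        let mut events_to_append = Vec::new(); // collect now...
        for job in self.job_runs.drain(..) {
            // Calling self.append_and_broadcast(...) here is rejected by the
            // borrow checker: `drain` already borrows self mutably (E0499).
            events_to_append.push(Event(format!("visited job {job}")));
            new_jobs.push(job);
        }
        self.job_runs = new_jobs;
        for event in events_to_append {
            self.append_and_broadcast(&event); // ...append after the drain ends
        }
    }
}

#[tokio::main]
async fn main() {
    let (event_tx, mut rx) = broadcast::channel(16);
    let mut orch = Orchestrator {
        log: Vec::new(),
        job_runs: vec![1, 2],
        event_tx: Some(event_tx),
    };

    orch.poll_job_runs();
    assert_eq!(orch.log.len(), 2);

    // The mirror side receives exactly what was logged, in order.
    assert_eq!(rx.recv().await.unwrap().0, "visited job 1");
    assert_eq!(rx.recv().await.unwrap().0, "visited job 2");

    // With the receiver gone, appends still succeed.
    drop(rx);
    orch.poll_job_runs();
    println!("log length: {}", orch.log.len());
}
```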
server_main.rs (deleted)

@@ -1,112 +0,0 @@
-use databuild::build_event_log::SqliteBELStorage;
-use databuild::http_server::{create_router, AppState};
-use std::sync::Arc;
-use std::time::Duration;
-use tokio::sync::{broadcast, mpsc};
-
-#[tokio::main]
-async fn main() {
-    use databuild::orchestrator::{Orchestrator, OrchestratorConfig};
-
-    // Initialize logging
-    tracing_subscriber::fmt::init();
-
-    let database = ":memory:";
-
-    // Create SQLite BEL storage (shared between orchestrator and HTTP server)
-    let bel_storage = Arc::new(
-        SqliteBELStorage::create(database).expect("Failed to create BEL storage"),
-    );
-
-    // Create command channel for orchestrator communication
-    let (command_tx, command_rx) = mpsc::channel(100);
-
-    // Create shutdown broadcast channel
-    let (shutdown_tx, _shutdown_rx) = broadcast::channel(1);
-
-    // Spawn orchestrator in background thread
-    let orch_bel_storage = SqliteBELStorage::create(database).expect("Failed to create BEL storage");
-    let orch_shutdown_rx = shutdown_tx.subscribe();
-    let orch_handle = std::thread::spawn(move || {
-        // Create orchestrator with command channel
-        let config = OrchestratorConfig::default();
-        let mut orchestrator = Orchestrator::new_with_commands(orch_bel_storage, config, command_rx);
-        let mut shutdown_rx = orch_shutdown_rx;
-
-        // Run orchestrator loop
-        loop {
-            // Check for shutdown signal
-            if shutdown_rx.try_recv().is_ok() {
-                println!("Orchestrator received shutdown signal");
-                break;
-            }
-
-            if let Err(e) = orchestrator.step() {
-                eprintln!("Orchestrator error: {}", e);
-            }
-            // Small sleep to avoid busy-waiting
-            std::thread::sleep(std::time::Duration::from_millis(10));
-        }
-    });
-
-    // Create app state with shared storage, command sender, and shutdown channel
-    let state = AppState::new(bel_storage, command_tx, shutdown_tx.clone());
-
-    // Spawn idle timeout checker task
-    let idle_state = state.clone();
-    let idle_shutdown_tx = shutdown_tx.clone();
-    tokio::spawn(async move {
-        let idle_timeout = Duration::from_secs(3 * 60 * 60); // 3 hours
-
-        loop {
-            tokio::time::sleep(Duration::from_secs(60)).await;
-
-            let last_request = idle_state.last_request_time.load(std::sync::atomic::Ordering::Relaxed);
-            let now = std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .unwrap()
-                .as_millis() as u64;
-
-            if now - last_request > idle_timeout.as_millis() as u64 {
-                eprintln!(
-                    "Server idle for {} hours, shutting down",
-                    idle_timeout.as_secs() / 3600
-                );
-                let _ = idle_shutdown_tx.send(());
-                break;
-            }
-        }
-    });
-
-    // Create router
-    let app = create_router(state);
-
-    // Bind to port 3000
-    let listener = tokio::net::TcpListener::bind("127.0.0.1:3000")
-        .await
-        .expect("Failed to bind to port 3000");
-
-    println!("DataBuild server listening on http://127.0.0.1:3000");
-    println!("  GET  /health");
-    println!("  GET  /api/wants");
-    println!("  POST /api/wants");
-    println!("  GET  /api/wants/:id");
-    println!("  GET  /api/partitions");
-    println!("  GET  /api/job_runs");
-
-    // Subscribe to shutdown signal for graceful shutdown
-    let mut server_shutdown_rx = shutdown_tx.subscribe();
-
-    // Run the server with graceful shutdown
-    axum::serve(listener, app)
-        .with_graceful_shutdown(async move {
-            let _ = server_shutdown_rx.recv().await;
-            println!("HTTP server received shutdown signal");
-        })
-        .await
-        .expect("Server error");
-
-    // Wait for orchestrator to finish
-    let _ = orch_handle.join();
-    println!("Shutdown complete");
-}
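With `server_main.rs` gone, the server is reachable only through the CLI's `cmd_serve` path shown earlier. A sketch of how that dispatch plausibly looks; the subcommand and flag names here are assumptions, since the diff only shows the `cmd_serve(port, database, config_path)` signature and the `#[command(name = "databuild")]` parser:

```rust
use clap::{Parser, Subcommand};

#[derive(Parser)]
#[command(name = "databuild")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    /// Run the HTTP server plus orchestrator (replaces databuild_server)
    Serve {
        #[arg(long, default_value_t = 3000)]
        port: u16,
        #[arg(long, default_value = ":memory:")]
        database: String,
        #[arg(long)]
        config: Option<String>,
    },
}

fn main() {
    match Cli::parse().command {
        Commands::Serve { port, database, config } => {
            // In the real code cmd_serve is #[tokio::main], so it owns the
            // async runtime and can be called from a plain sync main.
            cmd_serve(port, &database, config.as_deref());
        }
    }
}

// Stand-in with the signature from the diff.
fn cmd_serve(port: u16, database: &str, config_path: Option<&str>) {
    println!("would serve on port {port} using {database}, config {config_path:?}");
}
```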