databuild/databuild/partition_state.rs
Stuart Axelbrooke 17d5987517
Some checks failed
/ setup (push) Has been cancelled
WIP
2025-11-28 12:48:54 +08:00

577 lines
21 KiB
Rust

use crate::util::{HasRelatedIds, RelatedIds};
use crate::{PartitionDetail, PartitionRef, PartitionStatus, PartitionStatusCode};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use uuid::Uuid;
/// Builds a deterministic [`Uuid`] for a partition from the job run that created it.
///
/// SHA-256 is fed the bytes of `job_run_id` followed by the bytes of `partition_ref`;
/// the first 16 bytes of the digest become the UUID. Identical inputs therefore always
/// yield the identical UUID, which keeps UUIDs stable across replay.
///
/// NOTE(review): the two inputs are hashed back to back with no separator, so pairs such
/// as `("ab", "c")` and `("a", "bc")` map to the same UUID. Adding a delimiter would change
/// every derived UUID, so confirm nothing persists them before changing this.
pub fn derive_partition_uuid(job_run_id: &str, partition_ref: &str) -> Uuid {
    let mut sha = Sha256::new();
    for part in [job_run_id, partition_ref] {
        sha.update(part.as_bytes());
    }
    let digest = sha.finalize();

    // A SHA-256 digest is 32 bytes long; only the leading 16 are used for the UUID.
    let mut uuid_bytes = [0u8; 16];
    uuid_bytes.copy_from_slice(&digest[..16]);
    Uuid::from_bytes(uuid_bytes)
}
/// State: Partition is currently being built by a job.
///
/// Created by `PartitionWithState::<BuildingState>::new` and left through the
/// `complete`, `fail`, or `dep_miss` transitions.
#[derive(Debug, Clone)]
pub struct BuildingState {
/// ID of the job run that is building the partition.
pub job_run_id: String,
}
/// State: Partition is waiting for upstream dependencies to be built.
///
/// Reached from the Building state via `dep_miss`; left through
/// `upstreams_satisfied` or `upstream_failed`.
#[derive(Debug, Clone)]
pub struct UpstreamBuildingState {
/// ID of the job run carried over from the Building state (the run that hit the dep miss).
pub job_run_id: String,
/// Partition refs that are still missing; `satisfy_upstream` removes entries from this list.
pub missing_deps: Vec<PartitionRef>,
}
/// State: Upstream dependencies are satisfied, partition is ready to retry building.
///
/// Reached from the UpstreamBuilding state via `upstreams_satisfied`.
#[derive(Debug, Clone)]
pub struct UpForRetryState {
/// ID of the job run that had the dep miss (copied from `UpstreamBuildingState::job_run_id`).
pub original_job_run_id: String,
}
/// State: Partition has been successfully built.
///
/// Reached from the Building state via `complete`; can move on to Tainted via `taint`.
#[derive(Debug, Clone)]
pub struct LiveState {
/// Timestamp passed to `complete` (the unit is not fixed by this file).
pub built_at: u64,
/// ID of the job run that built the partition.
pub built_by: String,
}
/// State: Partition build failed (hard failure, not retryable).
///
/// Reached from the Building state via `fail`.
#[derive(Debug, Clone)]
pub struct FailedState {
/// Timestamp passed to `fail` (the unit is not fixed by this file).
pub failed_at: u64,
/// ID of the job run whose failure put the partition in this state.
pub failed_by: String,
}
/// State: Partition failed because upstream dependencies failed (terminal).
///
/// Reached from the UpstreamBuilding state via `upstream_failed`. Note that this state
/// does not keep the job run ID that the UpstreamBuilding state carried.
#[derive(Debug, Clone)]
pub struct UpstreamFailedState {
/// Timestamp passed to `upstream_failed` (the unit is not fixed by this file).
pub failed_at: u64,
/// The upstream partitions whose failure caused this one to fail.
pub failed_upstream_refs: Vec<PartitionRef>,
}
/// State: Partition has been marked as invalid/tainted.
///
/// Reached from the Live state via `taint`; further taints are recorded with `add_taint`.
#[derive(Debug, Clone)]
pub struct TaintedState {
/// Timestamp passed to `taint` when the first taint was applied (unit not fixed by this file).
pub tainted_at: u64,
/// IDs of the taints applied so far; `add_taint` skips IDs that are already present.
pub taint_ids: Vec<String>,
/// Job run that originally built this partition (before it was tainted)
pub built_by: String,
}
/// Generic partition struct parameterized by state.
///
/// The state type `S` decides which transition methods are available, so an invalid
/// transition is a compile error rather than a runtime check.
/// Each partition has a UUID derived from the job_run_id that created it together with
/// its partition ref (see `derive_partition_uuid`); transitions carry the UUID over unchanged.
#[derive(Debug, Clone)]
pub struct PartitionWithState<S> {
/// Deterministic identifier assigned when the partition is created in the Building state.
pub uuid: Uuid,
/// Reference of the partition this record describes.
pub partition_ref: PartitionRef,
/// State-specific data (one of the `*State` structs).
pub state: S,
}
/// Wrapper enum for storing partitions in collections.
///
/// Each variant wraps the typestate struct for one state, which lets partitions in
/// different states live in the same collection.
/// Note: Missing state has been removed - partitions are only created when jobs start building them.
#[derive(Debug, Clone)]
pub enum Partition {
/// A job is currently building the partition.
Building(PartitionWithState<BuildingState>),
/// The build hit missing upstream dependencies and is waiting for them.
UpstreamBuilding(PartitionWithState<UpstreamBuildingState>),
/// Upstream dependencies are satisfied; the partition can be built again.
UpForRetry(PartitionWithState<UpForRetryState>),
/// The partition was built successfully.
Live(PartitionWithState<LiveState>),
/// The build failed (hard failure).
Failed(PartitionWithState<FailedState>),
/// The partition failed because an upstream partition failed.
UpstreamFailed(PartitionWithState<UpstreamFailedState>),
/// The partition was built and later marked as tainted.
Tainted(PartitionWithState<TaintedState>),
}
/// Typed wrapper marking a [`PartitionRef`] as belonging to a partition in the Building state.
///
/// The state-specific `*PartitionRef` wrappers let function signatures encode which state a
/// partition is expected to be in. It is critical that they be treated with respect, not
/// summoned just because it is convenient: create one ephemerally from the matching typestate
/// object via `.get_ref()` and use it immediately. Never store one long-term, as the
/// partition's state can change.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BuildingPartitionRef(pub PartitionRef);

impl PartitionWithState<BuildingState> {
    /// Returns a [`BuildingPartitionRef`] holding a clone of this partition's ref.
    pub fn get_ref(&self) -> BuildingPartitionRef {
        let Self { partition_ref, .. } = self;
        BuildingPartitionRef(partition_ref.clone())
    }
}
/// Typed wrapper marking a [`PartitionRef`] as belonging to a partition in the
/// UpstreamBuilding state. Meant to be obtained via `get_ref()` and used immediately.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct UpstreamBuildingPartitionRef(pub PartitionRef);

impl PartitionWithState<UpstreamBuildingState> {
    /// Returns an [`UpstreamBuildingPartitionRef`] holding a clone of this partition's ref.
    pub fn get_ref(&self) -> UpstreamBuildingPartitionRef {
        let Self { partition_ref, .. } = self;
        UpstreamBuildingPartitionRef(partition_ref.clone())
    }
}
/// Typed wrapper marking a [`PartitionRef`] as belonging to a partition in the
/// UpForRetry state. Meant to be obtained via `get_ref()` and used immediately.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct UpForRetryPartitionRef(pub PartitionRef);

impl PartitionWithState<UpForRetryState> {
    /// Returns an [`UpForRetryPartitionRef`] holding a clone of this partition's ref.
    pub fn get_ref(&self) -> UpForRetryPartitionRef {
        let Self { partition_ref, .. } = self;
        UpForRetryPartitionRef(partition_ref.clone())
    }
}
/// Typed wrapper marking a [`PartitionRef`] as belonging to a partition in the
/// Live state. Meant to be obtained via `get_ref()` and used immediately.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LivePartitionRef(pub PartitionRef);

impl PartitionWithState<LiveState> {
    /// Returns a [`LivePartitionRef`] holding a clone of this partition's ref.
    pub fn get_ref(&self) -> LivePartitionRef {
        let Self { partition_ref, .. } = self;
        LivePartitionRef(partition_ref.clone())
    }
}
/// Typed wrapper marking a [`PartitionRef`] as belonging to a partition in the
/// Failed state. Meant to be obtained via `get_ref()` and used immediately.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FailedPartitionRef(pub PartitionRef);

impl PartitionWithState<FailedState> {
    /// Returns a [`FailedPartitionRef`] holding a clone of this partition's ref.
    pub fn get_ref(&self) -> FailedPartitionRef {
        let Self { partition_ref, .. } = self;
        FailedPartitionRef(partition_ref.clone())
    }
}
/// Typed wrapper marking a [`PartitionRef`] as belonging to a partition in the
/// UpstreamFailed state. Meant to be obtained via `get_ref()` and used immediately.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct UpstreamFailedPartitionRef(pub PartitionRef);

impl PartitionWithState<UpstreamFailedState> {
    /// Returns an [`UpstreamFailedPartitionRef`] holding a clone of this partition's ref.
    pub fn get_ref(&self) -> UpstreamFailedPartitionRef {
        let Self { partition_ref, .. } = self;
        UpstreamFailedPartitionRef(partition_ref.clone())
    }
}
/// Typed wrapper marking a [`PartitionRef`] as belonging to a partition in the
/// Tainted state. Meant to be obtained via `get_ref()` and used immediately.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaintedPartitionRef(pub PartitionRef);

impl PartitionWithState<TaintedState> {
    /// Returns a [`TaintedPartitionRef`] holding a clone of this partition's ref.
    pub fn get_ref(&self) -> TaintedPartitionRef {
        let Self { partition_ref, .. } = self;
        TaintedPartitionRef(partition_ref.clone())
    }
}
// Type-safe transition methods for BuildingState
impl PartitionWithState<BuildingState> {
    /// Starts tracking a partition, placing it directly into the Building state.
    ///
    /// The UUID comes from `derive_partition_uuid(job_run_id, partition_ref.ref)`, so
    /// replaying the same job run and ref reproduces the same UUID.
    pub fn new(job_run_id: String, partition_ref: PartitionRef) -> Self {
        Self {
            // Computed first, while both inputs are still only borrowed.
            uuid: derive_partition_uuid(&job_run_id, &partition_ref.r#ref),
            partition_ref,
            state: BuildingState { job_run_id },
        }
    }

    /// Building → Live: the job finished successfully.
    ///
    /// `timestamp` is stored as `built_at`; the building job run becomes `built_by`.
    pub fn complete(self, timestamp: u64) -> PartitionWithState<LiveState> {
        let Self {
            uuid,
            partition_ref,
            state: BuildingState { job_run_id },
        } = self;
        PartitionWithState {
            uuid,
            partition_ref,
            state: LiveState {
                built_at: timestamp,
                built_by: job_run_id,
            },
        }
    }

    /// Building → Failed: the job failed (hard failure).
    ///
    /// `timestamp` is stored as `failed_at`; the building job run becomes `failed_by`.
    pub fn fail(self, timestamp: u64) -> PartitionWithState<FailedState> {
        let Self {
            uuid,
            partition_ref,
            state: BuildingState { job_run_id },
        } = self;
        PartitionWithState {
            uuid,
            partition_ref,
            state: FailedState {
                failed_at: timestamp,
                failed_by: job_run_id,
            },
        }
    }

    /// Building → UpstreamBuilding: the job reported missing dependencies.
    ///
    /// The job run ID is carried over and `missing_deps` is stored as given.
    pub fn dep_miss(
        self,
        missing_deps: Vec<PartitionRef>,
    ) -> PartitionWithState<UpstreamBuildingState> {
        let Self {
            uuid,
            partition_ref,
            state: BuildingState { job_run_id },
        } = self;
        PartitionWithState {
            uuid,
            partition_ref,
            state: UpstreamBuildingState {
                job_run_id,
                missing_deps,
            },
        }
    }
}
// Type-safe transition methods for UpstreamBuildingState
impl PartitionWithState<UpstreamBuildingState> {
    /// UpstreamBuilding → UpForRetry: every upstream dependency is now satisfied.
    ///
    /// The job run ID is kept as `original_job_run_id`; the missing-deps list is discarded.
    pub fn upstreams_satisfied(self) -> PartitionWithState<UpForRetryState> {
        let Self {
            uuid,
            partition_ref,
            state,
        } = self;
        PartitionWithState {
            uuid,
            partition_ref,
            state: UpForRetryState {
                original_job_run_id: state.job_run_id,
            },
        }
    }

    /// UpstreamBuilding → UpstreamFailed: an upstream dependency failed.
    ///
    /// Stores `failed_upstream_refs` and `timestamp` (as `failed_at`); the previous
    /// state's job run ID and missing-deps list are dropped.
    pub fn upstream_failed(
        self,
        failed_upstream_refs: Vec<PartitionRef>,
        timestamp: u64,
    ) -> PartitionWithState<UpstreamFailedState> {
        let Self {
            uuid,
            partition_ref,
            ..
        } = self;
        PartitionWithState {
            uuid,
            partition_ref,
            state: UpstreamFailedState {
                failed_at: timestamp,
                failed_upstream_refs,
            },
        }
    }

    /// Reports whether `upstream_ref` is one of the dependencies still being waited on.
    pub fn is_waiting_for(&self, upstream_ref: &str) -> bool {
        self.state
            .missing_deps
            .iter()
            .map(|dep| dep.r#ref.as_str())
            .any(|missing| missing == upstream_ref)
    }

    /// Drops every missing dep whose ref equals `upstream_ref`.
    ///
    /// Returns the updated partition together with the number of deps still missing.
    pub fn satisfy_upstream(mut self, upstream_ref: &str) -> (Self, usize) {
        let deps = &mut self.state.missing_deps;
        deps.retain(|dep| dep.r#ref != upstream_ref);
        let still_missing = deps.len();
        (self, still_missing)
    }
}
// Type-safe transition methods for LiveState
impl PartitionWithState<LiveState> {
/// Transition from Live to Tainted when a taint is applied
pub fn taint(self, taint_id: String, timestamp: u64) -> PartitionWithState<TaintedState> {
PartitionWithState {
uuid: self.uuid,
partition_ref: self.partition_ref,
state: TaintedState {
tainted_at: timestamp,
taint_ids: vec![taint_id],
built_by: self.state.built_by,
},
}
}
}
// Type-safe transition methods for TaintedState
impl PartitionWithState<TaintedState> {
    /// Records an additional taint on a partition that is already tainted.
    ///
    /// A `taint_id` that is already present is ignored, so the list never holds duplicates
    /// added through this method. `tainted_at` is left unchanged.
    pub fn add_taint(mut self, taint_id: String) -> Self {
        let already_recorded = self.state.taint_ids.iter().any(|known| *known == taint_id);
        if !already_recorded {
            self.state.taint_ids.push(taint_id);
        }
        self
    }
}
// Helper methods on the Partition enum
impl Partition {
/// Get the UUID from any state
pub fn uuid(&self) -> Uuid {
match self {
Partition::Building(p) => p.uuid,
Partition::UpstreamBuilding(p) => p.uuid,
Partition::UpForRetry(p) => p.uuid,
Partition::Live(p) => p.uuid,
Partition::Failed(p) => p.uuid,
Partition::UpstreamFailed(p) => p.uuid,
Partition::Tainted(p) => p.uuid,
}
}
/// Get the partition reference from any state
pub fn partition_ref(&self) -> &PartitionRef {
match self {
Partition::Building(p) => &p.partition_ref,
Partition::UpstreamBuilding(p) => &p.partition_ref,
Partition::UpForRetry(p) => &p.partition_ref,
Partition::Live(p) => &p.partition_ref,
Partition::Failed(p) => &p.partition_ref,
Partition::UpstreamFailed(p) => &p.partition_ref,
Partition::Tainted(p) => &p.partition_ref,
}
}
/// Check if partition is in Live state
pub fn is_live(&self) -> bool {
matches!(self, Partition::Live(_))
}
/// Check if partition is in a terminal state (Live, Failed, UpstreamFailed, or Tainted)
pub fn is_terminal(&self) -> bool {
matches!(
self,
Partition::Live(_)
| Partition::Failed(_)
| Partition::UpstreamFailed(_)
| Partition::Tainted(_)
)
}
/// Check if partition is currently being built (includes UpstreamBuilding as it holds a "lease")
pub fn is_building(&self) -> bool {
matches!(
self,
Partition::Building(_) | Partition::UpstreamBuilding(_)
)
}
/// Check if partition is in UpForRetry state (ready to be rebuilt)
pub fn is_up_for_retry(&self) -> bool {
matches!(self, Partition::UpForRetry(_))
}
/// Check if partition is failed (hard failure)
pub fn is_failed(&self) -> bool {
matches!(self, Partition::Failed(_))
}
/// Check if partition is upstream failed
pub fn is_upstream_failed(&self) -> bool {
matches!(self, Partition::UpstreamFailed(_))
}
/// Check if partition is tainted
pub fn is_tainted(&self) -> bool {
matches!(self, Partition::Tainted(_))
}
}
// ==================== HasRelatedIds trait implementation ====================
impl HasRelatedIds for Partition {
/// Get the IDs of all entities this partition references.
/// Note: downstream_partition_uuids and want_ids come from BuildState indexes,
/// not from Partition itself.
fn related_ids(&self) -> RelatedIds {
// Job run ID from the builder (for states that track it)
let job_run_ids: Vec<String> = match self {
Partition::Building(p) => vec![p.state.job_run_id.clone()],
Partition::UpstreamBuilding(p) => vec![p.state.job_run_id.clone()],
Partition::UpForRetry(p) => vec![p.state.original_job_run_id.clone()],
Partition::Live(p) => vec![p.state.built_by.clone()],
Partition::Failed(p) => vec![p.state.failed_by.clone()],
Partition::UpstreamFailed(_) => vec![],
Partition::Tainted(p) => vec![p.state.built_by.clone()],
};
// Partition refs from missing deps (for UpstreamBuilding state)
let partition_refs: Vec<String> = match self {
Partition::UpstreamBuilding(p) => p
.state
.missing_deps
.iter()
.map(|d| d.r#ref.clone())
.collect(),
Partition::UpstreamFailed(p) => p
.state
.failed_upstream_refs
.iter()
.map(|d| d.r#ref.clone())
.collect(),
_ => vec![],
};
RelatedIds {
partition_refs,
partition_uuids: vec![],
job_run_ids,
want_ids: vec![],
}
}
}
impl Partition {
/// Convert to PartitionDetail for API responses and queries.
/// Note: want_ids and downstream_partition_uuids are empty here and will be
/// populated by BuildState from its inverted indexes.
/// Upstream lineage is resolved via built_by_job_run_id → job run's read_deps.
pub fn to_detail(&self) -> PartitionDetail {
match self {
Partition::Building(p) => PartitionDetail {
r#ref: Some(p.partition_ref.clone()),
status: Some(PartitionStatus {
code: PartitionStatusCode::PartitionBuilding as i32,
name: "PartitionBuilding".to_string(),
}),
want_ids: vec![], // Populated by BuildState
job_run_ids: vec![p.state.job_run_id.clone()],
taint_ids: vec![],
last_updated_timestamp: None,
uuid: p.uuid.to_string(),
built_by_job_run_id: None,
downstream_partition_uuids: vec![], // Populated by BuildState
},
Partition::UpstreamBuilding(p) => PartitionDetail {
r#ref: Some(p.partition_ref.clone()),
status: Some(PartitionStatus {
code: PartitionStatusCode::PartitionBuilding as i32, // Use Building status for UpstreamBuilding
name: "PartitionUpstreamBuilding".to_string(),
}),
want_ids: vec![], // Populated by BuildState
job_run_ids: vec![p.state.job_run_id.clone()],
taint_ids: vec![],
last_updated_timestamp: None,
uuid: p.uuid.to_string(),
built_by_job_run_id: None,
downstream_partition_uuids: vec![], // Populated by BuildState
},
Partition::UpForRetry(p) => PartitionDetail {
r#ref: Some(p.partition_ref.clone()),
status: Some(PartitionStatus {
code: PartitionStatusCode::PartitionBuilding as i32, // Still "building" conceptually
name: "PartitionUpForRetry".to_string(),
}),
want_ids: vec![], // Populated by BuildState
job_run_ids: vec![p.state.original_job_run_id.clone()],
taint_ids: vec![],
last_updated_timestamp: None,
uuid: p.uuid.to_string(),
built_by_job_run_id: None,
downstream_partition_uuids: vec![], // Populated by BuildState
},
Partition::Live(p) => PartitionDetail {
r#ref: Some(p.partition_ref.clone()),
status: Some(PartitionStatus {
code: PartitionStatusCode::PartitionLive as i32,
name: "PartitionLive".to_string(),
}),
want_ids: vec![], // Populated by BuildState
job_run_ids: vec![p.state.built_by.clone()],
taint_ids: vec![],
last_updated_timestamp: Some(p.state.built_at),
uuid: p.uuid.to_string(),
built_by_job_run_id: Some(p.state.built_by.clone()),
downstream_partition_uuids: vec![], // Populated by BuildState
},
Partition::Failed(p) => PartitionDetail {
r#ref: Some(p.partition_ref.clone()),
status: Some(PartitionStatus {
code: PartitionStatusCode::PartitionFailed as i32,
name: "PartitionFailed".to_string(),
}),
want_ids: vec![], // Populated by BuildState
job_run_ids: vec![p.state.failed_by.clone()],
taint_ids: vec![],
last_updated_timestamp: Some(p.state.failed_at),
uuid: p.uuid.to_string(),
built_by_job_run_id: None,
downstream_partition_uuids: vec![], // Populated by BuildState
},
Partition::UpstreamFailed(p) => PartitionDetail {
r#ref: Some(p.partition_ref.clone()),
status: Some(PartitionStatus {
code: PartitionStatusCode::PartitionFailed as i32, // Use Failed status
name: "PartitionUpstreamFailed".to_string(),
}),
want_ids: vec![], // Populated by BuildState
job_run_ids: vec![],
taint_ids: vec![],
last_updated_timestamp: Some(p.state.failed_at),
uuid: p.uuid.to_string(),
built_by_job_run_id: None,
downstream_partition_uuids: vec![], // Populated by BuildState
},
Partition::Tainted(p) => PartitionDetail {
r#ref: Some(p.partition_ref.clone()),
status: Some(PartitionStatus {
code: PartitionStatusCode::PartitionTainted as i32,
name: "PartitionTainted".to_string(),
}),
want_ids: vec![], // Populated by BuildState
job_run_ids: vec![p.state.built_by.clone()],
taint_ids: p.state.taint_ids.clone(),
last_updated_timestamp: Some(p.state.tainted_at),
uuid: p.uuid.to_string(),
built_by_job_run_id: Some(p.state.built_by.clone()),
downstream_partition_uuids: vec![], // Populated by BuildState
},
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `PartitionRef` for the given ref string.
    fn pref(name: &str) -> PartitionRef {
        PartitionRef {
            r#ref: name.to_string(),
        }
    }

    /// A fresh partition in the Building state for job "job-123" and ref "data/beta".
    fn building_beta() -> PartitionWithState<BuildingState> {
        PartitionWithState::<BuildingState>::new("job-123".to_string(), pref("data/beta"))
    }

    #[test]
    fn test_derive_partition_uuid_deterministic() {
        // Same inputs must hash to the same UUID.
        let first = derive_partition_uuid("job-123", "data/beta");
        let second = derive_partition_uuid("job-123", "data/beta");
        assert_eq!(first, second);
    }

    #[test]
    fn test_derive_partition_uuid_different_inputs() {
        // Changing either the job run or the partition ref must change the UUID.
        let base = derive_partition_uuid("job-123", "data/beta");
        let other_job = derive_partition_uuid("job-456", "data/beta");
        let other_ref = derive_partition_uuid("job-123", "data/alpha");
        assert_ne!(base, other_job);
        assert_ne!(base, other_ref);
        assert_ne!(other_job, other_ref);
    }

    #[test]
    fn test_partition_building_transitions() {
        let partition = building_beta();

        // Building -> Live
        let live = partition.clone().complete(1000);
        assert_eq!(live.state.built_at, 1000);
        assert_eq!(live.state.built_by, "job-123");

        // Building -> Failed
        let failed = partition.clone().fail(2000);
        assert_eq!(failed.state.failed_at, 2000);
        assert_eq!(failed.state.failed_by, "job-123");

        // Building -> UpstreamBuilding (dep miss)
        let upstream_building = partition.dep_miss(vec![pref("data/alpha")]);
        assert_eq!(upstream_building.state.missing_deps.len(), 1);
        assert_eq!(upstream_building.state.missing_deps[0].r#ref, "data/alpha");
    }

    #[test]
    fn test_upstream_building_transitions() {
        let upstream_building = building_beta().dep_miss(vec![pref("data/alpha")]);

        // UpstreamBuilding -> UpForRetry
        let up_for_retry = upstream_building.clone().upstreams_satisfied();
        assert_eq!(up_for_retry.state.original_job_run_id, "job-123");

        // UpstreamBuilding -> UpstreamFailed
        let upstream_failed = upstream_building.upstream_failed(vec![pref("data/alpha")], 3000);
        assert_eq!(upstream_failed.state.failed_at, 3000);
        assert_eq!(upstream_failed.state.failed_upstream_refs.len(), 1);
        assert_eq!(
            upstream_failed.state.failed_upstream_refs[0].r#ref,
            "data/alpha"
        );
    }
}