From c556fec218705705f4ed55f9c31b229612d0986e Mon Sep 17 00:00:00 2001 From: Stuart Axelbrooke Date: Mon, 24 Nov 2025 21:49:02 +0800 Subject: [PATCH] add state comments in protobuf --- databuild/databuild.proto | 53 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/databuild/databuild.proto b/databuild/databuild.proto index c7fffc1..f329aeb 100644 --- a/databuild/databuild.proto +++ b/databuild/databuild.proto @@ -176,13 +176,27 @@ message WantStatus { string name = 2; } enum WantStatusCode { - WantIdle = 0; - WantBuilding = 1; - WantFailed = 2; - WantSuccessful = 3; - WantCanceled = 4; - WantUpstreamBuilding = 5; - WantUpstreamFailed = 6; + // Wants are created in this state and should immediately transition to another state based on the current state + // of the partitions they reference. NOTE(review): inserting WantNew at 0 shifts every pre-existing WantStatusCode number up by one; confirm no serialized data relies on the old numbering. + WantNew = 0; + // The want is neither building nor blocked from building; it is schedulable. + WantIdle = 1; + // No referenced partition has failed, and at least one referenced partition is building. + WantBuilding = 2; + // At least one referenced partition has failed. + WantFailed = 3; + // All referenced partitions are live. + WantSuccessful = 4; + // The want itself has been canceled. It should no longer influence job scheduling, and any existing jobs not building + // partitions requested by other active wants should be canceled. + WantCanceled = 5; + // A referenced partition's building job failed with a dep miss, and a derivative want is now building the missed + // partitions. This want is waiting for the missed partitions to become live before returning to WantIdle and becoming + // schedulable again. + WantUpstreamBuilding = 6; + // After entering the WantUpstreamBuilding state, one of the jobs triggered by the derivative want has failed, meaning this + // want will not be able to succeed. 
+ WantUpstreamFailed = 7; } message WantDetail { @@ -219,13 +233,18 @@ message PartitionStatus { string name = 2; } enum PartitionStatusCode { - // TODO how do we avoid copying job states here? This is essentially a union of job states and taints? - PartitionUnknown = 0; - PartitionWanted = 1; - PartitionBuilding = 2; - PartitionLive = 3; - PartitionFailed = 4; - PartitionTainted = 5; + // Work is in progress to produce the partition. This state acts as a leasing mechanism: the orchestrator will not + // schedule other jobs to produce this partition while it is in PartitionBuilding. For example, a dep miss may have occurred when + // trying to build the partition and jobs for its upstreams may be in progress; this state signals + // that the partition's build should not be rescheduled. NOTE(review): this hunk drops PartitionUnknown and PartitionWanted and renumbers the remaining values, so 0 now means PartitionBuilding; confirm no serialized data relies on the old numbering. + PartitionBuilding = 0; + // The partition has been produced and is currently valid. + PartitionLive = 1; + // Building the partition failed in a way that is not retryable. + PartitionFailed = 2; + // The partition has been marked as tainted. It should not be read, and if any active want references it, a job to + // build it should be scheduled. + PartitionTainted = 3; } message TaintDetail { @@ -237,11 +256,17 @@ message JobRunStatus { string name = 2; } enum JobRunStatusCode { + // The job run has been queued and will run at some point in the future (e.g. when a pool slot opens). JobRunQueued = 0; + // The job run is currently running. JobRunRunning = 1; + // The job run has failed for a non-recoverable reason. JobRunFailed = 2; + // The job run has been canceled. JobRunCanceled = 3; + // The job run succeeded. JobRunSucceeded = 4; + // The job run failed due to specific missing deps, emitting a JobRunMissingDeps. JobRunDepMiss = 5; } message JobRunDetail {