// File-viewer metadata captured with this listing: 397 lines, 11 KiB, Protocol Buffer.
// This schema uses proto3 syntax; every definition in the file lives in the
// databuild.v1 package.
syntax = "proto3";

package databuild.v1;
|
|
|
|
// Job Config
|
|
// Fully resolved configuration for a job run, aside from the partitions it
// should produce, which are passed as args.
message JobConfig {
  // The fully qualified, unique label representing the job.
  string label = 1;
  // The command to run to launch the job.
  string entrypoint = 2;
  // The environment variables to set for the job (name -> value). Map
  // iteration order is undefined in protobuf, so do not rely on ordering.
  map<string, string> environment = 3;
  // A list of regex patterns that partitions must match to be considered for
  // this job.
  // NOTE(review): the regex dialect and anchoring rules are not specified
  // here — confirm with the consumer of this field.
  repeated string partition_patterns = 4;
  // TODO future fields to consider
  // - timeout
}
|
|
|
|
|
|
// Core Build Event Log (BEL)
|
|
|
|
// A reference to a partition, carried as a single string.
message PartitionRef {
  // The reference string.
  // NOTE(review): the expected format is not defined in this file — confirm.
  string ref = 1;
}
|
|
|
|
// The base event for all events written to the BEL (build event log).
//
// The payload is carried by the `event` oneof, so at most one of the typed
// event fields is set on any given message. Field numbers 3-9 are currently
// unused; the oneof members start at 10.
message DataBuildEvent {
  // NOTE(review): unit and epoch of this timestamp are not specified in this
  // file — confirm before relying on them.
  uint64 timestamp = 1;
  // Identifier of this event.
  // NOTE(review): uniqueness/ordering guarantees are not stated here — confirm.
  uint64 event_id = 2;

  oneof event {
    // Job run events
    JobRunBufferEventV1 job_run_buffer_v1 = 10;
    JobRunQueueEventV1 job_run_queue_v1 = 11;
    JobRunHeartbeatEventV1 job_run_heartbeat_v1 = 12;
    JobRunSuccessEventV1 job_run_success_v1 = 13;
    JobRunFailureEventV1 job_run_failure_v1 = 14;
    JobRunCancelEventV1 job_run_cancel_v1 = 15;
    JobRunMissingDepsEventV1 job_run_missing_deps_v1 = 16;
    // Want events
    WantCreateEventV1 want_create_v1 = 17;
    WantCancelEventV1 want_cancel_v1 = 18;
    // Taint events
    TaintCreateEventV1 taint_create_v1 = 19;
    TaintCancelEventV1 taint_cancel_v1 = 20;
  }
}
|
|
|
|
// Source metadata for user-driven events.
//
// At most one member of the `source` oneof is set: either a manual trigger or
// a trigger attributed to a job run.
message EventSource {
  oneof source {
    // The event was triggered manually.
    ManuallyTriggeredEvent manually_triggered = 1;
    // The event was triggered by a job.
    JobTriggeredEvent job_triggered = 2;
  }
}
|
|
// EventSource variant for manually triggered events.
message ManuallyTriggeredEvent {
  // NOTE(review): presumably identifies the user who triggered the event; the
  // identifier format is not specified in this file — confirm.
  string user = 1;
}
|
|
// EventSource variant for events triggered by a job.
message JobTriggeredEvent {
  // NOTE(review): presumably the ID of the job run that triggered the event —
  // confirm against the producer.
  string job_run_id = 1;
}
|
|
|
|
// Associates a want ID with a list of partition refs.
message WantAttributedPartitions {
  // ID of the want.
  string want_id = 1;
  // Partition refs attributed to that want.
  repeated PartitionRef partitions = 2;
}
|
|
|
|
// Indicates buffer state for job.
message JobRunBufferEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
  // NOTE(review): presumably the same label as JobConfig.label — confirm.
  string job_label = 2;
  // Partition refs associated with this job run as "building".
  repeated PartitionRef building_partitions = 3;
  // Per-want grouping of partition refs for this job run.
  repeated WantAttributedPartitions want_attributed_partitions = 4;
  // TODO how do we handle buffer definition? Start simple, noop until we want
  // something here?
}
|
|
// Just indicates that job has entered queue.
message JobRunQueueEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
}
|
|
// Emitted immediately on job spawn, and periodically by job to indicate job
// health when heartbeating is required. In future it will also be used to
// enable job re-entrance.
message JobRunHeartbeatEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
  // TODO reentrance?
}
|
|
// Simply indicates that the job has succeeded.
message JobRunSuccessEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
}
|
|
// Simply indicates that the job has failed. Depending on retry logic defined
// in job, it may retry.
message JobRunFailureEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
  // Reason for the failure.
  // NOTE(review): free-form vs. structured content is not specified — confirm.
  string reason = 2;
}
|
|
// Job was explicitly canceled.
message JobRunCancelEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
  // Source metadata for the cancellation.
  EventSource source = 2;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 3;
}
|
|
// Job indicating that required deps are missing, listing upstreams -> impacted
// outputs so that wants can be propagated.
message JobRunMissingDepsEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
  // Missing deps, each pairing impacted partition refs with the missing ones.
  repeated MissingDeps missing_deps = 2;
  // Read deps, each pairing impacted partition refs with the ones read.
  repeated ReadDeps read_deps = 3;
}
|
|
// Event payload listing the read deps of a job run.
// NOTE(review): this message is not a member of the DataBuildEvent.event oneof
// in this file — confirm whether it is meant to be added there or is unused.
message JobRunReadDepsEventV1 {
  // ID of the job run this event refers to.
  string job_run_id = 1;
  // Read deps, each pairing impacted partition refs with the ones read.
  repeated ReadDeps read_deps = 2;
}
|
|
// A versioned list of missing deps.
// NOTE(review): presumably the payload a job emits to report a dep miss (the
// JobRunDepMiss status comment mentions "emitting a JobRunMissingDeps") —
// confirm against the job runtime.
message JobRunMissingDeps {
  // Version string for this payload.
  // NOTE(review): version scheme is not specified in this file — confirm.
  string version = 1;
  // Missing deps, each pairing impacted partition refs with the missing ones.
  repeated MissingDeps missing_deps = 2;
}
|
|
// Pairs a set of impacted partition refs with the missing partition refs that
// block them.
message MissingDeps {
  // The list of partition refs that are prevented from building by these
  // missing deps (can be just 1).
  repeated PartitionRef impacted = 1;
  // The partition refs that are missing.
  repeated PartitionRef missing = 2;
}
|
|
// A versioned list of read deps.
message JobRunReadDeps {
  // Version string for this payload.
  // NOTE(review): version scheme is not specified in this file — confirm.
  string version = 1;
  // Read deps, each pairing impacted partition refs with the ones read.
  repeated ReadDeps read_deps = 2;
}
|
|
// Pairs a set of built partition refs with the partition refs read to build
// them.
message ReadDeps {
  // The list of partition refs that are built using the read deps (can be
  // just 1).
  repeated PartitionRef impacted = 1;
  // The partition refs that were read.
  repeated PartitionRef read = 2;
}
|
|
|
|
|
|
// Event payload recording the creation of a want.
message WantCreateEventV1 {
  // The unique ID of this want.
  string want_id = 1;
  // The partition refs this want references.
  repeated PartitionRef partitions = 2;
  // NOTE(review): unit, epoch and meaning of this timestamp are not specified
  // in this file — confirm.
  uint64 data_timestamp = 3;
  // NOTE(review): in seconds per the field name; what the TTL is measured
  // from is not specified here — confirm.
  uint64 ttl_seconds = 4;
  // NOTE(review): in seconds per the field name; what the SLA is measured
  // from is not specified here — confirm.
  uint64 sla_seconds = 5;
  // The source of the want. Can be from job, API, CLI, web app...
  EventSource source = 6;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 7;
}
|
|
// Event payload recording the cancellation of a want.
message WantCancelEventV1 {
  // ID of the want being canceled.
  string want_id = 1;
  // Source metadata for the cancellation.
  EventSource source = 2;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 3;
}
|
|
|
|
// Event payload recording the creation of a taint.
message TaintCreateEventV1 {
  // ID of the taint being created.
  string taint_id = 1;
  // ID of the root taint.
  // NOTE(review): the value used for a taint with no ancestors (empty string
  // vs. its own ID) is not specified in this file — confirm.
  string root_taint_id = 2;
  // ID of the parent taint.
  // NOTE(review): the value used when there is no parent is not specified in
  // this file — confirm.
  string parent_taint_id = 3;
  // The partition refs this taint references.
  repeated PartitionRef partitions = 4;
  // Source metadata for the taint.
  EventSource source = 5;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 6;
}
|
|
// Event payload recording the cancellation of a taint.
message TaintCancelEventV1 {
  // ID of the taint being canceled.
  string taint_id = 1;
  // Source metadata for the cancellation.
  EventSource source = 2;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 3;
}
|
|
|
|
// Build State
|
|
|
|
// Represents the whole state of the system (currently commented out).
//message BuildState {
//  map<string, WantDetail> wants = 1;
//  map<string, PartitionDetail> partitions = 2;
//  map<string, TaintDetail> taints = 3;
//  map<string, JobRunDetail> job_runs = 4;
//}
|
|
|
|
// Status of a want: an enum code plus a string name.
message WantStatus {
  // The status code.
  WantStatusCode code = 1;
  // NOTE(review): presumably the textual name of `code` — confirm how the two
  // are kept in sync.
  string name = 2;
}
|
|
// Status codes for a want.
//
// NOTE(review): the zero value (WantNew) carries business meaning. In proto3
// an unset enum field reads as the zero value, so an unset code cannot be
// told apart from WantNew. Value names and numbers are left unchanged here to
// stay compatible with existing data and generated code.
enum WantStatusCode {
  // Wants are created in this state, and they should immediately transition
  // to another state based on the current state of partitions they reference.
  WantNew = 0;
  // The want is not building, but not blocked from building either - it is
  // schedulable.
  WantIdle = 1;
  // No referenced partitions are failed, and at least one referenced
  // partition is building.
  WantBuilding = 2;
  // At least 1 referenced partition is failed.
  WantFailed = 3;
  // All referenced partitions are live.
  WantSuccessful = 4;
  // The want itself has been canceled. It should no longer influence job
  // scheduling, and any existing jobs not building partitions requested by
  // other active wants should be canceled.
  WantCanceled = 5;
  // A referenced partition's building job failed with a dep miss, and a
  // derivative want is now building the missed partitions. This want is
  // waiting for missed partitions to be live before going back to Idle and
  // becoming schedulable again.
  WantUpstreamBuilding = 6;
  // After entering WantUpstreamBuilding state, one of the derivative want's
  // triggered jobs has failed, meaning this want will not be able to succeed.
  WantUpstreamFailed = 7;
}
|
|
|
|
// Detailed view of a want, including its current status.
message WantDetail {
  // ID of the want.
  string want_id = 1;
  // The partitions directly wanted by this want.
  repeated PartitionRef partitions = 2;
  // The upstream partitions, detected from a dep miss job run failure.
  repeated PartitionRef upstreams = 3;
  // NOTE(review): unit, epoch and meaning of this timestamp are not specified
  // in this file — confirm.
  uint64 data_timestamp = 4;
  // NOTE(review): in seconds per the field name; reference point not
  // specified here — confirm.
  uint64 ttl_seconds = 5;
  // NOTE(review): in seconds per the field name; reference point not
  // specified here — confirm.
  uint64 sla_seconds = 6;
  // Source metadata for the want.
  EventSource source = 7;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 8;
  // Current status of the want.
  WantStatus status = 9;
  // NOTE(review): unit and epoch are not specified in this file — confirm.
  uint64 last_updated_timestamp = 10;
  // TODO
}
|
|
|
|
// Detailed view of a partition, including its status and the IDs of related
// objects.
message PartitionDetail {
  // The partition reference.
  PartitionRef ref = 1;
  // The partition's current status.
  PartitionStatus status = 2;
  // The latest update to the partition's status. Declared `optional`, so
  // presence is tracked explicitly.
  // NOTE(review): unit and epoch are not specified in this file — confirm.
  optional uint64 last_updated_timestamp = 3;
  // IDs that associate the partition with other objects.
  repeated string job_run_ids = 4;
  // Wants that reference this partition.
  repeated string want_ids = 5;
  // IDs of taints associated with this partition.
  repeated string taint_ids = 6;
  // The unique identifier for this partition instance (UUID as string).
  // Each time a partition is built, it gets a new UUID derived from the
  // job_run_id.
  string uuid = 7;
}
|
|
// Status of a partition: an enum code plus a string name.
message PartitionStatus {
  // The status code.
  PartitionStatusCode code = 1;
  // NOTE(review): presumably the textual name of `code` — confirm how the two
  // are kept in sync.
  string name = 2;
}
|
|
// Status codes for a partition.
//
// NOTE(review): the zero value (PartitionBuilding) carries business meaning.
// In proto3 an unset enum field reads as the zero value, so an unset code is
// indistinguishable from PartitionBuilding. Value names and numbers are left
// unchanged here to stay compatible with existing data and generated code.
enum PartitionStatusCode {
  // Work is in progress to produce the partition. This state acts as a
  // leasing mechanism: the orchestrator will not schedule other jobs to
  // produce this partition while it is in Building; e.g., a dep miss may have
  // occurred when trying to build the partition, and jobs for the upstreams
  // may be in progress, and this state enables us to signal that we shouldn't
  // reschedule.
  PartitionBuilding = 0;
  // The partition has been produced and is currently valid.
  PartitionLive = 1;
  // Building of the partition has failed in a way that is not retryable.
  PartitionFailed = 2;
  // The partition has been marked as tainted. It shouldn't be read, and if
  // any active wants reference it, a job to build it should be scheduled.
  PartitionTainted = 3;
}
|
|
|
|
// Detailed view of a taint.
message TaintDetail {
  // The unique identifier for this taint.
  string taint_id = 1;
  // The root taint ID (for taint hierarchies).
  string root_taint_id = 2;
  // The parent taint ID (for taint hierarchies).
  string parent_taint_id = 3;
  // The partitions affected by this taint.
  repeated PartitionRef partitions = 4;
  // Source of the taint event.
  EventSource source = 5;
  // Optional comment describing the taint.
  optional string comment = 6;
}
|
|
|
|
// Status of a job run: an enum code plus a string name.
message JobRunStatus {
  // The status code.
  JobRunStatusCode code = 1;
  // NOTE(review): presumably the textual name of `code` — confirm how the two
  // are kept in sync.
  string name = 2;
}
|
|
// Status codes for a job run.
//
// NOTE(review): the zero value (JobRunQueued) carries business meaning. In
// proto3 an unset enum field reads as the zero value, so an unset code is
// indistinguishable from JobRunQueued. Value names and numbers are left
// unchanged here to stay compatible with existing data and generated code.
enum JobRunStatusCode {
  // The job run has been queued, and will be run at some point in the future
  // (e.g. pool slot opens, etc).
  JobRunQueued = 0;
  // The job run is now running.
  JobRunRunning = 1;
  // The job run has failed for a non-recoverable reason.
  JobRunFailed = 2;
  // The job run has been canceled.
  JobRunCanceled = 3;
  // The job run succeeded.
  JobRunSucceeded = 4;
  // The job run failed due to specific missing deps, emitting a
  // JobRunMissingDeps.
  JobRunDepMiss = 5;
}
|
|
// Detailed view of a job run, including its current status.
message JobRunDetail {
  // ID of the job run.
  string id = 1;
  // Current status of the job run.
  JobRunStatus status = 2;
  // Declared `optional`, so presence is tracked explicitly.
  // NOTE(review): presumably the time of the most recent heartbeat; unit and
  // epoch are not specified in this file — confirm.
  optional uint64 last_heartbeat_at = 3;
  // Partition refs associated with this job run as "building".
  repeated PartitionRef building_partitions = 4;
  // Per-want grouping of partition refs for this job run.
  repeated WantAttributedPartitions servicing_wants = 5;
}
|
|
|
|
|
|
|
|
// Filter used to select events.
message EventFilter {
  // IDs of wants to get relevant events for.
  // NOTE(review): behavior for an empty list (match all vs. match none) is not
  // specified in this file — confirm.
  repeated string want_ids = 1;
}
|
|
|
|
// Request for a page of wants. Both fields are declared `optional`, so the
// receiver can tell "not provided" apart from an explicit 0.
message ListWantsRequest {
  // Page to return.
  // NOTE(review): 0- vs. 1-based indexing and the default when unset are not
  // specified in this file — confirm.
  optional uint64 page = 1;
  // Number of items per page.
  // NOTE(review): default and maximum are not specified in this file — confirm.
  optional uint64 page_size = 2;
  // TODO add filters later
}
|
|
// Response carrying a list of wants plus paging metadata.
message ListWantsResponse {
  // The wants returned.
  repeated WantDetail data = 1;
  // NOTE(review): presumably the total number of matching wants across all
  // pages — confirm.
  uint64 match_count = 2;
  // NOTE(review): presumably the page that was actually served — confirm.
  uint64 page = 3;
  // NOTE(review): presumably the page size that was actually applied — confirm.
  uint64 page_size = 4;
}
|
|
|
|
// Request for a page of taints. Both fields are declared `optional`, so the
// receiver can tell "not provided" apart from an explicit 0.
message ListTaintsRequest {
  // Page to return.
  // NOTE(review): 0- vs. 1-based indexing and the default when unset are not
  // specified in this file — confirm.
  optional uint64 page = 1;
  // Number of items per page.
  // NOTE(review): default and maximum are not specified in this file — confirm.
  optional uint64 page_size = 2;
  // TODO add filters later
}
|
|
// Response carrying a list of taints plus paging metadata.
message ListTaintsResponse {
  // The taints returned.
  repeated TaintDetail data = 1;
  // NOTE(review): presumably the total number of matching taints across all
  // pages — confirm.
  uint64 match_count = 2;
  // NOTE(review): presumably the page that was actually served — confirm.
  uint64 page = 3;
  // NOTE(review): presumably the page size that was actually applied — confirm.
  uint64 page_size = 4;
}
|
|
|
|
// Request for a page of partitions. Both fields are declared `optional`, so
// the receiver can tell "not provided" apart from an explicit 0.
message ListPartitionsRequest {
  // Page to return.
  // NOTE(review): 0- vs. 1-based indexing and the default when unset are not
  // specified in this file — confirm.
  optional uint64 page = 1;
  // Number of items per page.
  // NOTE(review): default and maximum are not specified in this file — confirm.
  optional uint64 page_size = 2;
  // TODO add filters later
}
|
|
// Response carrying a list of partitions plus paging metadata.
message ListPartitionsResponse {
  // The partitions returned.
  repeated PartitionDetail data = 1;
  // NOTE(review): presumably the total number of matching partitions across
  // all pages — confirm.
  uint64 match_count = 2;
  // NOTE(review): presumably the page that was actually served — confirm.
  uint64 page = 3;
  // NOTE(review): presumably the page size that was actually applied — confirm.
  uint64 page_size = 4;
}
|
|
|
|
// Request for a page of job runs. Both fields are declared `optional`, so the
// receiver can tell "not provided" apart from an explicit 0.
message ListJobRunsRequest {
  // Page to return.
  // NOTE(review): 0- vs. 1-based indexing and the default when unset are not
  // specified in this file — confirm.
  optional uint64 page = 1;
  // Number of items per page.
  // NOTE(review): default and maximum are not specified in this file — confirm.
  optional uint64 page_size = 2;
  // TODO add filters later
}
|
|
// Response carrying a list of job runs plus paging metadata.
message ListJobRunsResponse {
  // The job runs returned.
  repeated JobRunDetail data = 1;
  // NOTE(review): presumably the total number of matching job runs across all
  // pages — confirm.
  uint64 match_count = 2;
  // NOTE(review): presumably the page that was actually served — confirm.
  uint64 page = 3;
  // NOTE(review): presumably the page size that was actually applied — confirm.
  uint64 page_size = 4;
}
|
|
|
|
// Request to create a want. Carries the same fields as WantCreateEventV1
// except for want_id, which is absent here.
message CreateWantRequest {
  // The partition refs the new want should reference.
  repeated PartitionRef partitions = 1;
  // NOTE(review): unit, epoch and meaning of this timestamp are not specified
  // in this file — confirm.
  uint64 data_timestamp = 2;
  // NOTE(review): in seconds per the field name; reference point not
  // specified here — confirm.
  uint64 ttl_seconds = 3;
  // NOTE(review): in seconds per the field name; reference point not
  // specified here — confirm.
  uint64 sla_seconds = 4;
  // Source metadata for the want.
  EventSource source = 5;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 6;
}
|
|
// Response to CreateWantRequest.
message CreateWantResponse {
  // Detail of a want.
  // NOTE(review): presumably the want that was just created — confirm.
  WantDetail data = 1;
}
|
|
|
|
// Request to cancel a want. Carries the same fields as WantCancelEventV1.
message CancelWantRequest {
  // ID of the want to cancel.
  string want_id = 1;
  // Source metadata for the cancellation.
  EventSource source = 2;
  // Optional comment; declared `optional`, so presence is tracked explicitly.
  optional string comment = 3;
}
|
|
// Response to CancelWantRequest.
message CancelWantResponse {
  // Detail of a want.
  // NOTE(review): presumably the want after cancellation — confirm.
  WantDetail data = 1;
}
|
|
|
|
// Request to fetch a single want by ID.
message GetWantRequest {
  // ID of the want to fetch.
  string want_id = 1;
}
|
|
// Response to GetWantRequest.
message GetWantResponse {
  // Detail of a want.
  // NOTE(review): behavior when the ID is unknown (unset field vs. error) is
  // not specified in this file — confirm.
  WantDetail data = 1;
}
|
|
|
|
// Placeholder request for creating a taint; no fields are defined yet.
message CreateTaintRequest {
  // TODO
}
|
|
// Placeholder response for creating a taint; no fields are defined yet.
message CreateTaintResponse {
  // TODO
}
|
|
|
|
// Placeholder request for fetching a taint; no fields are defined yet.
message GetTaintRequest {
  // TODO
}
|
|
// Placeholder response for fetching a taint; no fields are defined yet.
message GetTaintResponse {
  // TODO
}
|
|
|
|
// Not implemented yet
//message CancelTaintRequest {
//  // TODO
//}
//message CancelTaintResponse {
//  // TODO
//}
|