Planning stage 1.1
This commit is contained in:
parent
7785af3a69
commit
21f457c4f7
2 changed files with 2 additions and 338 deletions
|
|
@ -169,341 +169,3 @@ service DataBuildService {
|
||||||
// User-facing: build the desired partitions
|
// User-facing: build the desired partitions
|
||||||
rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
|
rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Partition Activity Log
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// Partition lifecycle states
|
|
||||||
enum PartitionStatus {
|
|
||||||
PARTITION_UNKNOWN = 0;
|
|
||||||
PARTITION_REQUESTED = 1; // Partition requested but not yet scheduled
|
|
||||||
PARTITION_SCHEDULED = 2; // Job scheduled to produce this partition
|
|
||||||
PARTITION_BUILDING = 3; // Job actively building this partition
|
|
||||||
PARTITION_AVAILABLE = 4; // Partition successfully built and available
|
|
||||||
PARTITION_FAILED = 5; // Partition build failed
|
|
||||||
PARTITION_STALE = 6; // Partition exists but upstream dependencies changed
|
|
||||||
PARTITION_DELEGATED = 7; // Request delegated to existing build
|
|
||||||
}
|
|
||||||
|
|
||||||
// Individual partition activity event
|
|
||||||
message PartitionEvent {
|
|
||||||
// Event identity
|
|
||||||
string partition_event_id = 1;
|
|
||||||
int64 timestamp = 2;
|
|
||||||
|
|
||||||
// Partition identification
|
|
||||||
PartitionRef partition_ref = 3;
|
|
||||||
PartitionStatus status = 4;
|
|
||||||
|
|
||||||
// Build context
|
|
||||||
string job_graph_run_id = 5; // Links to graph execution
|
|
||||||
string job_run_id = 6; // Links to specific job run
|
|
||||||
JobLabel producing_job = 7; // Which job produces this partition
|
|
||||||
|
|
||||||
// Coordination metadata
|
|
||||||
repeated string requesting_clients = 8; // Who requested this partition
|
|
||||||
string delegated_to_run_id = 9; // If delegated, which run
|
|
||||||
|
|
||||||
// Dependencies
|
|
||||||
repeated PartitionRef upstream_deps = 10;
|
|
||||||
repeated PartitionRef downstream_deps = 11;
|
|
||||||
|
|
||||||
// Data about the partition
|
|
||||||
PartitionManifest manifest = 12; // Present when status = AVAILABLE
|
|
||||||
string failure_reason = 13; // Present when status = FAILED
|
|
||||||
|
|
||||||
// Storage metadata
|
|
||||||
string storage_backend = 14;
|
|
||||||
map<string, string> storage_metadata = 15;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Query capabilities
|
|
||||||
message PartitionEventQuery {
|
|
||||||
repeated PartitionRef partition_refs = 1;
|
|
||||||
repeated PartitionStatus statuses = 2;
|
|
||||||
repeated string job_graph_run_ids = 3;
|
|
||||||
TimeRange time_range = 4;
|
|
||||||
int32 limit = 5;
|
|
||||||
int32 offset = 6;
|
|
||||||
OrderBy order_by = 7;
|
|
||||||
}
|
|
||||||
|
|
||||||
message TimeRange {
|
|
||||||
int64 start = 1;
|
|
||||||
int64 end = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message OrderBy {
|
|
||||||
string field = 1;
|
|
||||||
bool ascending = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stream filtering
|
|
||||||
message EventStreamFilter {
|
|
||||||
repeated PartitionRef partition_refs = 1;
|
|
||||||
repeated PartitionStatus statuses = 2;
|
|
||||||
int64 since = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Coordination support
|
|
||||||
message ActiveBuild {
|
|
||||||
PartitionRef partition_ref = 1;
|
|
||||||
string job_graph_run_id = 2;
|
|
||||||
PartitionStatus status = 3;
|
|
||||||
repeated string requesting_clients = 4;
|
|
||||||
int64 started_at = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
message DependencyGraph {
|
|
||||||
PartitionRef root = 1;
|
|
||||||
repeated DependencyNode nodes = 2;
|
|
||||||
repeated DependencyEdge edges = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
message DependencyNode {
|
|
||||||
PartitionRef partition_ref = 1;
|
|
||||||
PartitionStatus status = 2;
|
|
||||||
int64 last_updated = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
message DependencyEdge {
|
|
||||||
PartitionRef source = 1;
|
|
||||||
PartitionRef target = 2;
|
|
||||||
DependencyType type = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
enum DependencyType {
|
|
||||||
DEPENDS_ON = 0;
|
|
||||||
PRODUCES = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Request/Response message types
|
|
||||||
message AppendPartitionEventRequest {
|
|
||||||
PartitionEvent event = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message AppendPartitionEventResponse {
|
|
||||||
string event_id = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message GetLatestPartitionEventRequest {
|
|
||||||
PartitionRef partition_ref = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message GetLatestPartitionEventResponse {
|
|
||||||
PartitionEvent event = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message QueryPartitionEventsRequest {
|
|
||||||
PartitionEventQuery query = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message QueryPartitionEventsResponse {
|
|
||||||
repeated PartitionEvent events = 1;
|
|
||||||
int64 total_count = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StreamPartitionEventsRequest {
|
|
||||||
EventStreamFilter filter = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message RequestPartitionRequest {
|
|
||||||
PartitionRef partition_ref = 1;
|
|
||||||
string client_id = 2;
|
|
||||||
RequestOptions options = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
message RequestOptions {
|
|
||||||
bool allow_delegation = 1;
|
|
||||||
int32 timeout_seconds = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message RequestPartitionResponse {
|
|
||||||
RequestResult result = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message RequestResult {
|
|
||||||
oneof result {
|
|
||||||
PartitionLocation available = 1;
|
|
||||||
DelegationToken delegated = 2;
|
|
||||||
BuildToken building = 3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
message PartitionLocation {
|
|
||||||
PartitionRef partition_ref = 1;
|
|
||||||
PartitionManifest manifest = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message DelegationToken {
|
|
||||||
string job_graph_run_id = 1;
|
|
||||||
int64 estimated_completion = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message BuildToken {
|
|
||||||
string job_graph_run_id = 1;
|
|
||||||
int64 started_at = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message GetActiveBuildStatusRequest {
|
|
||||||
repeated PartitionRef partition_refs = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message GetActiveBuildStatusResponse {
|
|
||||||
repeated ActiveBuild active_builds = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message GetDependencyGraphRequest {
|
|
||||||
PartitionRef partition_ref = 1;
|
|
||||||
int32 depth = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message GetDependencyGraphResponse {
|
|
||||||
DependencyGraph graph = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Catalog
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
//
|
|
||||||
// Job Run Events
|
|
||||||
//
|
|
||||||
|
|
||||||
// The complete lifecycle that each job goes through
|
|
||||||
enum JobRunStatus {
|
|
||||||
// Initial state
|
|
||||||
NOT_SCHEDULED = 0;
|
|
||||||
// Job preconditions met, job scheduled
|
|
||||||
JOB_SCHEDULED = 1;
|
|
||||||
// Job execution has begun
|
|
||||||
JOB_RUNNING = 2;
|
|
||||||
// Job execution finished successfully
|
|
||||||
JOB_SUCCESS = 3;
|
|
||||||
// Job execution failed
|
|
||||||
JOB_FAILED = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
message JobConfigEvent { JobConfig config = 1; }
|
|
||||||
message JobLogsEvent { repeated string logs = 1; }
|
|
||||||
|
|
||||||
//
|
|
||||||
message JobEvent {
|
|
||||||
// IDs
|
|
||||||
// Unique ID for this job graph run
|
|
||||||
string job_graph_run_id = 1;
|
|
||||||
// Unique ID for this job run
|
|
||||||
string job_run_id = 2;
|
|
||||||
// Unique ID for this event
|
|
||||||
string job_graph_run_event_id = 3;
|
|
||||||
// Sequence number for this event, such that the ordering within job_graph_run_id is correct
|
|
||||||
int64 sequence = 4;
|
|
||||||
|
|
||||||
// Metadata
|
|
||||||
// Status of the job run as of this event
|
|
||||||
JobRunStatus status = 10;
|
|
||||||
// Hash key of (label, outputs) to associate different runs over time
|
|
||||||
string job_key = 11;
|
|
||||||
JobLabel label = 12;
|
|
||||||
repeated PartitionRef outputs = 13;
|
|
||||||
|
|
||||||
// Sum type of the below events
|
|
||||||
JobConfigEvent config = 20;
|
|
||||||
JobLogsEvent logs = 21;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Job Graph Run Events
|
|
||||||
//
|
|
||||||
|
|
||||||
message GraphAnalyzeEvent { JobGraph graph = 1; }
|
|
||||||
|
|
||||||
// Represents a change in status for a datadep
|
|
||||||
message GraphDataDepEvent {}
|
|
||||||
|
|
||||||
// The complete lifecycle that each job graph run goes through
|
|
||||||
enum JobGraphRunStatus {
|
|
||||||
// Initial state - graph will be analyzed before executing
|
|
||||||
GRAPH_STARTED = 0;
|
|
||||||
// Analysis completed
|
|
||||||
GRAPH_ANALYZED = 1;
|
|
||||||
// Graph is in this state until deps are satisfied for at least 1 job
|
|
||||||
AWAITING_DEPS = 2;
|
|
||||||
// Graph is executing at least 1 job (if this drops to 0, state goes back to AWAITING_DEPS)
|
|
||||||
GRAPH_RUNNING = 3;
|
|
||||||
// Graph execution finished successfully
|
|
||||||
GRAPH_SUCCESS = 4;
|
|
||||||
// Graph execution failed
|
|
||||||
GRAPH_FAILED = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
message GraphEvent {
|
|
||||||
// IDs
|
|
||||||
// Unique ID for this job graph run
|
|
||||||
string job_graph_run_id = 1;
|
|
||||||
// Unique ID for this event
|
|
||||||
string job_graph_run_event_id = 2;
|
|
||||||
// Sequence number for this event, such that the ordering within job_graph_run_id is correct
|
|
||||||
int64 sequence = 3;
|
|
||||||
|
|
||||||
// Metadata
|
|
||||||
// Current status of the job graph run as of this event
|
|
||||||
JobGraphRunStatus status = 10;
|
|
||||||
GraphLabel label = 11;
|
|
||||||
|
|
||||||
// Sum type of below events
|
|
||||||
GraphAnalyzeEvent analysis = 20;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The sequence of events that completely describes progress of the job graph build
|
|
||||||
message JobGraphRunEvent {
|
|
||||||
string job_graph_run_event_id = 1;
|
|
||||||
int64 timestamp = 2;
|
|
||||||
|
|
||||||
// Sum type for potential events
|
|
||||||
JobEvent job_event = 10;
|
|
||||||
GraphEvent graph_event = 11;
|
|
||||||
}
|
|
||||||
|
|
||||||
message JobGraphRun {
|
|
||||||
string job_graph_run_id = 1;
|
|
||||||
JobGraph graph = 2;
|
|
||||||
repeated JobGraphRunEvent events = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
message JobGraphRunQuery {
|
|
||||||
// TODO
|
|
||||||
}
|
|
||||||
|
|
||||||
message ListJobGraphRunsRequest { JobGraphRunQuery query = 1; }
|
|
||||||
message ListJobGraphRunsResponse { repeated JobGraphRun runs = 1; }
|
|
||||||
|
|
||||||
message PartitionManifestsQuery {
|
|
||||||
// TODO
|
|
||||||
}
|
|
||||||
|
|
||||||
message ListPartitionManifestsRequest { PartitionManifestsQuery query = 1; }
|
|
||||||
message ListPartitionManifestsResponse { repeated PartitionManifest manifests = 1; }
|
|
||||||
|
|
||||||
service Catalog {
|
|
||||||
// JTBDs
|
|
||||||
// -
|
|
||||||
|
|
||||||
// Enables lookup of job graph runs (current or past)
|
|
||||||
rpc ListJobGraphRuns(ListJobGraphRunsRequest) returns (ListJobGraphRunsResponse);
|
|
||||||
|
|
||||||
// Enables lookup of partition manifests produced as part of prior job runs
|
|
||||||
rpc ListPartitions(ListPartitionManifestsRequest) returns (ListPartitionManifestsResponse);
|
|
||||||
|
|
||||||
// Partition activity log methods
|
|
||||||
rpc AppendPartitionEvent(AppendPartitionEventRequest) returns (AppendPartitionEventResponse);
|
|
||||||
rpc GetLatestPartitionEvent(GetLatestPartitionEventRequest) returns (GetLatestPartitionEventResponse);
|
|
||||||
rpc QueryPartitionEvents(QueryPartitionEventsRequest) returns (QueryPartitionEventsResponse);
|
|
||||||
rpc StreamPartitionEvents(StreamPartitionEventsRequest) returns (stream PartitionEvent);
|
|
||||||
|
|
||||||
// Coordination methods
|
|
||||||
rpc RequestPartition(RequestPartitionRequest) returns (RequestPartitionResponse);
|
|
||||||
rpc GetActiveBuildStatus(GetActiveBuildStatusRequest) returns (GetActiveBuildStatusResponse);
|
|
||||||
rpc GetDependencyGraph(GetDependencyGraphRequest) returns (GetDependencyGraphResponse);
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,8 @@ message BuildEvent {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Build events are practically job events, as they are the unit of work, but they also represent progress towards building specific partitions and their downstreams. One build request ID represents the literal request to the service (potentially accepting a provided build request ID). The expectation is that most build requests involve multiple partitions, and we should be able to see the tree structure over time to see jobs succeeding and progress towards the requested partition being built. Individual job runs should have their own ID allowing them to be referenced later.
|
||||||
|
|
||||||
TODO narrative
|
TODO narrative
|
||||||
|
|
||||||
## 2. Persistence
|
## 2. Persistence
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue