Planning stage 1.1

This commit is contained in:
Stuart Axelbrooke 2025-07-05 14:46:35 -07:00
parent 7785af3a69
commit 21f457c4f7
2 changed files with 2 additions and 338 deletions

View file

@ -169,341 +169,3 @@ service DataBuildService {
// User-facing: build the desired partitions
rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Partition Activity Log
///////////////////////////////////////////////////////////////////////////////////////////////
// Lifecycle states of a partition.
enum PartitionStatus {
  // Zero value.
  PARTITION_UNKNOWN = 0;
  // The partition was requested but nothing has been scheduled for it yet.
  PARTITION_REQUESTED = 1;
  // A job has been scheduled to produce this partition.
  PARTITION_SCHEDULED = 2;
  // A job is actively building this partition.
  PARTITION_BUILDING = 3;
  // The partition was built successfully and is available.
  PARTITION_AVAILABLE = 4;
  // The build of this partition failed.
  PARTITION_FAILED = 5;
  // The partition exists, but its upstream dependencies have changed.
  PARTITION_STALE = 6;
  // The request was delegated to an already existing build.
  PARTITION_DELEGATED = 7;
}
// One entry in the partition activity log.
message PartitionEvent {
  // -- Event identity --
  string partition_event_id = 1;
  // NOTE(review): unit and epoch are not stated in this file; confirm.
  int64 timestamp = 2;

  // -- Partition identification --
  PartitionRef partition_ref = 3;
  PartitionStatus status = 4;

  // -- Build context --
  // Links to the graph execution.
  string job_graph_run_id = 5;
  // Links to the specific job run.
  string job_run_id = 6;
  // The job that produces this partition.
  JobLabel producing_job = 7;

  // -- Coordination metadata --
  // Who requested this partition.
  repeated string requesting_clients = 8;
  // If the request was delegated, the run it was delegated to.
  string delegated_to_run_id = 9;

  // -- Dependencies --
  repeated PartitionRef upstream_deps = 10;
  repeated PartitionRef downstream_deps = 11;

  // -- Data about the partition --
  // Present when status = AVAILABLE.
  PartitionManifest manifest = 12;
  // Present when status = FAILED.
  string failure_reason = 13;

  // -- Storage metadata --
  string storage_backend = 14;
  map<string, string> storage_metadata = 15;
}
// Query capabilities
// Criteria carried by QueryPartitionEventsRequest.
// NOTE(review): how the individual filters combine (AND vs. OR) is not
// specified in this file; confirm against the implementation.
message PartitionEventQuery {
  // Partitions to match.
  repeated PartitionRef partition_refs = 1;
  // Statuses to match.
  repeated PartitionStatus statuses = 2;
  // Job graph run IDs to match.
  repeated string job_graph_run_ids = 3;
  // Time window to match.
  TimeRange time_range = 4;

  // Presumably the page size and starting position; TODO confirm.
  int32 limit = 5;
  int32 offset = 6;

  // Requested ordering of the results.
  OrderBy order_by = 7;
}
// A pair of int64 time bounds.
// NOTE(review): units and inclusivity of the bounds are not specified here.
message TimeRange {
  int64 start = 1;
  int64 end = 2;
}
// Ordering specification: a field name plus a direction flag.
message OrderBy {
  // Name of the field to order by.
  // NOTE(review): the set of accepted names is not defined in this file.
  string field = 1;
  // Direction flag; the proto3 default (false) presumably means descending.
  bool ascending = 2;
}
// Stream filtering
// Filter carried by StreamPartitionEventsRequest.
message EventStreamFilter {
  // Partitions to match.
  repeated PartitionRef partition_refs = 1;
  // Statuses to match.
  repeated PartitionStatus statuses = 2;
  // NOTE(review): presumably a lower time bound for streamed events; the
  // unit is not specified here.
  int64 since = 3;
}
// Coordination support
// Describes a build for a partition; returned in
// GetActiveBuildStatusResponse.
message ActiveBuild {
  // The partition concerned.
  PartitionRef partition_ref = 1;
  // ID of the job graph run.
  string job_graph_run_id = 2;
  // Status of the partition.
  PartitionStatus status = 3;
  // Clients that requested the partition.
  repeated string requesting_clients = 4;
  // NOTE(review): unit and epoch are not specified in this file.
  int64 started_at = 5;
}
// A dependency graph expressed as a root partition plus node and edge lists.
message DependencyGraph {
  // Root partition of the graph.
  PartitionRef root = 1;
  // Nodes of the graph.
  repeated DependencyNode nodes = 2;
  // Edges of the graph.
  repeated DependencyEdge edges = 3;
}
// A node of a DependencyGraph: one partition and its status.
message DependencyNode {
  // The partition this node stands for.
  PartitionRef partition_ref = 1;
  // Status of that partition.
  PartitionStatus status = 2;
  // NOTE(review): unit and epoch are not specified in this file.
  int64 last_updated = 3;
}
// An edge of a DependencyGraph, connecting two partitions.
message DependencyEdge {
  // Partition at the source end of the edge.
  PartitionRef source = 1;
  // Partition at the target end of the edge.
  PartitionRef target = 2;
  // Kind of relationship the edge expresses.
  DependencyType type = 3;
}
// Kind of relationship carried by a DependencyEdge.
// NOTE(review): the zero value is a real relationship (no UNSPECIFIED entry)
// and the values are not prefixed with the enum name. Changing either would
// break existing users, so they are left as declared.
enum DependencyType {
  DEPENDS_ON = 0;
  PRODUCES = 1;
}
// Request/Response message types
// Request message for Catalog.AppendPartitionEvent.
message AppendPartitionEventRequest {
  // The event to append.
  PartitionEvent event = 1;
}
// Response message for Catalog.AppendPartitionEvent.
message AppendPartitionEventResponse {
  // Presumably the ID of the appended event; TODO confirm.
  string event_id = 1;
}
// Request message for Catalog.GetLatestPartitionEvent.
message GetLatestPartitionEventRequest {
  // The partition to look up.
  PartitionRef partition_ref = 1;
}
// Response message for Catalog.GetLatestPartitionEvent.
message GetLatestPartitionEventResponse {
  // The returned event.
  // NOTE(review): behaviour when no event exists is not specified here.
  PartitionEvent event = 1;
}
// Request message for Catalog.QueryPartitionEvents.
message QueryPartitionEventsRequest {
  // The query to evaluate.
  PartitionEventQuery query = 1;
}
// Response message for Catalog.QueryPartitionEvents.
message QueryPartitionEventsResponse {
  // The returned events.
  repeated PartitionEvent events = 1;
  // NOTE(review): presumably the number of matches before limit/offset are
  // applied; confirm against the implementation.
  int64 total_count = 2;
}
// Request message for Catalog.StreamPartitionEvents.
message StreamPartitionEventsRequest {
  // Filter for the event stream.
  EventStreamFilter filter = 1;
}
// Request message for Catalog.RequestPartition.
message RequestPartitionRequest {
  // The partition being requested.
  PartitionRef partition_ref = 1;
  // Identifier of the requesting client.
  string client_id = 2;
  // Options for this request.
  RequestOptions options = 3;
}
// Options attached to a RequestPartitionRequest.
message RequestOptions {
  // Delegation flag; the proto3 default is false.
  // NOTE(review): exact delegation semantics are not specified in this file.
  bool allow_delegation = 1;
  // Timeout, in seconds (per the field name).
  // NOTE(review): the meaning of 0 is not specified here.
  int32 timeout_seconds = 2;
}
// Response message for Catalog.RequestPartition.
message RequestPartitionResponse {
  // Outcome of the request.
  RequestResult result = 1;
}
// Outcome of a partition request; at most one of the cases below is set.
message RequestResult {
  oneof result {
    // Carries the partition reference together with its manifest.
    PartitionLocation available = 1;
    // Carries the job graph run referenced by a delegation.
    DelegationToken delegated = 2;
    // Carries the job graph run referenced by a build.
    BuildToken building = 3;
  }
}
// Pairs a partition reference with its manifest.
message PartitionLocation {
  PartitionRef partition_ref = 1;
  PartitionManifest manifest = 2;
}
// Payload of the `delegated` case of RequestResult.
message DelegationToken {
  // ID of the job graph run.
  string job_graph_run_id = 1;
  // NOTE(review): unit and epoch are not specified in this file.
  int64 estimated_completion = 2;
}
// Payload of the `building` case of RequestResult.
message BuildToken {
  // ID of the job graph run.
  string job_graph_run_id = 1;
  // NOTE(review): unit and epoch are not specified in this file.
  int64 started_at = 2;
}
// Request message for Catalog.GetActiveBuildStatus.
message GetActiveBuildStatusRequest {
  // Partitions to report on.
  repeated PartitionRef partition_refs = 1;
}
// Response message for Catalog.GetActiveBuildStatus.
message GetActiveBuildStatusResponse {
  // The returned builds.
  repeated ActiveBuild active_builds = 1;
}
// Request message for Catalog.GetDependencyGraph.
message GetDependencyGraphRequest {
  // The partition to fetch the graph for.
  PartitionRef partition_ref = 1;
  // NOTE(review): presumably how many levels to traverse; the meaning of 0 is
  // not specified here.
  int32 depth = 2;
}
// Response message for Catalog.GetDependencyGraph.
message GetDependencyGraphResponse {
  // The returned graph.
  DependencyGraph graph = 1;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Catalog
///////////////////////////////////////////////////////////////////////////////////////////////
//
// Job Run Events
//
// Every state a job passes through over its lifetime.
// NOTE(review): the zero value doubles as the real initial state; there is no
// separate UNSPECIFIED entry.
enum JobRunStatus {
  // Initial state.
  NOT_SCHEDULED = 0;
  // Preconditions are met and the job has been scheduled.
  JOB_SCHEDULED = 1;
  // Execution has begun.
  JOB_RUNNING = 2;
  // Execution finished successfully.
  JOB_SUCCESS = 3;
  // Execution failed.
  JOB_FAILED = 4;
}
// Job event payload that carries a job's config.
message JobConfigEvent {
  JobConfig config = 1;
}
// Job event payload that carries log entries as strings.
message JobLogsEvent {
  repeated string logs = 1;
}
//
// An event that belongs to a single job run.
message JobEvent {
  // -- IDs --
  // Unique ID of the job graph run.
  string job_graph_run_id = 1;
  // Unique ID of the job run.
  string job_run_id = 2;
  // Unique ID of this event.
  string job_graph_run_event_id = 3;
  // Sequence number of this event; gives the correct ordering of events
  // within one job_graph_run_id.
  int64 sequence = 4;

  // -- Metadata --
  // Status of the job run as of this event.
  JobRunStatus status = 10;
  // Hash key of (label, outputs), used to associate different runs over time.
  string job_key = 11;
  JobLabel label = 12;
  repeated PartitionRef outputs = 13;

  // -- Payload --
  // Intended as a sum type over the events below.
  // NOTE(review): these are ordinary fields, not a oneof, so the schema does
  // not stop both from being set.
  JobConfigEvent config = 20;
  JobLogsEvent logs = 21;
}
//
// Job Graph Run Events
//
// Graph event payload that carries a job graph.
message GraphAnalyzeEvent {
  JobGraph graph = 1;
}
// Represents a change in status for a datadep. Currently declares no fields.
message GraphDataDepEvent {
}
// Every state a job graph run passes through over its lifetime.
// NOTE(review): the zero value doubles as the real initial state; there is no
// separate UNSPECIFIED entry.
enum JobGraphRunStatus {
  // Initial state; the graph is analyzed before it executes.
  GRAPH_STARTED = 0;
  // Analysis has completed.
  GRAPH_ANALYZED = 1;
  // The graph stays here until the deps of at least 1 job are satisfied.
  AWAITING_DEPS = 2;
  // At least 1 job is executing. If the count drops to 0, the state goes back
  // to AWAITING_DEPS.
  GRAPH_RUNNING = 3;
  // Graph execution finished successfully.
  GRAPH_SUCCESS = 4;
  // Graph execution failed.
  GRAPH_FAILED = 5;
}
// An event that belongs to a job graph run as a whole.
message GraphEvent {
  // -- IDs --
  // Unique ID of the job graph run.
  string job_graph_run_id = 1;
  // Unique ID of this event.
  string job_graph_run_event_id = 2;
  // Sequence number of this event; gives the correct ordering of events
  // within one job_graph_run_id.
  int64 sequence = 3;

  // -- Metadata --
  // Status of the job graph run as of this event.
  JobGraphRunStatus status = 10;
  GraphLabel label = 11;

  // -- Payload --
  // Intended as a sum type; only one payload field is declared so far.
  GraphAnalyzeEvent analysis = 20;
}
// An entry in the sequence of events that completely describes the progress
// of a job graph build.
message JobGraphRunEvent {
  string job_graph_run_event_id = 1;
  // NOTE(review): unit and epoch are not specified in this file.
  int64 timestamp = 2;

  // Intended as a sum type over the possible events.
  // NOTE(review): these are ordinary fields, not a oneof, so the schema does
  // not stop both from being set.
  JobEvent job_event = 10;
  GraphEvent graph_event = 11;
}
// A job graph run: its ID, its graph, and a list of events.
message JobGraphRun {
  // ID of this job graph run.
  string job_graph_run_id = 1;
  // The job graph of this run.
  JobGraph graph = 2;
  // Events of this run.
  repeated JobGraphRunEvent events = 3;
}
// Query carried by ListJobGraphRunsRequest.
// TODO: no fields are declared yet.
message JobGraphRunQuery {
}
// Request message for Catalog.ListJobGraphRuns.
message ListJobGraphRunsRequest {
  JobGraphRunQuery query = 1;
}
// Response message for Catalog.ListJobGraphRuns.
message ListJobGraphRunsResponse {
  repeated JobGraphRun runs = 1;
}
// Query carried by ListPartitionManifestsRequest.
// TODO: no fields are declared yet.
message PartitionManifestsQuery {
}
// Request message for Catalog.ListPartitions.
message ListPartitionManifestsRequest {
  PartitionManifestsQuery query = 1;
}
// Response message for Catalog.ListPartitions.
message ListPartitionManifestsResponse {
  repeated PartitionManifest manifests = 1;
}
// Catalog service: lookup of job graph runs and partition manifests, the
// partition activity log, and build coordination.
service Catalog {
  // -- Lookups (the original "JTBDs" list) --

  // Enables lookup of job graph runs, current or past.
  rpc ListJobGraphRuns(ListJobGraphRunsRequest)
      returns (ListJobGraphRunsResponse);

  // Enables lookup of partition manifests produced as part of prior job runs.
  // NOTE(review): the RPC is named ListPartitions while its messages are named
  // ListPartitionManifests*; kept as declared.
  rpc ListPartitions(ListPartitionManifestsRequest)
      returns (ListPartitionManifestsResponse);

  // -- Partition activity log methods --

  rpc AppendPartitionEvent(AppendPartitionEventRequest)
      returns (AppendPartitionEventResponse);

  rpc GetLatestPartitionEvent(GetLatestPartitionEventRequest)
      returns (GetLatestPartitionEventResponse);

  rpc QueryPartitionEvents(QueryPartitionEventsRequest)
      returns (QueryPartitionEventsResponse);

  // Server-streaming: responds with a stream of PartitionEvent messages.
  rpc StreamPartitionEvents(StreamPartitionEventsRequest)
      returns (stream PartitionEvent);

  // -- Coordination methods --

  rpc RequestPartition(RequestPartitionRequest)
      returns (RequestPartitionResponse);

  rpc GetActiveBuildStatus(GetActiveBuildStatusRequest)
      returns (GetActiveBuildStatusResponse);

  rpc GetDependencyGraph(GetDependencyGraphRequest)
      returns (GetDependencyGraphResponse);
}

View file

@ -28,6 +28,8 @@ message BuildEvent {
}
```
Build events are effectively job events, as they are the unit of work, but they also represent progress towards building specific partitions and their downstreams. Each build request ID corresponds to one literal request to the service (which may optionally accept a caller-provided build request ID). We expect most build requests to involve multiple partitions, so we should be able to view the tree structure over time, watching jobs succeed and progress being made towards the requested partition being built. Each individual job run should have its own ID so that it can be referenced later.
TODO narrative
## 2. Persistence