512 lines
14 KiB
Protocol Buffer
512 lines
14 KiB
Protocol Buffer
syntax = "proto3";
|
|
|
|
import "google/protobuf/timestamp.proto";
|
|
import "google/protobuf/struct.proto";
|
|
|
|
package databuild.v1;
|
|
|
|
// Reference to a partition, carried as a single string.
message PartitionRef {
  // String form of the reference.
  string str = 1;
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
// Jobs
///////////////////////////////////////////////////////////////////////////////////////////////

//
// Job Config
//
|
|
|
|
// The type of a dependency.
//
// NOTE(review): the zero value is QUERY, so a DataDep that never sets
// dep_type reads back as QUERY. Values are left as-is for compatibility.
enum DepType {
  // Default.
  QUERY = 0;
  MATERIALIZE = 1;
}
|
|
|
|
// A data dependency: a partition reference together with its dependency type.
message DataDep {
  // Type of this dependency.
  DepType dep_type = 1;

  // The partition that is depended on.
  PartitionRef partition_ref = 2;
}
|
|
|
|
// Configuration for a job.
message JobConfig {
  // Partitions produced by this parameterization.
  repeated PartitionRef outputs = 1;

  // Data dependencies that are required.
  repeated DataDep inputs = 2;

  // Command line arguments.
  repeated string args = 3;

  // Environment variables (presumably variable name -> value).
  map<string, string> env = 4;
}
|
|
|
|
// Request message for the job configuration service.
message JobConfigureRequest {
  // Output partition refs carried by the request.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
// Response message for the job configuration service.
message JobConfigureResponse {
  // Job configurations returned by the service.
  repeated JobConfig configs = 1;
}
|
|
|
|
// Job configuration service. Implemented by the job.cfg bazel rule.
service JobConfigure {
  // Unary call: JobConfigureRequest in, JobConfigureResponse out.
  rpc Configure(JobConfigureRequest) returns (JobConfigureResponse);
}
|
|
|
|
//
// Job Exec
//
|
|
|
|
// Manifest that records the literal partitions consumed (and their manifests)
// in order to produce the specified partitions.
//
// NOTE(review): `inputs` is itself a list of PartitionManifest, so the schema
// places no bound on nesting depth. Protobuf parsers enforce a recursion
// limit; confirm that manifest depth stays bounded in practice.
message PartitionManifest {
  // The refs of the partitions produced by this job.
  repeated PartitionRef outputs = 1;

  // Manifests of the input partitions.
  repeated PartitionManifest inputs = 2;

  // Start time of job execution, as a google.protobuf.Timestamp.
  google.protobuf.Timestamp start_time = 3;

  // End time of job execution, as a google.protobuf.Timestamp.
  google.protobuf.Timestamp end_time = 4;

  // The task (job label plus job configuration) used to run the job.
  Task task = 5;
}
|
|
|
|
// Request message for JobExecute.Execute.
message JobExecuteRequest {
  // Output partition refs carried by the request.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
// Metadata for the complete set of partitions produced by this job.
message JobExecuteResponse {
  // Partition manifests returned by the execution.
  repeated PartitionManifest manifests = 1;
}
|
|
|
|
// Job execution service. Implemented by the job.exec bazel rule.
service JobExecute {
  // Unary call: JobExecuteRequest in, JobExecuteResponse out.
  rpc Execute(JobExecuteRequest) returns (JobExecuteResponse);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
// Graphs
///////////////////////////////////////////////////////////////////////////////////////////////

//
// GraphLookup
//
|
|
|
|
// Label identifying a job.
message JobLabel {
  // The bazel label that references the job_target.
  string label = 1;
}
|
|
|
|
// Request message for GraphLookup.Lookup.
message GraphLookupRequest {
  // Output partition refs carried by the request.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
// A task that has not been configured yet.
message TaskRef {
  // The job whose configure/exec targets will be used.
  JobLabel job = 1;

  // The partition refs this task is responsible for producing; the configure
  // target will be invoked with these refs.
  repeated PartitionRef outputs = 2;
}
|
|
|
|
// The complete set of tasks needed to produce the requested partitions.
message GraphLookupResponse {
  // One not-yet-configured task per entry.
  repeated TaskRef task_refs = 1;
}
|
|
|
|
// Graph lookup service. Implemented per graph.
service GraphLookup {
  // Unary call: GraphLookupRequest in, GraphLookupResponse out.
  rpc Lookup(GraphLookupRequest) returns (GraphLookupResponse);
}
|
|
|
|
// Request message for the graph analyze service.
message GraphAnalyzeRequest {
  // Output partition refs carried by the request.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
//
// JobGraph
//
|
|
|
|
// A job label paired with the configuration for that job.
message Task {
  // The bazel label uniquely identifying the job.
  JobLabel job = 1;

  // The configuration for the job.
  JobConfig config = 2;
}
|
|
|
|
// Label identifying a graph.
message GraphLabel {
  // The bazel label referencing the graph.
  string label = 1;
}
|
|
|
|
// A job graph.
message JobGraph {
  // The bazel label of the graph to be executed.
  GraphLabel label = 1;

  // The output partitions to be produced by this graph.
  repeated PartitionRef outputs = 2;

  // The tasks (job label plus job configuration) that make up this graph.
  repeated Task nodes = 3;
}
|
|
|
|
// Response message for the graph analyze service.
message GraphAnalyzeResponse {
  // The job graph returned by the analysis.
  JobGraph graph = 1;
}
|
|
|
|
// Response message for DataBuildService.Execute.
message GraphExecuteResponse {
  // Partition manifests returned by the call.
  repeated PartitionManifest manifests = 1;
}
|
|
// Request message for DataBuildService.Build.
message GraphBuildRequest {
  // Output partition refs carried by the request.
  repeated PartitionRef outputs = 1;
}
|
|
// Response message for DataBuildService.Build.
message GraphBuildResponse {
  // Partition manifests returned by the call.
  repeated PartitionManifest manifests = 1;
}
|
|
|
|
// Service for job configuration and graph analysis.
service DataBuildService {
  // Currently commented out: get job configurations for the specified output
  // references.
  // rpc GetJobConfigs(JobConfigureRequest) returns (JobConfigureResponse) {}

  // Analyze and get the job graph for the specified output references.
  rpc AnalyzeGraph(GraphAnalyzeRequest) returns (GraphAnalyzeResponse);

  // Execute the specified job graph (implemented by databuild).
  rpc Execute(JobGraph) returns (GraphExecuteResponse);

  // User-facing: build the desired partitions.
  rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
// Partition Activity Log
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Partition lifecycle states.
enum PartitionStatus {
  // Zero value.
  PARTITION_UNKNOWN = 0;

  // Partition requested but not yet scheduled.
  PARTITION_REQUESTED = 1;

  // Job scheduled to produce this partition.
  PARTITION_SCHEDULED = 2;

  // Job actively building this partition.
  PARTITION_BUILDING = 3;

  // Partition successfully built and available.
  PARTITION_AVAILABLE = 4;

  // Partition build failed.
  PARTITION_FAILED = 5;

  // Partition exists but upstream dependencies changed.
  PARTITION_STALE = 6;

  // Request delegated to existing build.
  PARTITION_DELEGATED = 7;
}
|
|
|
|
// A single partition activity event.
message PartitionEvent {
  // ---- Event identity ----

  // ID of this event.
  string partition_event_id = 1;

  // Timestamp attached to the event.
  google.protobuf.Timestamp timestamp = 2;

  // ---- Partition identification ----

  // The partition this event is about.
  PartitionRef partition_ref = 3;

  // Lifecycle status recorded by this event.
  PartitionStatus status = 4;

  // ---- Build context ----

  // Links to graph execution.
  string job_graph_run_id = 5;

  // Links to specific job run.
  string job_run_id = 6;

  // Which job produces this partition.
  JobLabel producing_job = 7;

  // ---- Coordination metadata ----

  // Who requested this partition.
  repeated string requesting_clients = 8;

  // If delegated, which run.
  string delegated_to_run_id = 9;

  // ---- Dependencies ----

  repeated PartitionRef upstream_deps = 10;
  repeated PartitionRef downstream_deps = 11;

  // ---- Data about the partition ----

  // Present when status is PARTITION_AVAILABLE.
  PartitionManifest manifest = 12;

  // Present when status is PARTITION_FAILED.
  string failure_reason = 13;

  // ---- Storage metadata ----

  string storage_backend = 14;
  map<string, string> storage_metadata = 15;
}
|
|
|
|
// Query capabilities: criteria for selecting partition events.
//
// NOTE(review): how the criteria combine and how empty fields are treated is
// not specified in this file; confirm against the implementation.
message PartitionEventQuery {
  // Partition refs to query for.
  repeated PartitionRef partition_refs = 1;

  // Partition statuses to query for.
  repeated PartitionStatus statuses = 2;

  // Job graph run IDs to query for.
  repeated string job_graph_run_ids = 3;

  // Time range for the query.
  TimeRange time_range = 4;

  // Presumably the maximum number of events to return -- TODO confirm.
  int32 limit = 5;

  // Presumably the number of events to skip -- TODO confirm.
  int32 offset = 6;

  // Requested ordering.
  OrderBy order_by = 7;
}
|
|
|
|
// A time range given by two timestamps.
//
// NOTE(review): whether each bound is inclusive or exclusive is not stated
// in this file.
message TimeRange {
  // Start of the range.
  google.protobuf.Timestamp start = 1;

  // End of the range.
  google.protobuf.Timestamp end = 2;
}
|
|
|
|
// Ordering specification: a field name plus a direction flag.
message OrderBy {
  // Name of the field to order by (accepted names are not listed here).
  string field = 1;

  // Direction flag; defaults to false when unset (proto3 implicit presence).
  bool ascending = 2;
}
|
|
|
|
// Stream filtering: criteria carried by StreamPartitionEventsRequest.
message EventStreamFilter {
  // Partition refs to filter on.
  repeated PartitionRef partition_refs = 1;

  // Partition statuses to filter on.
  repeated PartitionStatus statuses = 2;

  // Timestamp bound; presumably only later events are streamed -- TODO confirm.
  google.protobuf.Timestamp since = 3;
}
|
|
|
|
// Coordination support: one entry of GetActiveBuildStatusResponse.
message ActiveBuild {
  // The partition concerned.
  PartitionRef partition_ref = 1;

  // ID of the associated job graph run.
  string job_graph_run_id = 2;

  // Status of the partition.
  PartitionStatus status = 3;

  // Requesting clients.
  repeated string requesting_clients = 4;

  // Start timestamp.
  google.protobuf.Timestamp started_at = 5;
}
|
|
|
|
// A dependency graph: a root partition plus lists of nodes and edges.
message DependencyGraph {
  // Root partition of the graph.
  PartitionRef root = 1;

  // Nodes of the graph.
  repeated DependencyNode nodes = 2;

  // Edges of the graph.
  repeated DependencyEdge edges = 3;
}
|
|
|
|
// A node of a DependencyGraph.
message DependencyNode {
  // The partition this node stands for.
  PartitionRef partition_ref = 1;

  // Status of that partition.
  PartitionStatus status = 2;

  // Last-updated timestamp.
  google.protobuf.Timestamp last_updated = 3;
}
|
|
|
|
// An edge of a DependencyGraph, linking two partitions.
message DependencyEdge {
  // Source partition of the edge.
  PartitionRef source = 1;

  // Target partition of the edge.
  PartitionRef target = 2;

  // Kind of relationship this edge expresses.
  DependencyType type = 3;
}
|
|
|
|
// Kind of relationship carried by a DependencyEdge.
//
// NOTE(review): the zero value is DEPENDS_ON, so an edge that never sets
// `type` reads back as DEPENDS_ON.
enum DependencyType {
  DEPENDS_ON = 0;
  PRODUCES = 1;
}
|
|
|
|
// Request/Response message types
|
|
// Request message for Catalog.AppendPartitionEvent.
message AppendPartitionEventRequest {
  // The event carried by the request.
  PartitionEvent event = 1;
}
|
|
|
|
// Response message for Catalog.AppendPartitionEvent.
message AppendPartitionEventResponse {
  // An event ID (presumably that of the appended event -- TODO confirm).
  string event_id = 1;
}
|
|
|
|
// Request message for Catalog.GetLatestPartitionEvent.
message GetLatestPartitionEventRequest {
  // The partition being asked about.
  PartitionRef partition_ref = 1;
}
|
|
|
|
// Response message for Catalog.GetLatestPartitionEvent.
message GetLatestPartitionEventResponse {
  // The event carried by the response.
  PartitionEvent event = 1;
}
|
|
|
|
// Request message for Catalog.QueryPartitionEvents.
message QueryPartitionEventsRequest {
  // The query carried by the request.
  PartitionEventQuery query = 1;
}
|
|
|
|
// Response message for Catalog.QueryPartitionEvents.
message QueryPartitionEventsResponse {
  // Events carried by the response.
  repeated PartitionEvent events = 1;

  // A total count. NOTE(review): presumably the number of matching events
  // regardless of limit/offset -- TODO confirm.
  int64 total_count = 2;
}
|
|
|
|
// Request message for Catalog.StreamPartitionEvents.
message StreamPartitionEventsRequest {
  // The stream filter carried by the request.
  EventStreamFilter filter = 1;
}
|
|
|
|
// Request message for Catalog.RequestPartition.
message RequestPartitionRequest {
  // The partition being requested.
  PartitionRef partition_ref = 1;

  // Client identifier supplied with the request.
  string client_id = 2;

  // Options for this request.
  RequestOptions options = 3;
}
|
|
|
|
// Options attached to a RequestPartitionRequest.
message RequestOptions {
  // Delegation flag; defaults to false when unset (proto3 implicit presence).
  bool allow_delegation = 1;

  // Timeout, in seconds per the field name. NOTE(review): the meaning of 0
  // (unset) is not specified in this file.
  int32 timeout_seconds = 2;
}
|
|
|
|
// Response message for Catalog.RequestPartition.
message RequestPartitionResponse {
  // Outcome of the request.
  RequestResult result = 1;
}
|
|
|
|
// Outcome of a partition request; at most one of the variants is set.
message RequestResult {
  oneof result {
    // Set in the "available" case.
    PartitionLocation available = 1;

    // Set in the "delegated" case.
    DelegationToken delegated = 2;

    // Set in the "building" case.
    BuildToken building = 3;
  }
}
|
|
|
|
// Payload of RequestResult.available: a partition ref and its manifest.
message PartitionLocation {
  // The partition concerned.
  PartitionRef partition_ref = 1;

  // Manifest for that partition.
  PartitionManifest manifest = 2;
}
|
|
|
|
// Payload of RequestResult.delegated.
message DelegationToken {
  // ID of a job graph run (presumably the one delegated to -- TODO confirm).
  string job_graph_run_id = 1;

  // Estimated completion timestamp.
  google.protobuf.Timestamp estimated_completion = 2;
}
|
|
|
|
// Payload of RequestResult.building.
message BuildToken {
  // ID of a job graph run (presumably the one doing the build -- TODO confirm).
  string job_graph_run_id = 1;

  // Start timestamp.
  google.protobuf.Timestamp started_at = 2;
}
|
|
|
|
// Request message for Catalog.GetActiveBuildStatus.
message GetActiveBuildStatusRequest {
  // Partition refs carried by the request.
  repeated PartitionRef partition_refs = 1;
}
|
|
|
|
// Response message for Catalog.GetActiveBuildStatus.
message GetActiveBuildStatusResponse {
  // Active builds carried by the response.
  repeated ActiveBuild active_builds = 1;
}
|
|
|
|
// Request message for Catalog.GetDependencyGraph.
message GetDependencyGraphRequest {
  // The partition being asked about.
  PartitionRef partition_ref = 1;

  // Depth parameter. NOTE(review): presumably the number of dependency
  // levels to include; the meaning of 0 (unset) is not specified here.
  int32 depth = 2;
}
|
|
|
|
// Response message for Catalog.GetDependencyGraph.
message GetDependencyGraphResponse {
  // The dependency graph carried by the response.
  DependencyGraph graph = 1;
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
// Catalog
///////////////////////////////////////////////////////////////////////////////////////////////

//
// Job Run Events
//
|
|
|
|
// The complete lifecycle that each job goes through.
enum JobRunStatus {
  // Initial state (also the zero value).
  NOT_SCHEDULED = 0;

  // Job preconditions met, job scheduled.
  JOB_SCHEDULED = 1;

  // Job execution has begun.
  JOB_RUNNING = 2;

  // Job execution finished successfully.
  JOB_SUCCESS = 3;

  // Job execution failed.
  JOB_FAILED = 4;
}
|
|
|
|
// Event payload carrying a job configuration (see JobEvent.config).
message JobConfigEvent {
  // The job configuration.
  JobConfig config = 1;
}
|
|
// Event payload carrying job logs (see JobEvent.logs).
message JobLogsEvent {
  // Log entries, as strings.
  repeated string logs = 1;
}
|
|
|
|
// Event for a job run inside a job graph run.
//
// NOTE(review): `config` and `logs` are intended as a sum type, but they are
// declared as ordinary fields rather than a `oneof`, so the schema does not
// prevent both (or neither) from being set. Consumers must check which one
// is present. Moving them into a `oneof` would change generated code, so
// confirm with consumers before doing that.
message JobEvent {
  // ---- IDs ----

  // Unique ID for this job graph run.
  string job_graph_run_id = 1;

  // Unique ID for this job run.
  string job_run_id = 2;

  // Unique ID for this event.
  string job_graph_run_event_id = 3;

  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct.
  int64 sequence = 4;

  // ---- Metadata ----

  // Status of the job run as of this event.
  JobRunStatus status = 10;

  // Hash key of (label, outputs) to associate different runs over time.
  string job_key = 11;

  // Label of the job.
  JobLabel label = 12;

  // Output partition refs of the job.
  repeated PartitionRef outputs = 13;

  // ---- Event payload (intended sum type; not enforced, see above) ----

  // Job configuration payload.
  JobConfigEvent config = 20;

  // Job logs payload.
  JobLogsEvent logs = 21;
}
|
|
|
|
//
// Job Graph Run Events
//
|
|
|
|
// Event payload carrying a job graph (see GraphEvent.analysis).
message GraphAnalyzeEvent {
  // The job graph.
  JobGraph graph = 1;
}
|
|
|
|
// Represents a change in status for a datadep.
//
// No fields are defined yet.
message GraphDataDepEvent {
}
|
|
|
|
// The complete lifecycle that each job graph run goes through.
enum JobGraphRunStatus {
  // Initial state (also the zero value): the graph will be analyzed before
  // executing.
  GRAPH_STARTED = 0;

  // Analysis completed.
  GRAPH_ANALYZED = 1;

  // The graph stays in this state until deps are satisfied for at least
  // 1 job.
  AWAITING_DEPS = 2;

  // The graph is executing at least 1 job (if this drops to 0, the state
  // goes back to AWAITING_DEPS).
  GRAPH_RUNNING = 3;

  // Graph execution finished successfully.
  GRAPH_SUCCESS = 4;

  // Graph execution failed.
  GRAPH_FAILED = 5;
}
|
|
|
|
// Event for a job graph run as a whole.
message GraphEvent {
  // ---- IDs ----

  // Unique ID for this job graph run.
  string job_graph_run_id = 1;

  // Unique ID for this event.
  string job_graph_run_event_id = 2;

  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct.
  int64 sequence = 3;

  // ---- Metadata ----

  // Current status of the job graph run as of this event.
  JobGraphRunStatus status = 10;

  // Label of the graph.
  GraphLabel label = 11;

  // ---- Event payload (sum type; currently a single member) ----

  // Graph analysis payload.
  GraphAnalyzeEvent analysis = 20;
}
|
|
|
|
// One entry in the sequence of events that completely describes progress of
// the job graph build.
//
// NOTE(review): `job_event` and `graph_event` are intended as a sum type, but
// they are declared as ordinary fields rather than a `oneof`, so the schema
// does not prevent both (or neither) from being set. Consumers must check
// which one is present. Moving them into a `oneof` would change generated
// code, so confirm with consumers before doing that.
message JobGraphRunEvent {
  // ID of this event.
  string job_graph_run_event_id = 1;

  // Timestamp attached to the event.
  google.protobuf.Timestamp timestamp = 2;

  // ---- Event payload (intended sum type; not enforced, see above) ----

  // Job-level event.
  JobEvent job_event = 10;

  // Graph-level event.
  GraphEvent graph_event = 11;
}
|
|
|
|
// A job graph run: its ID, its graph, and its events.
message JobGraphRun {
  // ID of this job graph run.
  string job_graph_run_id = 1;

  // The job graph of this run.
  JobGraph graph = 2;

  // Events of this run.
  repeated JobGraphRunEvent events = 3;
}
|
|
|
|
// Query carried by ListJobGraphRunsRequest.
//
// TODO: no fields are defined yet.
message JobGraphRunQuery {
}
|
|
|
|
// Request message for Catalog.ListJobGraphRuns.
message ListJobGraphRunsRequest {
  // The query carried by the request.
  JobGraphRunQuery query = 1;
}
|
|
// Response message for Catalog.ListJobGraphRuns.
message ListJobGraphRunsResponse {
  // Job graph runs carried by the response.
  repeated JobGraphRun runs = 1;
}
|
|
|
|
// Query carried by ListPartitionManifestsRequest.
//
// TODO: no fields are defined yet.
message PartitionManifestsQuery {
}
|
|
|
|
// Request message for Catalog.ListPartitions.
message ListPartitionManifestsRequest {
  // The query carried by the request.
  PartitionManifestsQuery query = 1;
}
|
|
// Response message for Catalog.ListPartitions.
message ListPartitionManifestsResponse {
  // Partition manifests carried by the response.
  repeated PartitionManifest manifests = 1;
}
|
|
|
|
// Catalog service: job graph run lookup, partition manifest lookup, the
// partition activity log, and coordination calls.
//
// TODO: the JTBDs list for this service is still empty.
service Catalog {
  // Enables lookup of job graph runs (current or past).
  rpc ListJobGraphRuns(ListJobGraphRunsRequest)
      returns (ListJobGraphRunsResponse);

  // Enables lookup of partition manifests produced as part of prior job runs.
  // NOTE(review): the RPC is named ListPartitions while its messages are
  // named ListPartitionManifests*; kept as-is for compatibility.
  rpc ListPartitions(ListPartitionManifestsRequest)
      returns (ListPartitionManifestsResponse);

  // ---- Partition activity log methods ----

  rpc AppendPartitionEvent(AppendPartitionEventRequest)
      returns (AppendPartitionEventResponse);

  rpc GetLatestPartitionEvent(GetLatestPartitionEventRequest)
      returns (GetLatestPartitionEventResponse);

  rpc QueryPartitionEvents(QueryPartitionEventsRequest)
      returns (QueryPartitionEventsResponse);

  // Server-streaming: the response is a stream of PartitionEvent messages.
  rpc StreamPartitionEvents(StreamPartitionEventsRequest)
      returns (stream PartitionEvent);

  // ---- Coordination methods ----

  rpc RequestPartition(RequestPartitionRequest)
      returns (RequestPartitionResponse);

  rpc GetActiveBuildStatus(GetActiveBuildStatusRequest)
      returns (GetActiveBuildStatusResponse);

  rpc GetDependencyGraph(GetDependencyGraphRequest)
      returns (GetDependencyGraphResponse);
}
|