308 lines
8.2 KiB
Protocol Buffer
308 lines
8.2 KiB
Protocol Buffer
syntax = "proto3";
|
|
|
|
import "google/protobuf/timestamp.proto";
|
|
import "google/protobuf/struct.proto";
|
|
|
|
package databuild.v1;
|
|
|
|
// Identifies a partition by a string reference.
message PartitionRef {
  // String form of the reference.
  // NOTE(review): the expected format is not defined in this file — confirm.
  string str = 1;
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Jobs
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// Job Config
|
|
//
|
|
|
|
// Kinds of data dependency (used by DataDep.dep_type).
enum DepType {
  // Zero value, so this is what an unset dep_type reads as (the default).
  QUERY = 0;

  // NOTE(review): how MATERIALIZE differs from QUERY is not described in
  // this file — confirm and document.
  MATERIALIZE = 1;
}
|
|
|
|
// A single data dependency: a partition plus the kind of dependency on it.
message DataDep {
  // Kind of dependency; reads as QUERY when unset.
  DepType dep_type = 1;

  // The partition being depended on.
  PartitionRef partition_ref = 2;
}
|
|
|
|
// Configuration for one parameterization of a job: what it produces, what it
// depends on, and the arguments/environment it is given.
message JobConfig {
  // Partitions produced by this parameterization.
  repeated PartitionRef outputs = 1;

  // Data dependencies that are required.
  repeated DataDep inputs = 2;

  // Command line arguments.
  repeated string args = 3;

  // Environment variables, keyed by variable name.
  map<string, string> env = 4;
}
|
|
|
|
// Request for JobConfigure.Configure.
message JobConfigureRequest {
  // Partitions for which job configurations are requested.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
// Response for JobConfigure.Configure.
message JobConfigureResponse {
  // The job configurations returned by the configure step.
  repeated JobConfig configs = 1;
}
|
|
|
|
// Job configuration service; implemented by the job.cfg bazel rule.
service JobConfigure {
  // Returns job configurations for the requested output partitions.
  rpc Configure(JobConfigureRequest) returns (JobConfigureResponse);
}
|
|
|
|
//
|
|
// Job Exec
|
|
//
|
|
|
|
// Record of the literal partitions consumed (together with their own
// manifests) in order to produce the listed output partitions.
message PartitionManifest {
  // Refs of the partitions produced by this job.
  repeated PartitionRef outputs = 1;

  // Manifests of the input partitions. The type is self-referential, so a
  // manifest may nest other manifests.
  repeated PartitionManifest inputs = 2;

  // When job execution started (a google.protobuf.Timestamp, not a raw Unix
  // integer).
  google.protobuf.Timestamp start_time = 3;

  // When job execution ended (a google.protobuf.Timestamp, not a raw Unix
  // integer).
  google.protobuf.Timestamp end_time = 4;

  // The task (job label plus its configuration) used to run the job.
  Task task = 5;
}
|
|
|
|
// Request for JobExecute.Execute.
message JobExecuteRequest {
  // Partitions the job is asked to produce.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
// Response for JobExecute.Execute: metadata for the complete set of
// partitions produced by this job.
message JobExecuteResponse {
  // Manifests describing the produced partitions.
  repeated PartitionManifest manifests = 1;
}
|
|
|
|
// Job execution service; implemented by the job.exec bazel rule.
service JobExecute {
  // Runs the job for the requested outputs and returns their manifests.
  rpc Execute(JobExecuteRequest) returns (JobExecuteResponse);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Graphs
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// GraphLookup
|
|
//
|
|
|
|
// Wrapper around the bazel label of a job.
message JobLabel {
  // The bazel label that references the job_target.
  string label = 1;
}
|
|
|
|
// Request for GraphLookup.Lookup.
message GraphLookupRequest {
  // Partitions for which producing tasks should be looked up.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
// A task that has not been configured yet: a job plus the partitions it is
// expected to produce.
message TaskRef {
  // The job whose configure/exec targets will be used.
  JobLabel job = 1;

  // Partition refs this task is responsible for producing; the configure
  // target will be invoked with these.
  repeated PartitionRef outputs = 2;
}
|
|
|
|
// Response for GraphLookup.Lookup: the complete set of tasks needed to
// produce the requested partitions.
message GraphLookupResponse {
  // One not-yet-configured task per entry.
  repeated TaskRef task_refs = 1;
}
|
|
|
|
// Lookup service; each graph provides its own implementation.
service GraphLookup {
  // Resolves the requested partitions to the tasks needed to produce them.
  rpc Lookup(GraphLookupRequest) returns (GraphLookupResponse);
}
|
|
|
|
// Request for DataBuildService.AnalyzeGraph.
message GraphAnalyzeRequest {
  // Partitions whose job graph should be analyzed.
  repeated PartitionRef outputs = 1;
}
|
|
|
|
//
|
|
// JobGraph
|
|
//
|
|
|
|
// A configured task: a job together with the configuration it runs with.
message Task {
  // The bazel label uniquely identifying the job.
  JobLabel job = 1;

  // The configuration for the job.
  JobConfig config = 2;
}
|
|
|
|
// Wrapper around the bazel label of a graph.
message GraphLabel {
  // The bazel label referencing the graph.
  string label = 1;
}
|
|
|
|
// A job graph: the graph's label, the partitions it should produce, and the
// configured tasks that make it up.
message JobGraph {
  // The bazel label of the graph to be executed.
  GraphLabel label = 1;

  // The output partitions to be produced by this graph.
  repeated PartitionRef outputs = 2;

  // The configured tasks (job + config) that make up this graph.
  repeated Task nodes = 3;
}
|
|
|
|
// Response for DataBuildService.AnalyzeGraph.
message GraphAnalyzeResponse {
  // The job graph resulting from analysis.
  JobGraph graph = 1;
}
|
|
|
|
// Response for DataBuildService.Execute.
message GraphExecuteResponse {
  // Partition manifests returned from executing the graph.
  repeated PartitionManifest manifests = 1;
}
|
|
// Request for DataBuildService.Build.
message GraphBuildRequest {
  // The partitions the caller wants built.
  repeated PartitionRef outputs = 1;
}
|
|
// Response for DataBuildService.Build.
message GraphBuildResponse {
  // Partition manifests returned from the build.
  repeated PartitionManifest manifests = 1;
}
|
|
|
|
// Service for graph analysis, graph execution and user-facing builds.
// The job-configuration RPC below is currently commented out.
service DataBuildService {
  // Disabled:
  // // Get job configurations for the specified output references
  // rpc GetJobConfigs(JobConfigureRequest) returns (JobConfigureResponse) {}

  // Analyze the specified output references and return their job graph.
  rpc AnalyzeGraph(GraphAnalyzeRequest) returns (GraphAnalyzeResponse);

  // Execute the specified job graph (implemented by databuild).
  // Note that the request type is JobGraph itself, not a dedicated request
  // message.
  rpc Execute(JobGraph) returns (GraphExecuteResponse);

  // User-facing entry point: build the desired partitions.
  rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Catalog
|
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// Job Run Events
|
|
//
|
|
|
|
// Lifecycle states that every job run passes through.
enum JobRunStatus {
  // Initial state (also the zero/default value).
  NOT_SCHEDULED = 0;

  // Preconditions are met and the job has been scheduled.
  JOB_SCHEDULED = 1;

  // Execution has begun.
  JOB_RUNNING = 2;

  // Execution finished successfully.
  JOB_SUCCESS = 3;

  // Execution failed.
  JOB_FAILED = 4;
}
|
|
|
|
// Event payload carrying a job configuration.
message JobConfigEvent {
  // The job configuration attached to this event.
  JobConfig config = 1;
}
|
|
// Event payload carrying job log output.
message JobLogsEvent {
  // Log entries, as strings.
  // NOTE(review): presumably one entry per log line — confirm.
  repeated string logs = 1;
}
|
|
|
|
// An event about a single job run within a job graph run. Fields are grouped
// by number range: 1-4 identifiers, 10-13 metadata, 20+ event payloads.
message JobEvent {
  // --- IDs ---

  // Unique ID for this job graph run.
  string job_graph_run_id = 1;

  // Unique ID for this job run.
  string job_run_id = 2;

  // Unique ID for this event.
  string job_graph_run_event_id = 3;

  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct.
  int64 sequence = 4;

  // --- Metadata ---

  // Status of the job run as of this event.
  JobRunStatus status = 10;

  // Hash key of (label, outputs), used to associate different runs over time.
  string job_key = 11;

  // Label of the job this event is about.
  JobLabel label = 12;

  // Output partitions of the job this event is about.
  repeated PartitionRef outputs = 13;

  // --- Payload ---
  // Intended as a sum type over the events below.
  // NOTE(review): these are plain fields, not a `oneof`, so nothing in the
  // schema prevents both from being set — confirm whether that is intended.

  JobConfigEvent config = 20;

  JobLogsEvent logs = 21;
}
|
|
|
|
//
|
|
// Job Graph Run Events
|
|
//
|
|
|
|
// Event payload carrying a job graph.
message GraphAnalyzeEvent {
  // The job graph attached to this event.
  JobGraph graph = 1;
}
|
|
|
|
// Represents a change in status for a datadep.
// Currently has no fields.
message GraphDataDepEvent {
}
|
|
|
|
// Lifecycle states that every job graph run passes through.
enum JobGraphRunStatus {
  // Initial state (also the zero/default value); the graph will be analyzed
  // before executing.
  GRAPH_STARTED = 0;

  // Analysis has completed.
  GRAPH_ANALYZED = 1;

  // The graph stays here until deps are satisfied for at least 1 job.
  AWAITING_DEPS = 2;

  // At least 1 job is executing. If the number of executing jobs drops to 0,
  // the state goes back to AWAITING_DEPS.
  GRAPH_RUNNING = 3;

  // Graph execution finished successfully.
  GRAPH_SUCCESS = 4;

  // Graph execution failed.
  GRAPH_FAILED = 5;
}
|
|
|
|
// An event about a job graph run as a whole. Fields are grouped by number
// range: 1-3 identifiers, 10-11 metadata, 20+ event payloads.
message GraphEvent {
  // --- IDs ---

  // Unique ID for this job graph run.
  string job_graph_run_id = 1;

  // Unique ID for this event.
  string job_graph_run_event_id = 2;

  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct.
  int64 sequence = 3;

  // --- Metadata ---

  // Current status of the job graph run as of this event.
  JobGraphRunStatus status = 10;

  // Label of the graph this event is about.
  GraphLabel label = 11;

  // --- Payload ---
  // Intended as a sum type over the events below; at present there is only
  // one, and it is a plain field rather than a `oneof` member.

  GraphAnalyzeEvent analysis = 20;
}
|
|
|
|
// One entry in the sequence of events that completely describes the progress
// of a job graph build.
message JobGraphRunEvent {
  // ID of this event.
  string job_graph_run_event_id = 1;

  // Timestamp attached to this event.
  google.protobuf.Timestamp timestamp = 2;

  // Intended as a sum type over the potential events below.
  // NOTE(review): these are plain fields, not a `oneof`, so nothing in the
  // schema prevents both from being set — confirm whether that is intended.

  JobEvent job_event = 10;

  GraphEvent graph_event = 11;
}
|
|
|
|
// A job graph run: its ID, the graph involved, and the events recorded for it.
message JobGraphRun {
  // ID of this job graph run.
  string job_graph_run_id = 1;

  // The job graph associated with this run.
  JobGraph graph = 2;

  // Events belonging to this run.
  repeated JobGraphRunEvent events = 3;
}
|
|
|
|
// Query used when listing job graph runs.
// TODO: no fields are defined yet.
message JobGraphRunQuery {
}
|
|
|
|
// Request for Catalog.ListJobGraphRuns.
message ListJobGraphRunsRequest {
  // Query describing which runs to list.
  JobGraphRunQuery query = 1;
}
|
|
// Response for Catalog.ListJobGraphRuns.
message ListJobGraphRunsResponse {
  // The job graph runs returned for the query.
  repeated JobGraphRun runs = 1;
}
|
|
|
|
// Query used when listing partition manifests.
// TODO: no fields are defined yet.
message PartitionManifestsQuery {
}
|
|
|
|
// Request for Catalog.ListPartitions.
message ListPartitionManifestsRequest {
  // Query describing which partition manifests to list.
  PartitionManifestsQuery query = 1;
}
|
|
// Response for Catalog.ListPartitions.
message ListPartitionManifestsResponse {
  // The partition manifests returned for the query.
  repeated PartitionManifest manifests = 1;
}
|
|
|
|
// Lookup service over job graph runs and partition manifests.
service Catalog {
  // JTBDs
  // - TODO: list not filled in yet.

  // Enables lookup of job graph runs (current or past).
  rpc ListJobGraphRuns(ListJobGraphRunsRequest) returns (ListJobGraphRunsResponse);

  // Enables lookup of partition manifests produced as part of prior job runs.
  // Note that the RPC is named ListPartitions while its request/response
  // messages are named ListPartitionManifests*.
  rpc ListPartitions(ListPartitionManifestsRequest) returns (ListPartitionManifestsResponse);
}
|