// File: databuild/databuild/databuild.proto
//
// Listing metadata carried over from the file viewer:
//   512 lines, 14 KiB, Protocol Buffer

syntax = "proto3";
import "google/protobuf/timestamp.proto";
import "google/protobuf/struct.proto";
package databuild.v1;
// Reference to a single data partition, carried as a string.
// NOTE(review): the expected format of `str` is not defined in this file.
message PartitionRef {
  // String form of the partition reference.
  string str = 1;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Jobs
///////////////////////////////////////////////////////////////////////////////////////////////
//
// Job Config
//
// Kinds of data dependency that a DataDep can declare.
// NOTE(review): the semantic difference between QUERY and MATERIALIZE is not
// documented in this file.
enum DepType {
  // Default (zero) value; this is what an unset `dep_type` reads as.
  QUERY = 0;
  MATERIALIZE = 1;
}
// One data dependency: the partition depended on, and the kind of dependency.
message DataDep {
  // Kind of dependency; reads as QUERY (the zero value) when unset.
  DepType dep_type = 1;
  // The partition being depended on.
  PartitionRef partition_ref = 2;
}
// Configuration for one parameterization of a job.
message JobConfig {
  // Partitions produced by this parameterization.
  repeated PartitionRef outputs = 1;

  // Data dependencies the job requires.
  repeated DataDep inputs = 2;

  // Command line arguments for the job.
  repeated string args = 3;

  // Environment variables for the job, keyed by variable name.
  map<string, string> env = 4;
}
// Request for JobConfigure.Configure.
message JobConfigureRequest {
  // Partition refs the job is asked to be configured for.
  repeated PartitionRef outputs = 1;
}
// Response from JobConfigure.Configure.
message JobConfigureResponse {
  // Job configurations returned by the configure step.
  repeated JobConfig configs = 1;
}
// Job configuration service; implemented by the job.cfg bazel rule.
service JobConfigure {
  // Returns job configurations for the requested output partitions.
  rpc Configure(JobConfigureRequest) returns (JobConfigureResponse);
}
//
// Job Exec
//
// Records the literal partitions that were consumed (together with their own
// manifests) in order to produce the listed output partitions.
// NOTE(review): this message nests itself through `inputs`, so the schema
// places no bound on manifest depth.
message PartitionManifest {
  // Refs of the partitions produced by this job.
  repeated PartitionRef outputs = 1;

  // Manifests of the input partitions.
  repeated PartitionManifest inputs = 2;

  // Time at which job execution started.
  google.protobuf.Timestamp start_time = 3;

  // Time at which job execution ended.
  google.protobuf.Timestamp end_time = 4;

  // The task (job label plus configuration) used to run the job.
  Task task = 5;
}
// Request for JobExecute.Execute.
message JobExecuteRequest {
  // Partition refs passed to the execute step.
  repeated PartitionRef outputs = 1;
}
// Response from JobExecute.Execute: metadata for the complete set of
// partitions produced by the job.
message JobExecuteResponse {
  // Manifests for the produced partitions.
  repeated PartitionManifest manifests = 1;
}
// Job execution service; implemented by the job.exec bazel rule.
service JobExecute {
  // Takes the requested output partitions and returns their manifests.
  rpc Execute(JobExecuteRequest) returns (JobExecuteResponse);
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Graphs
///////////////////////////////////////////////////////////////////////////////////////////////
//
// GraphLookup
//
// Identifies a job by its bazel label.
message JobLabel {
  // The bazel label that references the job_target.
  string label = 1;
}
// Request for GraphLookup.Lookup.
message GraphLookupRequest {
  // Partition refs to look up tasks for.
  repeated PartitionRef outputs = 1;
}
// A task that has not been configured yet.
message TaskRef {
  // Job whose configure/exec targets will be used.
  JobLabel job = 1;

  // Partition refs this task is responsible for producing; the configure
  // target will be invoked with these.
  repeated PartitionRef outputs = 2;
}
// Response from GraphLookup.Lookup: the complete set of tasks needed to
// produce the requested partitions.
message GraphLookupResponse {
  // One entry per not-yet-configured task.
  repeated TaskRef task_refs = 1;
}
// Lookup service; implemented once per graph.
service GraphLookup {
  // Maps requested output partitions to the tasks that produce them.
  rpc Lookup(GraphLookupRequest) returns (GraphLookupResponse);
}
// Request for the graph analyze RPC (DataBuildService.AnalyzeGraph).
message GraphAnalyzeRequest {
  // Output partition refs to analyze the graph for.
  repeated PartitionRef outputs = 1;
}
//
// JobGraph
//
// A job paired with the configuration it is run with.
message Task {
  // Bazel label uniquely identifying the job.
  JobLabel job = 1;

  // Configuration for the job.
  JobConfig config = 2;
}
// Identifies a graph by its bazel label.
message GraphLabel {
  // The bazel label referencing the graph.
  string label = 1;
}
// A job graph: the graph's label, the outputs it should produce, and the
// configured tasks that make it up.
message JobGraph {
  // Bazel label of the graph to be executed.
  GraphLabel label = 1;

  // Output partitions to be produced by this graph.
  repeated PartitionRef outputs = 2;

  // Job configurations (tasks) that make up this graph.
  repeated Task nodes = 3;
}
// Response from the graph analyze RPC (DataBuildService.AnalyzeGraph).
message GraphAnalyzeResponse {
  // The job graph produced by analysis.
  JobGraph graph = 1;
}
// Response from DataBuildService.Execute.
message GraphExecuteResponse {
  // Partition manifests returned by the graph execution.
  repeated PartitionManifest manifests = 1;
}
// Request for DataBuildService.Build.
message GraphBuildRequest {
  // Partition refs the caller wants built.
  repeated PartitionRef outputs = 1;
}
// Response from DataBuildService.Build.
message GraphBuildResponse {
  // Partition manifests returned by the build.
  repeated PartitionManifest manifests = 1;
}
// Service covering job configuration and graph analysis, plus graph
// execution and the user-facing build entry point.
service DataBuildService {
  // Disabled (kept for reference): get job configurations for the specified
  // output references.
  //   rpc GetJobConfigs(JobConfigureRequest) returns (JobConfigureResponse) {}

  // Analyzes and returns the job graph for the specified output references.
  rpc AnalyzeGraph(GraphAnalyzeRequest) returns (GraphAnalyzeResponse);

  // Executes the specified job graph (implemented by databuild).
  rpc Execute(JobGraph) returns (GraphExecuteResponse);

  // User-facing entry point: builds the desired partitions.
  rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Partition Activity Log
///////////////////////////////////////////////////////////////////////////////////////////////
// States in a partition's lifecycle.
enum PartitionStatus {
  // Zero value; what an unset status reads as.
  PARTITION_UNKNOWN = 0;
  // Partition requested but not yet scheduled.
  PARTITION_REQUESTED = 1;
  // Job scheduled to produce this partition.
  PARTITION_SCHEDULED = 2;
  // Job actively building this partition.
  PARTITION_BUILDING = 3;
  // Partition successfully built and available.
  PARTITION_AVAILABLE = 4;
  // Partition build failed.
  PARTITION_FAILED = 5;
  // Partition exists but upstream dependencies changed.
  PARTITION_STALE = 6;
  // Request delegated to an existing build.
  PARTITION_DELEGATED = 7;
}
// A single entry in the partition activity log.
message PartitionEvent {
  // --- Event identity ---

  // Identifier of this event.
  string partition_event_id = 1;
  // Timestamp of this event.
  google.protobuf.Timestamp timestamp = 2;

  // --- Partition identification ---

  // The partition this event is about.
  PartitionRef partition_ref = 3;
  // Status recorded by this event.
  PartitionStatus status = 4;

  // --- Build context ---

  // Links to the graph execution.
  string job_graph_run_id = 5;
  // Links to the specific job run.
  string job_run_id = 6;
  // Which job produces this partition.
  JobLabel producing_job = 7;

  // --- Coordination metadata ---

  // Who requested this partition.
  repeated string requesting_clients = 8;
  // If delegated, the run it was delegated to.
  string delegated_to_run_id = 9;

  // --- Dependencies ---

  // Upstream dependency partitions.
  repeated PartitionRef upstream_deps = 10;
  // Downstream dependency partitions.
  repeated PartitionRef downstream_deps = 11;

  // --- Data about the partition ---

  // Present when status = AVAILABLE.
  PartitionManifest manifest = 12;
  // Present when status = FAILED.
  string failure_reason = 13;

  // --- Storage metadata ---

  // NOTE(review): accepted values for the backend name are not defined in
  // this file.
  string storage_backend = 14;
  // Free-form string key/value storage metadata.
  map<string, string> storage_metadata = 15;
}
// Query parameters for partition events (carried by
// QueryPartitionEventsRequest).
// NOTE(review): how the individual criteria combine (AND vs. OR) and how
// empty lists are treated is not specified in this file.
message PartitionEventQuery {
  // Partition refs to query for.
  repeated PartitionRef partition_refs = 1;
  // Partition statuses to query for.
  repeated PartitionStatus statuses = 2;
  // Job graph run ids to query for.
  repeated string job_graph_run_ids = 3;
  // Time range for the query.
  TimeRange time_range = 4;
  // NOTE(review): presumably the maximum number of events to return; confirm.
  int32 limit = 5;
  // NOTE(review): presumably the number of events to skip; confirm.
  int32 offset = 6;
  // Ordering specification for the results.
  OrderBy order_by = 7;
}
// A span of time delimited by two timestamps.
// NOTE(review): whether the bounds are inclusive is not specified here.
message TimeRange {
  // Start of the range.
  google.protobuf.Timestamp start = 1;
  // End of the range.
  google.protobuf.Timestamp end = 2;
}
// Ordering specification: a field name plus a direction flag.
message OrderBy {
  // Name of the field to order by.
  // NOTE(review): the set of accepted field names is not defined in this file.
  string field = 1;
  // Direction flag; reads as false when unset.
  bool ascending = 2;
}
// Filter for a partition event stream (carried by
// StreamPartitionEventsRequest).
message EventStreamFilter {
  // Partition refs to filter on.
  repeated PartitionRef partition_refs = 1;
  // Partition statuses to filter on.
  repeated PartitionStatus statuses = 2;
  // NOTE(review): presumably a lower bound on event time; confirm.
  google.protobuf.Timestamp since = 3;
}
// Coordination support: describes an active build of a partition, as
// returned by GetActiveBuildStatusResponse.
message ActiveBuild {
  // Partition this entry refers to.
  PartitionRef partition_ref = 1;
  // Id of the job graph run associated with this build.
  string job_graph_run_id = 2;
  // Partition status for this build.
  PartitionStatus status = 3;
  // Clients that requested this partition.
  repeated string requesting_clients = 4;
  // Timestamp at which the build started.
  google.protobuf.Timestamp started_at = 5;
}
// A partition dependency graph expressed as a root plus node and edge lists.
message DependencyGraph {
  // Root partition of the graph.
  PartitionRef root = 1;
  // Nodes of the graph.
  repeated DependencyNode nodes = 2;
  // Edges of the graph.
  repeated DependencyEdge edges = 3;
}
// A node of a DependencyGraph: one partition with its status.
message DependencyNode {
  // Partition represented by this node.
  PartitionRef partition_ref = 1;
  // Status of the partition.
  PartitionStatus status = 2;
  // Timestamp of the last update.
  google.protobuf.Timestamp last_updated = 3;
}
// An edge of a DependencyGraph, connecting two partitions.
message DependencyEdge {
  // Source partition of the edge.
  PartitionRef source = 1;
  // Target partition of the edge.
  PartitionRef target = 2;
  // Kind of relationship; reads as DEPENDS_ON (the zero value) when unset.
  DependencyType type = 3;
}
// Kinds of relationship a DependencyEdge can carry.
enum DependencyType {
  // Zero value; what an unset edge type reads as.
  DEPENDS_ON = 0;
  PRODUCES = 1;
}
// Request/Response message types
// Request for Catalog.AppendPartitionEvent.
message AppendPartitionEventRequest {
  // The event to append.
  PartitionEvent event = 1;
}
// Response from Catalog.AppendPartitionEvent.
message AppendPartitionEventResponse {
  // Id of the event.
  string event_id = 1;
}
// Request for Catalog.GetLatestPartitionEvent.
message GetLatestPartitionEventRequest {
  // Partition whose latest event is requested.
  PartitionRef partition_ref = 1;
}
// Response from Catalog.GetLatestPartitionEvent.
message GetLatestPartitionEventResponse {
  // The returned partition event.
  PartitionEvent event = 1;
}
// Request for Catalog.QueryPartitionEvents.
message QueryPartitionEventsRequest {
  // Query parameters.
  PartitionEventQuery query = 1;
}
// Response from Catalog.QueryPartitionEvents.
message QueryPartitionEventsResponse {
  // Events returned for the query.
  repeated PartitionEvent events = 1;
  // NOTE(review): presumably the number of matching events irrespective of
  // limit/offset; confirm.
  int64 total_count = 2;
}
// Request for Catalog.StreamPartitionEvents.
message StreamPartitionEventsRequest {
  // Filter for the event stream.
  EventStreamFilter filter = 1;
}
// Request for Catalog.RequestPartition.
message RequestPartitionRequest {
  // Partition being requested.
  PartitionRef partition_ref = 1;
  // Identifier of the requesting client.
  string client_id = 2;
  // Options for this request.
  RequestOptions options = 3;
}
// Options attached to a RequestPartitionRequest.
message RequestOptions {
  // Delegation flag; reads as false when unset.
  // NOTE(review): presumably permits delegating the request to an existing
  // build (compare PARTITION_DELEGATED); confirm.
  bool allow_delegation = 1;
  // Timeout, in seconds.
  // NOTE(review): the meaning of 0 / unset is not specified in this file.
  int32 timeout_seconds = 2;
}
// Response from Catalog.RequestPartition.
message RequestPartitionResponse {
  // Outcome of the request.
  RequestResult result = 1;
}
// Outcome of a partition request; at most one of the alternatives is set.
message RequestResult {
  oneof result {
    // Set when the outcome is "available".
    PartitionLocation available = 1;
    // Set when the outcome is "delegated".
    DelegationToken delegated = 2;
    // Set when the outcome is "building".
    BuildToken building = 3;
  }
}
// Pairs a partition ref with its manifest; used as the "available"
// alternative of RequestResult.
message PartitionLocation {
  // The partition.
  PartitionRef partition_ref = 1;
  // Manifest of the partition.
  PartitionManifest manifest = 2;
}
// Used as the "delegated" alternative of RequestResult.
message DelegationToken {
  // Id of a job graph run.
  string job_graph_run_id = 1;
  // Estimated completion time.
  google.protobuf.Timestamp estimated_completion = 2;
}
// Used as the "building" alternative of RequestResult.
message BuildToken {
  // Id of a job graph run.
  string job_graph_run_id = 1;
  // Time at which the build started.
  google.protobuf.Timestamp started_at = 2;
}
// Request for Catalog.GetActiveBuildStatus.
message GetActiveBuildStatusRequest {
  // Partitions whose active build status is requested.
  repeated PartitionRef partition_refs = 1;
}
// Response from Catalog.GetActiveBuildStatus.
message GetActiveBuildStatusResponse {
  // Active builds returned for the request.
  repeated ActiveBuild active_builds = 1;
}
// Request for Catalog.GetDependencyGraph.
message GetDependencyGraphRequest {
  // Partition whose dependency graph is requested.
  PartitionRef partition_ref = 1;
  // NOTE(review): presumably how many levels of dependencies to traverse;
  // the meaning of 0 / unset is not specified in this file.
  int32 depth = 2;
}
// Response from Catalog.GetDependencyGraph.
message GetDependencyGraphResponse {
  // The returned dependency graph.
  DependencyGraph graph = 1;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Catalog
///////////////////////////////////////////////////////////////////////////////////////////////
//
// Job Run Events
//
// Lifecycle states that each job run goes through.
enum JobRunStatus {
  // Initial state (zero value).
  NOT_SCHEDULED = 0;

  // Job preconditions met; job scheduled.
  JOB_SCHEDULED = 1;

  // Job execution has begun.
  JOB_RUNNING = 2;

  // Job execution finished successfully.
  JOB_SUCCESS = 3;

  // Job execution failed.
  JOB_FAILED = 4;
}
// Job event payload carrying a job configuration.
message JobConfigEvent {
  // The job configuration.
  JobConfig config = 1;
}
// Job event payload carrying log output.
message JobLogsEvent {
  // Log entries, as strings.
  repeated string logs = 1;
}
// An event belonging to a single job run within a job graph run.
message JobEvent {
  // --- IDs ---

  // Unique ID for this job graph run.
  string job_graph_run_id = 1;
  // Unique ID for this job run.
  string job_run_id = 2;
  // Unique ID for this event.
  string job_graph_run_event_id = 3;
  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct.
  int64 sequence = 4;

  // --- Metadata ---

  // Status of the job run as of this event.
  JobRunStatus status = 10;
  // Hash key of (label, outputs), used to associate different runs over time.
  string job_key = 11;
  // Label of the job.
  JobLabel label = 12;
  // Output partitions of the job.
  repeated PartitionRef outputs = 13;

  // --- Event payload ---
  // Intended as a sum type over the fields below.
  // NOTE(review): these are plain fields rather than a `oneof`, so the schema
  // itself does not prevent more than one from being set.

  // Configuration payload.
  JobConfigEvent config = 20;
  // Logs payload.
  JobLogsEvent logs = 21;
}
//
// Job Graph Run Events
//
// Graph event payload carrying a job graph.
message GraphAnalyzeEvent {
  // The job graph.
  JobGraph graph = 1;
}
// Represents a change in status for a datadep.
// Currently declares no fields.
message GraphDataDepEvent {
}
// Lifecycle states that each job graph run goes through.
enum JobGraphRunStatus {
  // Initial state (zero value): the graph will be analyzed before executing.
  GRAPH_STARTED = 0;

  // Analysis completed.
  GRAPH_ANALYZED = 1;

  // The graph stays in this state until deps are satisfied for at least one
  // job.
  AWAITING_DEPS = 2;

  // The graph is executing at least one job. If the number of executing jobs
  // drops to zero, the state goes back to AWAITING_DEPS.
  GRAPH_RUNNING = 3;

  // Graph execution finished successfully.
  GRAPH_SUCCESS = 4;

  // Graph execution failed.
  GRAPH_FAILED = 5;
}
// An event describing the job graph run as a whole.
message GraphEvent {
  // --- IDs ---

  // Unique ID for this job graph run.
  string job_graph_run_id = 1;
  // Unique ID for this event.
  string job_graph_run_event_id = 2;
  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct.
  int64 sequence = 3;

  // --- Metadata ---

  // Current status of the job graph run as of this event.
  JobGraphRunStatus status = 10;
  // Label of the graph.
  GraphLabel label = 11;

  // --- Event payload ---
  // Intended as a sum type over the field(s) below.
  // NOTE(review): declared as a plain field rather than inside a `oneof`.

  // Analysis payload.
  GraphAnalyzeEvent analysis = 20;
}
// One entry in the sequence of events that completely describes the progress
// of a job graph build.
message JobGraphRunEvent {
  // Id of this event.
  string job_graph_run_event_id = 1;
  // Timestamp of this event.
  google.protobuf.Timestamp timestamp = 2;

  // Intended as a sum type over the potential events below.
  // NOTE(review): these are plain fields rather than a `oneof`, so the schema
  // itself does not prevent both from being set.

  // Job-level event.
  JobEvent job_event = 10;
  // Graph-level event.
  GraphEvent graph_event = 11;
}
// A job graph run: its id, its graph, and its events.
message JobGraphRun {
  // Id of this job graph run.
  string job_graph_run_id = 1;
  // The job graph of this run.
  JobGraph graph = 2;
  // Events of this run.
  repeated JobGraphRunEvent events = 3;
}
// Query for job graph runs.
// TODO: no fields are defined yet.
message JobGraphRunQuery {
}
// Request for Catalog.ListJobGraphRuns.
message ListJobGraphRunsRequest {
  // Query parameters.
  JobGraphRunQuery query = 1;
}
// Response from Catalog.ListJobGraphRuns.
message ListJobGraphRunsResponse {
  // Job graph runs returned for the query.
  repeated JobGraphRun runs = 1;
}
// Query for partition manifests.
// TODO: no fields are defined yet.
message PartitionManifestsQuery {
}
// Request for Catalog.ListPartitions.
message ListPartitionManifestsRequest {
  // Query parameters.
  PartitionManifestsQuery query = 1;
}
// Response from Catalog.ListPartitions.
message ListPartitionManifestsResponse {
  // Partition manifests returned for the query.
  repeated PartitionManifest manifests = 1;
}
// Catalog service: lookup of job graph runs and partition manifests, the
// partition activity log, and build coordination.
service Catalog {
  // JTBDs
  // -

  // Enables lookup of job graph runs (current or past).
  rpc ListJobGraphRuns(ListJobGraphRunsRequest)
      returns (ListJobGraphRunsResponse);

  // Enables lookup of partition manifests produced as part of prior job runs.
  rpc ListPartitions(ListPartitionManifestsRequest)
      returns (ListPartitionManifestsResponse);

  // --- Partition activity log methods ---

  rpc AppendPartitionEvent(AppendPartitionEventRequest)
      returns (AppendPartitionEventResponse);

  rpc GetLatestPartitionEvent(GetLatestPartitionEventRequest)
      returns (GetLatestPartitionEventResponse);

  rpc QueryPartitionEvents(QueryPartitionEventsRequest)
      returns (QueryPartitionEventsResponse);

  // Server-streaming: the response is a stream of PartitionEvent messages.
  rpc StreamPartitionEvents(StreamPartitionEventsRequest)
      returns (stream PartitionEvent);

  // --- Coordination methods ---

  rpc RequestPartition(RequestPartitionRequest)
      returns (RequestPartitionResponse);

  rpc GetActiveBuildStatus(GetActiveBuildStatusRequest)
      returns (GetActiveBuildStatusResponse);

  rpc GetDependencyGraph(GetDependencyGraphRequest)
      returns (GetDependencyGraphResponse);
}