syntax = "proto3";

import "google/protobuf/timestamp.proto";
import "google/protobuf/struct.proto";

package databuild.v1;

// Reference to a partition, carried as a single string.
message PartitionRef {
  string str = 1;
}

///////////////////////////////////////////////////////////////////////////////
// Jobs
///////////////////////////////////////////////////////////////////////////////

//
// Job Config
//

// The type of dependency
enum DepType {
  QUERY = 0;  // Default
  MATERIALIZE = 1;
}

// Represents a data dependency
message DataDep {
  // The kind of dependency held on the partition
  DepType dep_type = 1;
  // The partition that is depended upon
  PartitionRef partition_ref = 2;
}

// Configuration for a job
message JobConfig {
  // The partitions that this parameterization produces
  repeated PartitionRef outputs = 1;
  // Required data dependencies
  repeated DataDep inputs = 2;
  // Command line arguments
  repeated string args = 3;
  // Environment variables (variable name -> value)
  map<string, string> env = 4;
}

// Request message for job configuration service
message JobConfigureRequest {
  // The partitions for which job configurations are requested
  repeated PartitionRef outputs = 1;
}

// Response message for job configuration service
message JobConfigureResponse {
  repeated JobConfig configs = 1;
}

// Implemented by the job.cfg bazel rule
service JobConfigure {
  rpc Configure(JobConfigureRequest) returns (JobConfigureResponse);
}

//
// Job Exec
//

// Manifest that records the literal partitions consumed (and their manifests)
// in order to produce the specified partitions
message PartitionManifest {
  // The refs of the partitions produced by this job
  repeated PartitionRef outputs = 1;
  // Input partition manifests. This field is recursive: each input carries
  // its own manifest in turn.
  repeated PartitionManifest inputs = 2;
  // Start time of job execution
  google.protobuf.Timestamp start_time = 3;
  // End time of job execution
  google.protobuf.Timestamp end_time = 4;
  // The configuration used to run the job
  Task task = 5;
}

// Request message for job execution
message JobExecuteRequest {
  repeated PartitionRef outputs = 1;
}

// Metadata for the complete set of partitions produced by this job
message JobExecuteResponse {
  repeated PartitionManifest manifests = 1;
}

// Implemented by the job.exec bazel rule
service JobExecute {
  rpc Execute(JobExecuteRequest) returns (JobExecuteResponse);
}

///////////////////////////////////////////////////////////////////////////////
// Graphs
///////////////////////////////////////////////////////////////////////////////

//
// GraphLookup
//

// Identifies a job by its bazel label
message JobLabel {
  // The bazel label that references the job_target
  string label = 1;
}

// Request message for graph lookup
message GraphLookupRequest {
  repeated PartitionRef outputs = 1;
}

// Represents a not-yet configured task
message TaskRef {
  // The job whose configure/exec targets will be used
  JobLabel job = 1;
  // The partition refs this task is responsible for producing, and with which
  // the configure target will be invoked
  repeated PartitionRef outputs = 2;
}

// Represents the complete set of tasks needed to produce the requested
// partitions
message GraphLookupResponse {
  repeated TaskRef task_refs = 1;
}

// Implemented per graph
service GraphLookup {
  rpc Lookup(GraphLookupRequest) returns (GraphLookupResponse);
}

// Request message for graph analyze service
message GraphAnalyzeRequest {
  repeated PartitionRef outputs = 1;
}

//
// JobGraph
//

// A job paired with the configuration it is to be run with
message Task {
  // The bazel label uniquely identifying the job
  JobLabel job = 1;
  // The configuration for the job
  JobConfig config = 2;
}

// The bazel label referencing the graph
message GraphLabel {
  string label = 1;
}

// Represents a job graph
message JobGraph {
  // The bazel label of the graph to be executed
  GraphLabel label = 1;
  // The output partitions to be produced by this graph
  repeated PartitionRef outputs = 2;
  // The job configurations that make up this graph
  repeated Task nodes = 3;
}

// Response message for graph analyze service
message GraphAnalyzeResponse {
  JobGraph graph = 1;
}

// Response message for graph execution
message GraphExecuteResponse {
  repeated PartitionManifest manifests = 1;
}

// Request message for building partitions
message GraphBuildRequest {
  repeated PartitionRef outputs = 1;
}

// Response message for building partitions
message GraphBuildResponse {
  repeated PartitionManifest manifests = 1;
}

// Service for job configuration and graph analysis
service DataBuildService {
  //  // Get job configurations for the specified output references
  //  rpc GetJobConfigs(JobConfigureRequest) returns (JobConfigureResponse) {}

  // Analyze and get the job graph for the specified output references
  rpc AnalyzeGraph(GraphAnalyzeRequest) returns (GraphAnalyzeResponse);

  // Execute the specified job graph (implemented by databuild)
  rpc Execute(JobGraph) returns (GraphExecuteResponse);

  // User-facing: build the desired partitions
  rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
}

///////////////////////////////////////////////////////////////////////////////
// Catalog
///////////////////////////////////////////////////////////////////////////////

//
// Job Run Events
//

// The complete lifecycle that each job goes through
enum JobRunStatus {
  // Initial state
  NOT_SCHEDULED = 0;
  // Job preconditions met, job scheduled
  JOB_SCHEDULED = 1;
  // Job execution has begun
  JOB_RUNNING = 2;
  // Job execution finished successfully
  JOB_SUCCESS = 3;
  // Job execution failed
  JOB_FAILED = 4;
}

// Event payload carrying a job's configuration
message JobConfigEvent {
  JobConfig config = 1;
}

// Event payload carrying log lines from a job
message JobLogsEvent {
  repeated string logs = 1;
}

// An event describing a job run that belongs to a job graph run
message JobEvent {
  // IDs
  // Unique ID for this job graph run
  string job_graph_run_id = 1;
  // Unique ID for this job run
  string job_run_id = 2;
  // Unique ID for this event
  string job_graph_run_event_id = 3;
  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct
  int64 sequence = 4;

  // Metadata
  // Status of the job run as of this event
  JobRunStatus status = 10;
  // Hash key of (label, outputs) to associate different runs over time
  string job_key = 11;
  JobLabel label = 12;
  repeated PartitionRef outputs = 13;

  // Sum type of the below events (by convention only; these are plain fields,
  // not members of a oneof)
  JobConfigEvent config = 20;
  JobLogsEvent logs = 21;
}

//
// Job Graph Run Events
//

// Event payload carrying the analyzed job graph
message GraphAnalyzeEvent {
  JobGraph graph = 1;
}

// Represents a change in status for a datadep
message GraphDataDepEvent {}

// The complete lifecycle that each job graph run goes through
enum JobGraphRunStatus {
  // Initial state - graph will be analyzed before executing
  GRAPH_STARTED = 0;
  // Analysis completed
  GRAPH_ANALYZED = 1;
  // Graph is in this state until deps are satisfied for at least 1 job
  AWAITING_DEPS = 2;
  // Graph is executing at least 1 job (if this drops to 0, state goes back to
  // AWAITING_DEPS)
  GRAPH_RUNNING = 3;
  // Graph execution finished successfully
  GRAPH_SUCCESS = 4;
  // Graph execution failed
  GRAPH_FAILED = 5;
}

// An event describing the job graph run as a whole
message GraphEvent {
  // IDs
  // Unique ID for this job graph run
  string job_graph_run_id = 1;
  // Unique ID for this event
  string job_graph_run_event_id = 2;
  // Sequence number for this event, such that the ordering within
  // job_graph_run_id is correct
  int64 sequence = 3;

  // Metadata
  // Current status of the job graph run as of this event
  JobGraphRunStatus status = 10;
  GraphLabel label = 11;

  // Sum type of below events (by convention only; not declared as a oneof)
  GraphAnalyzeEvent analysis = 20;
}

// The sequence of events that completely describes progress of the job graph
// build
message JobGraphRunEvent {
  string job_graph_run_event_id = 1;
  google.protobuf.Timestamp timestamp = 2;

  // Sum type for potential events (by convention only; not declared as a
  // oneof)
  JobEvent job_event = 10;
  GraphEvent graph_event = 11;
}

// A job graph run: its ID, the graph it ran, and the events recorded for it
message JobGraphRun {
  string job_graph_run_id = 1;
  JobGraph graph = 2;
  repeated JobGraphRunEvent events = 3;
}

// Query over job graph runs
message JobGraphRunQuery {
  // TODO
}

// Request message for Catalog.ListJobGraphRuns
message ListJobGraphRunsRequest {
  JobGraphRunQuery query = 1;
}

// Response message for Catalog.ListJobGraphRuns
message ListJobGraphRunsResponse {
  repeated JobGraphRun runs = 1;
}

// Query over partition manifests
message PartitionManifestsQuery {
  // TODO
}

// Request message for Catalog.ListPartitions
message ListPartitionManifestsRequest {
  PartitionManifestsQuery query = 1;
}

// Response message for Catalog.ListPartitions
message ListPartitionManifestsResponse {
  repeated PartitionManifest manifests = 1;
}

// Lookup service for job graph runs and partition manifests
service Catalog {
  // JTBDs
  // -

  // Enables lookup of job graph runs (current or past)
  rpc ListJobGraphRuns(ListJobGraphRunsRequest) returns (ListJobGraphRunsResponse);

  // Enables lookup of partition manifests produced as part of prior job runs
  rpc ListPartitions(ListPartitionManifestsRequest) returns (ListPartitionManifestsResponse);
}