// databuild/databuild/databuild.proto
//
// 644 lines
// 20 KiB
// Protocol Buffer

syntax = "proto3";
package databuild.v1;
// Reference to a partition, carried as a single string.
message PartitionRef {
  // String form of the reference.
  string str = 1;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Jobs
///////////////////////////////////////////////////////////////////////////////////////////////
//
// Job Config
//
// The kind of dependency a job declares on an input.
enum DepType {
  // Default (zero value, so it is what an unset field reads as).
  QUERY = 0;
  MATERIALIZE = 1;
}
// A single data dependency.
message DataDep {
  // Dependency type as an enum, for programmatic use.
  DepType dep_type_code = 1;

  // Dependency type as a human-readable string ("query", "materialize").
  string dep_type_name = 2;

  // The partition depended upon. This field was moved from number 2 to 3.
  PartitionRef partition_ref = 3;
}
// One parameterization (configuration) of a job.
message JobConfig {
  // Partitions produced by this parameterization.
  repeated PartitionRef outputs = 1;

  // Data dependencies the job requires.
  repeated DataDep inputs = 2;

  // Command line arguments.
  repeated string args = 3;

  // Environment variables (name -> value).
  map<string, string> env = 4;
}
// Request for the job configuration service.
message JobConfigureRequest {
  // Partition refs to be configured.
  repeated PartitionRef outputs = 1;
}
// Response from the job configuration service.
message JobConfigureResponse {
  // The job configurations returned by the service.
  repeated JobConfig configs = 1;
}
// Job configuration service; implemented by the job.cfg bazel rule.
service JobConfigure {
  // Takes a JobConfigureRequest and returns a JobConfigureResponse.
  rpc Configure(JobConfigureRequest) returns (JobConfigureResponse);
}
//
// Job Exec
//
// Records the literal partitions consumed (together with their own manifests)
// in order to produce the specified partitions.
//
// The message is recursive: `inputs` holds further PartitionManifest values.
message PartitionManifest {
  // Refs of the partitions produced by this job.
  repeated PartitionRef outputs = 1;

  // Manifests of the input partitions.
  repeated PartitionManifest inputs = 2;

  // Job execution start time (Unix timestamp, seconds).
  int64 start_time = 3;

  // Job execution end time (Unix timestamp, seconds).
  int64 end_time = 4;

  // The configuration used to run the job.
  Task task = 5;

  // Arbitrary metadata about the produced partitions, keyed by partition ref.
  map<string, string> metadata = 6;
}
// Request accepted by JobExecute.Execute.
message JobExecuteRequest {
  // Partition refs passed to the execute call.
  repeated PartitionRef outputs = 1;
}
// Response of JobExecute.Execute.
message JobExecuteResponse {
  // Metadata for the complete set of partitions produced by this job.
  repeated PartitionManifest manifests = 1;
}
// Job execution service; implemented by the job.exec bazel rule.
service JobExecute {
  // Takes a JobExecuteRequest and returns a JobExecuteResponse.
  rpc Execute(JobExecuteRequest) returns (JobExecuteResponse);
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Graphs
///////////////////////////////////////////////////////////////////////////////////////////////
//
// GraphLookup
//
// Identifies a job by its bazel label.
message JobLabel {
  // The bazel label that references the job_target.
  string label = 1;
}
// Request accepted by GraphLookup.Lookup.
message GraphLookupRequest {
  // Partition refs to look up.
  repeated PartitionRef outputs = 1;
}
// A task that has not been configured yet.
message TaskRef {
  // The job whose configure/exec targets will be used.
  JobLabel job = 1;

  // Partition refs this task is responsible for producing; the configure
  // target will be invoked with them.
  repeated PartitionRef outputs = 2;
}
// Response of GraphLookup.Lookup.
message GraphLookupResponse {
  // The complete set of tasks needed to produce the requested partitions.
  repeated TaskRef task_refs = 1;
}
// Lookup service; implemented per graph.
service GraphLookup {
  // Takes a GraphLookupRequest and returns a GraphLookupResponse.
  rpc Lookup(GraphLookupRequest) returns (GraphLookupResponse);
}
// Request for the graph analyze service.
message GraphAnalyzeRequest {
  // Partition refs to analyze.
  repeated PartitionRef outputs = 1;
}
//
// JobGraph
//
// A job paired with the configuration it runs with.
message Task {
  // Bazel label that uniquely identifies the job.
  JobLabel job = 1;

  // Configuration for the job.
  JobConfig config = 2;
}
// Identifies a graph by its bazel label.
message GraphLabel {
  // The bazel label referencing the graph.
  string label = 1;
}
// A job graph.
message JobGraph {
  // Bazel label of the graph to be executed.
  GraphLabel label = 1;

  // Output partitions to be produced by this graph.
  repeated PartitionRef outputs = 2;

  // The job configurations that make up this graph.
  repeated Task nodes = 3;
}
// Response from the graph analyze service.
message GraphAnalyzeResponse {
  // The job graph returned by the analysis.
  JobGraph graph = 1;
}
// Response returned by DataBuildService.Execute.
message GraphExecuteResponse {
  // Partition manifests returned by the execution.
  repeated PartitionManifest manifests = 1;
}
// Request accepted by DataBuildService.Build.
message GraphBuildRequest {
  // The partitions to build.
  repeated PartitionRef outputs = 1;
}
// Response returned by DataBuildService.Build.
message GraphBuildResponse {
  // Partition manifests returned by the build.
  repeated PartitionManifest manifests = 1;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Build Event Log
///////////////////////////////////////////////////////////////////////////////////////////////
// Lifecycle states of a partition.
enum PartitionStatus {
  // Zero value.
  PARTITION_UNKNOWN = 0;

  // Partition requested but not yet analyzed.
  PARTITION_REQUESTED = 1;

  // Partition analyzed successfully.
  PARTITION_ANALYZED = 2;

  // A job is actively building this partition.
  PARTITION_BUILDING = 3;

  // Partition successfully built and available.
  PARTITION_AVAILABLE = 4;

  // Partition build failed.
  PARTITION_FAILED = 5;

  // Request delegated to an existing build.
  PARTITION_DELEGATED = 6;
}
// Lifecycle states of a job execution.
enum JobStatus {
  // Zero value.
  JOB_UNKNOWN = 0;

  // Job scheduled for execution.
  JOB_SCHEDULED = 1;

  // Job actively executing.
  JOB_RUNNING = 2;

  // Job completed successfully.
  JOB_COMPLETED = 3;

  // Job execution failed.
  JOB_FAILED = 4;

  // Job execution cancelled.
  JOB_CANCELLED = 5;

  // Job skipped because its target partitions were already available.
  JOB_SKIPPED = 6;
}
// Lifecycle states of a build request.
//
// Values are declared in the order below, which is not numeric order:
// BUILD_REQUEST_ANALYSIS_COMPLETED carries number 7 but sits between
// PLANNING (2) and EXECUTING (3).
enum BuildRequestStatus {
  // Zero value.
  BUILD_REQUEST_UNKNOWN = 0;

  // Build request received and queued.
  BUILD_REQUEST_RECEIVED = 1;

  // Graph analysis in progress.
  BUILD_REQUEST_PLANNING = 2;

  // Graph analysis completed successfully.
  BUILD_REQUEST_ANALYSIS_COMPLETED = 7;

  // Jobs are being executed.
  BUILD_REQUEST_EXECUTING = 3;

  // All requested partitions built.
  BUILD_REQUEST_COMPLETED = 4;

  // Build request failed.
  BUILD_REQUEST_FAILED = 5;

  // Build request cancelled.
  BUILD_REQUEST_CANCELLED = 6;
}
// Event marking a step in a build request's lifecycle.
message BuildRequestEvent {
  // Status as an enum, for programmatic use.
  BuildRequestStatus status_code = 1;

  // Status as a human-readable string.
  string status_name = 2;

  // The partitions requested by the build request.
  repeated PartitionRef requested_partitions = 3;

  // Optional status message.
  string message = 4;
}
// Event recording a change in a partition's state.
message PartitionEvent {
  // The partition this event is about.
  PartitionRef partition_ref = 1;

  // Status as an enum, for programmatic use.
  PartitionStatus status_code = 2;

  // Status as a human-readable string.
  string status_name = 3;

  // Optional status message.
  string message = 4;

  // UUID of the job run producing this partition (if applicable).
  string job_run_id = 5;
}
// Event describing a job execution.
message JobEvent {
  // UUID for this job run.
  string job_run_id = 1;

  // The job being executed.
  JobLabel job_label = 2;

  // Partitions this job run produces.
  repeated PartitionRef target_partitions = 3;

  // Status as an enum, for programmatic use.
  JobStatus status_code = 4;

  // Status as a human-readable string.
  string status_name = 5;

  // Optional status message.
  string message = 6;

  // Job configuration used (for SCHEDULED events).
  JobConfig config = 7;

  // Results (for COMPLETED events).
  repeated PartitionManifest manifests = 8;
}
// Event emitted when a build request delegates to an existing build.
message DelegationEvent {
  // The partition being delegated.
  PartitionRef partition_ref = 1;

  // The build request that is handling this partition.
  string delegated_to_build_request_id = 2;

  // Optional message.
  string message = 3;
}
// Event holding the result of job graph analysis (stores the analyzed graph).
message JobGraphEvent {
  // The analyzed job graph.
  JobGraph job_graph = 1;

  // Optional message.
  string message = 2;
}
// Event recording the invalidation of a partition.
message PartitionInvalidationEvent {
  // The partition being invalidated.
  PartitionRef partition_ref = 1;

  // Reason for the invalidation.
  string reason = 2;
}
// Event recording the cancellation of a task.
message TaskCancelEvent {
  // UUID of the job run being cancelled.
  string job_run_id = 1;

  // Reason for the cancellation.
  string reason = 2;
}
// Event recording the cancellation of a build.
message BuildCancelEvent {
  // Reason for the cancellation.
  string reason = 1;
}
// Partition Want
// Placeholder describing how a want was created; it has no fields yet.
message WantSource {
  // TODO
}
// A registered request ("want") for a partition.
message PartitionWant {
  // The partition being requested.
  PartitionRef partition_ref = 1;

  // Server time when the want was registered.
  uint64 created_at = 2;

  // Business time this partition represents.
  optional uint64 data_timestamp = 3;

  // Give up after this long, measured from created_at.
  optional uint64 ttl_seconds = 4;

  // SLA violation after this long, measured from data_timestamp.
  optional uint64 sla_seconds = 5;

  // Cross-graph dependencies.
  repeated string external_dependencies = 6;

  // Unique identifier of the want.
  string want_id = 7;

  // How this want was created.
  WantSource source = 8;
}
// A single build event: shared metadata plus exactly one typed payload.
message BuildEvent {
  // --- Event metadata ---

  // UUID for this event.
  string event_id = 1;

  // Unix timestamp (nanoseconds).
  int64 timestamp = 2;

  // UUID of the build request.
  string build_request_id = 3;

  // --- Event type and payload (one of) ---
  // Payload field numbers start at 10.
  oneof event_type {
    BuildRequestEvent build_request_event = 10;
    PartitionEvent partition_event = 11;
    JobEvent job_event = 12;
    DelegationEvent delegation_event = 13;
    JobGraphEvent job_graph_event = 14;
    PartitionInvalidationEvent partition_invalidation_event = 15;
    TaskCancelEvent task_cancel_event = 16;
    BuildCancelEvent build_cancel_event = 17;
  }
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Job Wrapper Log Protocol
///////////////////////////////////////////////////////////////////////////////////////////////
// Structured log entry that the job wrapper emits to stdout.
message JobLogEntry {
  // Unix timestamp.
  // NOTE(review): carried as a string; the unit/format is not stated here.
  string timestamp = 1;

  // UUID for this job execution.
  string job_id = 2;

  // Partitions being processed by this job.
  repeated PartitionRef outputs = 3;

  // Monotonic sequence number, starting from 1.
  uint64 sequence_number = 4;

  // The entry's payload; exactly one variant is set.
  oneof content {
    LogMessage log = 5;
    MetricPoint metric = 6;
    // Wrapper-specific job events.
    WrapperJobEvent job_event = 7;
    PartitionManifest manifest = 8;
  }
}
// A log message captured from a job's stdout/stderr.
message LogMessage {
  // Severity of a log message. DEBUG is the zero value.
  enum LogLevel {
    DEBUG = 0;
    INFO = 1;
    WARN = 2;
    ERROR = 3;
  }

  // Severity of this message.
  LogLevel level = 1;

  // The message text.
  string message = 2;

  // Additional string key/value fields attached to the message.
  map<string, string> fields = 3;
}
// A metric point emitted by a job.
message MetricPoint {
  // Metric name.
  string name = 1;

  // Metric value.
  double value = 2;

  // String key/value labels attached to the point.
  map<string, string> labels = 3;

  // Unit of the value, as a string.
  string unit = 4;
}
// Event emitted by the job wrapper. This is distinct from the build event
// log's JobEvent.
message WrapperJobEvent {
  // Event kind, e.g. "config_validate_success", "task_launch_success".
  string event_type = 1;

  // String key/value metadata attached to the event.
  map<string, string> metadata = 2;

  // JobStatus enum value rendered as a string.
  optional string job_status = 3;

  // Exit code, when one is set.
  optional int32 exit_code = 4;

  // Job label, for low-cardinality metrics.
  optional string job_label = 5;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// List Operations (Unified CLI/Service Responses)
///////////////////////////////////////////////////////////////////////////////////////////////
//
// Partitions List
//
// Parameters for listing partitions. Every field is optional.
message PartitionsListRequest {
  // NOTE(review): presumably the maximum number of results — confirm.
  optional uint32 limit = 1;

  // NOTE(review): presumably the number of results to skip — confirm.
  optional uint32 offset = 2;

  // Status filter, as a string; accepted values are not defined here.
  optional string status_filter = 3;
}
// Result of listing partitions.
message PartitionsListResponse {
  // One summary per listed partition.
  repeated PartitionSummary partitions = 1;

  uint32 total_count = 2;

  bool has_more = 3;
}
// Summary row describing one partition.
message PartitionSummary {
  PartitionRef partition_ref = 1;

  // Status as an enum, for programmatic use.
  PartitionStatus status_code = 2;

  // Status as a human-readable string.
  string status_name = 3;

  // NOTE(review): timestamp unit is not stated here — confirm.
  int64 last_updated = 4;

  uint32 builds_count = 5;

  uint32 invalidation_count = 6;

  optional string last_successful_build = 7;
}
//
// Jobs List
//
// Parameters for listing jobs. Every field is optional.
message JobsListRequest {
  // NOTE(review): presumably the maximum number of results — confirm.
  optional uint32 limit = 1;

  // Search string; matching rules are not defined here.
  optional string search = 2;
}
// Result of listing jobs.
message JobsListResponse {
  // One summary per listed job.
  repeated JobSummary jobs = 1;

  uint32 total_count = 2;
}
// Summary row describing one job, including run counters.
message JobSummary {
  string job_label = 1;

  // Run counters.
  uint32 total_runs = 2;
  uint32 successful_runs = 3;
  uint32 failed_runs = 4;
  uint32 cancelled_runs = 5;

  double average_partitions_per_run = 6;

  // NOTE(review): timestamp unit is not stated here — confirm.
  int64 last_run_timestamp = 7;

  // Last run status as an enum, for programmatic use.
  JobStatus last_run_status_code = 8;

  // Last run status as a human-readable string.
  string last_run_status_name = 9;

  repeated string recent_builds = 10;
}
//
// Tasks List
//
// Parameters for listing tasks.
message TasksListRequest {
  // NOTE(review): presumably the maximum number of results — confirm.
  optional uint32 limit = 1;
}
// Result of listing tasks.
message TasksListResponse {
  // One summary per listed task.
  repeated TaskSummary tasks = 1;

  uint32 total_count = 2;
}
// Summary row describing one task (job run).
message TaskSummary {
  // Identifiers.
  string job_run_id = 1;
  string job_label = 2;
  string build_request_id = 3;

  // Status as an enum, for programmatic use.
  JobStatus status_code = 4;

  // Status as a human-readable string.
  string status_name = 5;

  repeated PartitionRef target_partitions = 6;

  // Timing. NOTE(review): timestamp units are not stated here — confirm.
  int64 scheduled_at = 7;
  optional int64 started_at = 8;
  optional int64 completed_at = 9;
  optional int64 duration_ms = 10;

  bool cancelled = 11;

  string message = 12;
}
//
// Builds List
//
// Parameters for listing builds. Every field is optional.
message BuildsListRequest {
  // NOTE(review): presumably the maximum number of results — confirm.
  optional uint32 limit = 1;

  // NOTE(review): presumably the number of results to skip — confirm.
  optional uint32 offset = 2;

  // Status filter, as a string; accepted values are not defined here.
  optional string status_filter = 3;
}
// Result of listing builds.
message BuildsListResponse {
  // One summary per listed build.
  repeated BuildSummary builds = 1;

  uint32 total_count = 2;

  bool has_more = 3;
}
// Summary row describing one build request.
message BuildSummary {
  string build_request_id = 1;

  // Status as an enum, for programmatic use.
  BuildRequestStatus status_code = 2;

  // Status as a human-readable string.
  string status_name = 3;

  repeated PartitionRef requested_partitions = 4;

  // Job counters.
  uint32 total_jobs = 5;
  uint32 completed_jobs = 6;
  uint32 failed_jobs = 7;
  uint32 cancelled_jobs = 8;

  // Timing. NOTE(review): timestamp units are not stated here — confirm.
  int64 requested_at = 9;
  optional int64 started_at = 10;
  optional int64 completed_at = 11;
  optional int64 duration_ms = 12;

  bool cancelled = 13;
}
//
// Activity Summary
//
// Activity summary: counters plus recent builds and partitions.
message ActivityResponse {
  uint32 active_builds_count = 1;

  repeated BuildSummary recent_builds = 2;

  repeated PartitionSummary recent_partitions = 3;

  uint32 total_partitions_count = 4;

  // System status, as a string; possible values are not defined here.
  string system_status = 5;

  string graph_name = 6;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Detail Operations (Unified CLI/Service Detail Responses)
///////////////////////////////////////////////////////////////////////////////////////////////
//
// Build Detail
//
// Request for the details of one build.
message BuildDetailRequest {
  // Identifier of the build request to describe.
  string build_request_id = 1;
}
// Details of one build request, including its timeline.
message BuildDetailResponse {
  string build_request_id = 1;

  // Status as an enum, for programmatic use.
  BuildRequestStatus status_code = 2;

  // Status as a human-readable string.
  string status_name = 3;

  repeated PartitionRef requested_partitions = 4;

  // Job counters.
  uint32 total_jobs = 5;
  uint32 completed_jobs = 6;
  uint32 failed_jobs = 7;
  uint32 cancelled_jobs = 8;

  // Timing. NOTE(review): timestamp units are not stated here — confirm.
  int64 requested_at = 9;
  optional int64 started_at = 10;
  optional int64 completed_at = 11;
  optional int64 duration_ms = 12;

  // Cancellation.
  bool cancelled = 13;
  optional string cancel_reason = 14;

  // Timeline entries for this build.
  repeated BuildTimelineEvent timeline = 15;
}
// One entry in a build's timeline.
message BuildTimelineEvent {
  // NOTE(review): timestamp unit is not stated here — confirm.
  int64 timestamp = 1;

  // Status as an enum, for programmatic use (optional).
  optional BuildRequestStatus status_code = 2;

  // Status as a human-readable string (optional).
  optional string status_name = 3;

  string message = 4;

  // Event type, as a string; possible values are not defined here.
  string event_type = 5;

  optional string cancel_reason = 6;
}
//
// Partition Detail
//
// Request for the details of one partition.
message PartitionDetailRequest {
  // The partition to describe.
  PartitionRef partition_ref = 1;
}
// Details of one partition, including its timeline.
//
// Field numbers for last_successful_build (6) and invalidation_count (7) are
// the reverse of those in PartitionSummary.
message PartitionDetailResponse {
  PartitionRef partition_ref = 1;

  // Status as an enum, for programmatic use.
  PartitionStatus status_code = 2;

  // Status as a human-readable string.
  string status_name = 3;

  // NOTE(review): timestamp unit is not stated here — confirm.
  int64 last_updated = 4;

  uint32 builds_count = 5;

  optional string last_successful_build = 6;

  uint32 invalidation_count = 7;

  // Timeline entries for this partition.
  repeated PartitionTimelineEvent timeline = 8;
}
// One entry in a partition's timeline.
message PartitionTimelineEvent {
  // NOTE(review): timestamp unit is not stated here — confirm.
  int64 timestamp = 1;

  // Status as an enum, for programmatic use.
  PartitionStatus status_code = 2;

  // Status as a human-readable string.
  string status_name = 3;

  string message = 4;

  string build_request_id = 5;

  optional string job_run_id = 6;
}
//
// Job Detail
//
// Request for the details of one job.
message JobDetailRequest {
  // Label of the job to describe.
  string job_label = 1;
}
// Details of one job. Fields 1-10 match JobSummary; `runs` is added.
message JobDetailResponse {
  string job_label = 1;

  // Run counters.
  uint32 total_runs = 2;
  uint32 successful_runs = 3;
  uint32 failed_runs = 4;
  uint32 cancelled_runs = 5;

  double average_partitions_per_run = 6;

  // NOTE(review): timestamp unit is not stated here — confirm.
  int64 last_run_timestamp = 7;

  // Last run status as an enum, for programmatic use.
  JobStatus last_run_status_code = 8;

  // Last run status as a human-readable string.
  string last_run_status_name = 9;

  repeated string recent_builds = 10;

  // Per-run details.
  repeated JobRunDetail runs = 11;
}
// Details of a single run of a job.
message JobRunDetail {
  string job_run_id = 1;

  string build_request_id = 2;

  repeated PartitionRef target_partitions = 3;

  // Status as an enum, for programmatic use.
  JobStatus status_code = 4;

  // Status as a human-readable string.
  string status_name = 5;

  // Timing. NOTE(review): timestamp units are not stated here — confirm.
  optional int64 started_at = 6;
  optional int64 completed_at = 7;
  optional int64 duration_ms = 8;

  string message = 9;
}
//
// Task Detail
//
// Request for the details of one task.
message TaskDetailRequest {
  // Identifier of the job run to describe.
  string job_run_id = 1;
}
// Details of one task (job run), including its timeline.
//
// `message` is field 13 here, whereas it is field 12 in TaskSummary; field 12
// here is cancel_reason.
message TaskDetailResponse {
  // Identifiers.
  string job_run_id = 1;
  string job_label = 2;
  string build_request_id = 3;

  // Status as an enum, for programmatic use.
  JobStatus status_code = 4;

  // Status as a human-readable string.
  string status_name = 5;

  repeated PartitionRef target_partitions = 6;

  // Timing. NOTE(review): timestamp units are not stated here — confirm.
  int64 scheduled_at = 7;
  optional int64 started_at = 8;
  optional int64 completed_at = 9;
  optional int64 duration_ms = 10;

  // Cancellation.
  bool cancelled = 11;
  optional string cancel_reason = 12;

  string message = 13;

  // Timeline entries for this task.
  repeated TaskTimelineEvent timeline = 14;
}
// One entry in a task's timeline.
message TaskTimelineEvent {
  // NOTE(review): timestamp unit is not stated here — confirm.
  int64 timestamp = 1;

  // Status as an enum, for programmatic use (optional).
  optional JobStatus status_code = 2;

  // Status as a human-readable string (optional).
  optional string status_name = 3;

  string message = 4;

  // Event type, as a string; possible values are not defined here.
  string event_type = 5;

  optional string cancel_reason = 6;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Job Log Access (Unified CLI/Service Interface)
///////////////////////////////////////////////////////////////////////////////////////////////
// Request to retrieve the logs of a job run.
message JobLogsRequest {
  // UUID of the job run.
  string job_run_id = 1;

  // Unix timestamp (nanoseconds); only logs after this time are wanted.
  int64 since_timestamp = 2;

  // Minimum LogLevel enum value, passed as an integer
  // (0=DEBUG, 1=INFO, 2=WARN, 3=ERROR).
  int32 min_level = 3;

  // Maximum number of entries to return.
  uint32 limit = 4;
}
// Response carrying job log entries.
message JobLogsResponse {
  // Log entries matching the request criteria.
  repeated JobLogEntry entries = 1;

  // True if more entries exist beyond the limit.
  bool has_more = 2;
}
///////////////////////////////////////////////////////////////////////////////////////////////
// Services
///////////////////////////////////////////////////////////////////////////////////////////////
// Service for job configuration and graph analysis.
service DataBuildService {
  // Disabled RPC, kept for reference:
  // // Get job configurations for the specified output references
  // rpc GetJobConfigs(JobConfigureRequest) returns (JobConfigureResponse) {}

  // Analyze the specified output references and return their job graph.
  rpc AnalyzeGraph(GraphAnalyzeRequest) returns (GraphAnalyzeResponse);

  // Execute the specified job graph (implemented by databuild).
  rpc Execute(JobGraph) returns (GraphExecuteResponse);

  // User-facing: build the desired partitions.
  rpc Build(GraphBuildRequest) returns (GraphBuildResponse);
}