From bfcf7cdfd223aff025a36323314b8aba84de0b71 Mon Sep 17 00:00:00 2001 From: Stuart Axelbrooke Date: Wed, 20 Aug 2025 20:05:50 -0700 Subject: [PATCH] Update databuild.proto for v2 --- databuild/databuild.proto | 504 +++++++++++++++++++++++++++++++--- design/questions.md | 5 + plans/19-client-server-cli.md | 1 - 3 files changed, 471 insertions(+), 39 deletions(-) create mode 100644 design/questions.md diff --git a/databuild/databuild.proto b/databuild/databuild.proto index 700609a..947643c 100644 --- a/databuild/databuild.proto +++ b/databuild/databuild.proto @@ -50,7 +50,7 @@ message JobConfigureResponse { repeated JobConfig configs = 1; } // Implemented by the job.cfg bazel rule service JobConfigure { - rpc Configure(JobConfigureRequest) returns (JobConfigureResponse); + rpc Configure(JobConfigureRequest) returns (JobConfigureResponse); } // @@ -86,7 +86,7 @@ message JobExecuteResponse { repeated PartitionManifest manifests = 1; } // Implemented by the job.exec bazel rule service JobExecute { - rpc Execute(JobExecuteRequest) returns (JobExecuteResponse); + rpc Execute(JobExecuteRequest) returns (JobExecuteResponse); } /////////////////////////////////////////////////////////////////////////////////////////////// @@ -119,7 +119,7 @@ message GraphLookupResponse { repeated TaskRef task_refs = 1; } // Implemented per graph service GraphLookup { - rpc Lookup(GraphLookupRequest) returns (GraphLookupResponse); + rpc Lookup(GraphLookupRequest) returns (GraphLookupResponse); } // Request message for graph analyze service @@ -202,23 +202,46 @@ enum JobStatus { } // Build request lifecycle -enum BuildRequestStatus { +enum BuildRequestStatusCode { + // Not good BUILD_REQUEST_UNKNOWN = 0; - BUILD_REQUEST_RECEIVED = 1; // Build request received and queued - BUILD_REQUEST_PLANNING = 2; // Graph analysis in progress - BUILD_REQUEST_ANALYSIS_COMPLETED = 7; // Graph analysis completed successfully - BUILD_REQUEST_EXECUTING = 3; // Jobs are being executed - BUILD_REQUEST_COMPLETED = 
4; // All requested partitions built - BUILD_REQUEST_FAILED = 5; // Build request failed - BUILD_REQUEST_CANCELLED = 6; // Build request cancelled + // Build request received + BUILD_REQUEST_RECEIVED = 1; + // Graph analysis in progress + BUILD_REQUEST_PLANNING = 2; + // Graph analysis completed successfully + BUILD_REQUEST_ANALYSIS_COMPLETED = 7; + // Jobs are being executed + BUILD_REQUEST_EXECUTING = 3; + // All requested partitions built + BUILD_REQUEST_COMPLETED = 4; + // Build precondition failed (e.g. required external data was not available) + BUILD_REQUEST_PRECONDITION_FAILED = 8; + // Build request failed + BUILD_REQUEST_FAILED = 5; + // Build request cancelled + BUILD_REQUEST_CANCELLED = 6; +} + +message BuildRequestStatus { + // Enum for programmatic use + BuildRequestStatusCode code = 1; + // Human readable string + string name = 2; } // Build request lifecycle event message BuildRequestEvent { - BuildRequestStatus status_code = 1; // Enum for programmatic use - string status_name = 2; // Human-readable string + // The status that this event indicates + BuildRequestStatus status = 1; + // Output partitions requested to be built as part of this build repeated PartitionRef requested_partitions = 3; - string message = 4; // Optional status message + // Optional status message + string message = 4; + // The comment attached to the request - contains arbitrary text + string comment = 5; + // The id of the want that triggered this build + string want_id = 6; } // Partition state change event @@ -272,20 +295,66 @@ message BuildCancelEvent { string reason = 1; // Reason for cancellation } -// Partition Want -message WantSource { - // TODO +message WantEvent { + repeated PartitionRef requested_partitions = 1; + // Unique identifier + string want_id = 2; + // How this want was created + WantSource source = 3; } message PartitionWant { - PartitionRef partition_ref = 1; // Partition being requested - uint64 created_at = 2; // Server time when want registered - 
optional uint64 data_timestamp = 3; // Business time this partition represents - optional uint64 ttl_seconds = 4; // Give up after this long (from created_at) - optional uint64 sla_seconds = 5; // SLA violation after this long (from data_timestamp) - repeated string external_dependencies = 6; // Cross-graph dependencies - string want_id = 7; // Unique identifier - WantSource source = 8; // How this want was created + string want_id = 1; + // The ref we want to materialize + PartitionRef ref = 2; + // Server time when want registered + uint64 created_at = 3; + // Business time this partition represents + uint64 data_timestamp = 4; + // Give up after this long (from created_at) + optional uint64 ttl_seconds = 5; + // SLA violation after this long (from data_timestamp) + optional uint64 sla_seconds = 6; + // Cross-graph dependencies determined in the analysis phase triggered upon want submission + // These are per-partition, since wants can be partially, marginally materialized + repeated string external_dependencies = 7; +} + +message WantSource { + // The source of the want + SourceType source_type = 1; + + // TODO: record the actual want source for external requests once we have a real use case +} + +message SourceType { + SourceTypeCode code = 1; + string name = 2; +} + +enum SourceTypeCode { + // Manual CLI request (NOTE(review): as the zero value, this is also what an unset code reads as) + CLI_MANUAL = 0; + // Manual dashboard request + DASHBOARD_MANUAL = 1; + // Scheduled/triggered job + SCHEDULED = 2; + // External API call + API_REQUEST = 3; +} + + +// Marks a partition as tainted, so that it will be rebuilt if a data dep points to it, and will be rebuilt if a live +// want points to it. 
+message TaintEvent { + // The list of partitions to be tainted + repeated PartitionRef refs = 1; + // When the taint was created + uint64 created_at = 2; + // The source of the taint event + SourceType source_type = 3; + // Free text comment attached to the taint + string comment = 4; } // Individual build event @@ -293,7 +362,6 @@ message BuildEvent { // Event metadata string event_id = 1; // UUID for this event int64 timestamp = 2; // Unix timestamp (nanoseconds) - string build_request_id = 3; // UUID of the build request // Event type and payload (one of) oneof event_type { @@ -305,6 +373,8 @@ message BuildEvent { PartitionInvalidationEvent partition_invalidation_event = 15; JobRunCancelEvent job_run_cancel_event = 16; BuildCancelEvent build_cancel_event = 17; + WantEvent want_event = 18; + TaintEvent taint_event = 19; } } @@ -460,7 +530,7 @@ message BuildsListResponse { message BuildSummary { string build_request_id = 1; - BuildRequestStatus status_code = 2; // Enum for programmatic use + BuildRequestStatusCode status_code = 2; // Enum for programmatic use string status_name = 3; // Human-readable string repeated PartitionRef requested_partitions = 4; uint32 total_jobs = 5; @@ -472,6 +542,7 @@ message BuildSummary { optional int64 completed_at = 11; optional int64 duration_ms = 12; bool cancelled = 13; + string comment = 14; } // @@ -501,7 +572,7 @@ message BuildDetailRequest { message BuildDetailResponse { string build_request_id = 1; - BuildRequestStatus status_code = 2; // Enum for programmatic use + BuildRequestStatusCode status_code = 2; // Enum for programmatic use string status_name = 3; // Human-readable string repeated PartitionRef requested_partitions = 4; uint32 total_jobs = 5; @@ -519,7 +590,7 @@ message BuildDetailResponse { message BuildTimelineEvent { int64 timestamp = 1; - optional BuildRequestStatus status_code = 2; // Enum for programmatic use + optional BuildRequestStatusCode status_code = 2; // Enum for programmatic use optional string 
status_name = 3; // Human-readable string string message = 4; string event_type = 5; @@ -641,20 +712,377 @@ message JobLogsResponse { } /////////////////////////////////////////////////////////////////////////////////////////////// -// Services +// Currently unused - implemented via HTTP REST API instead /////////////////////////////////////////////////////////////////////////////////////////////// +// Partition Want (Future feature - currently unused) +// message WantSource { +// // TODO +// } + +// message PartitionWant { +// PartitionRef partition_ref = 1; // Partition being requested +// uint64 created_at = 2; // Server time when want registered +// optional uint64 data_timestamp = 3; // Business time this partition represents +// optional uint64 ttl_seconds = 4; // Give up after this long (from created_at) +// optional uint64 sla_seconds = 5; // SLA violation after this long (from data_timestamp) +// repeated string external_dependencies = 6; // Cross-graph dependencies +// string want_id = 7; // Unique identifier +// WantSource source = 8; // How this want was created +// } + // Service for job configuration and graph analysis +// service DataBuildService { +// // Get job configurations for the specified output references +// // rpc GetJobConfigs(JobConfigureRequest) returns (JobConfigureResponse) {} + +// // Analyze and get the job graph for the specified output references +// rpc AnalyzeGraph(GraphAnalyzeRequest) returns (GraphAnalyzeResponse); + +// // Execute the specified job graph (implemented by databuild) +// rpc Execute(JobGraph) returns (GraphExecuteResponse); + +// // User-facing: build the desired partitions +// rpc Build(GraphBuildRequest) returns (GraphBuildResponse); +// } + + +/////////////////////////////////////////////////////////////////////////////////////////////// +// DataBuildService - v2 of service and CLI interface below +/////////////////////////////////////////////////////////////////////////////////////////////// + +// The service 
that vends all build status information +// Core objects are: +// - Build events - events emitted as part of the build process that indicate status/state +// - BuildRequests - the literal request to build 1+ partitions +// - Partitions - Atomic units of data that represent results of jobs, and act as sufficiency signals for other jobs +// - Jobs - the units of work that build partitions (a single run of one is a JobRun) +// - JobRuns - the specific runs of Jobs +// - Wants - the recorded "want" to build a partition, which will be acted on ASAP +// - Taints - invalidate built partitions, in cases where the result should not be used or should be rebuilt +// Each of these will have a list page, and all but build events will have a summary page. service DataBuildService { -// // Get job configurations for the specified output references -// rpc GetJobConfigs(JobConfigureRequest) returns (JobConfigureResponse) {} + // Build events - exposes literal events from build event log with filters + rpc GetBuildEvents(ListBuildEventsRequest) returns (ListBuildEventsResponse); - // Analyze and get the job graph for the specified output references - rpc AnalyzeGraph(GraphAnalyzeRequest) returns (GraphAnalyzeResponse); + // For batched requests + rpc Batched(BatchedRequest) returns (BatchedResponse); - // Execute the specified job graph (implemented by databuild) - rpc Execute(JobGraph) returns (GraphExecuteResponse); + // BUILDS + // List the available build requests with limited metadata about them (requested partitions, status, requested time, etc) + rpc ListBuildRequests(ListBuildsRequest) returns (ListBuildsResponse); + // Get build status, summary, and paginated lists of produced partitions, and other related metadata + rpc GetBuildSummary(BuildSummaryRequest) returns (BuildSummaryResponse); + // Get a mermaid description of the build request graph with its current status rendered + rpc GetBuildMermaid(BuildSummaryRequest) returns (BuildMermaidResponse); - // User-facing: 
build the desired partitions - rpc Build(GraphBuildRequest) returns (GraphBuildResponse); + // PARTITIONS + // List partitions (built, building, wanted) + rpc ListPartitions(ListPartitionsRequest) returns (ListPartitionsResponse); + // Get details about a specific partition (status, created at, past builds, job runs that built or are building it, etc) + rpc GetPartitionsSummary(PartitionSummaryRequest) returns (PartitionSummaryResponse); + + // JOBS + // List jobs described in the graph plus metadata (success rate, last result, last run at, etc) + rpc ListJobs(ListJobsRequest) returns (ListJobsResponse); + // Get details for a specific job + rpc GetJobSummary(JobSummaryRequest) returns (JobSummaryResponse); + + // JOB RUNS + // List job runs plus basic metadata (job they ran, result, runtime, etc) + rpc ListJobRuns(ListJobRunsRequest) returns (ListJobRunsResponse); + // Get details of a specific job run (above details plus produced partitions, paginated logs, etc) + rpc GetJobRunSummary(JobRunSummaryRequest) returns (JobRunSummaryResponse); + + // Wants + // List wants plus metadata (wanted partitions, created at, status) + rpc ListWants(ListWantsRequest) returns (ListWantsResponse); + // Get details for a want (above plus reasons for want being in current state, etc) + rpc GetWantSummary(WantSummaryRequest) returns (WantSummaryResponse); + // Register a want (list of partition refs, with user, reason, etc) + rpc PutWants(PutWantsRequest) returns (PutWantsResponse); + + // Taints + // List taints plus metadata (tainted partitions, created at, status) + rpc ListTaints(ListTaintsRequest) returns (ListTaintsResponse); + // Summarize the requested taint + rpc GetTaintSummary(TaintSummaryRequest) returns (TaintSummaryResponse); + // Register a taint (list of partition refs, with user, reason, etc) + rpc PutTaints(PutTaintsRequest) returns (PutTaintsResponse); +} + +message RequestContainer { + ListBuildEventsRequest list_build_events = 1; + BuildSummaryRequest 
build_request_status = 2; + + // TODO +} + +message ResponseContainer { + ListBuildEventsResponse list_build_events = 1; + BuildSummaryResponse build_request_status = 2; + // TODO +} + +message ErrorContainer { + string error_message = 1; +} + +message BatchedRequest { + map<string, RequestContainer> requests = 1; // NOTE(review): map type arguments were missing; string keys assumed - confirm +} + +message BatchedResponse { + map<string, ResponseContainer> responses = 1; + map<string, ErrorContainer> errors = 2; +} + +// BEL events + +message ListBuildEventsRequest { + EventFilter filters = 1; + + // Either one of the following must be provided + // Scrolls backwards from the specified timestamp + uint64 max_timestamp_ns = 2; + // Scrolls forward from the specified timestamp + uint64 min_timestamp_ns = 3; +} + +message ListBuildEventsResponse { + // Resulting events are ordered + repeated BuildEvent events = 1; + bool has_more = 2; +} + +// BUILD REQUESTS + +// ANDed filters +message ListBuildsRequest { + // The max time the service will search until to find build requests + uint64 started_until = 1; + // Filters returned build requests to those that currently have this status + repeated string build_status = 2; + // Filters build requests to those that built one of these partitions + repeated string built_partition = 3; + // Filters build requests to those that output one of these partitions (excluding those that were not explicitly + // requested in the build request) + repeated string output_partition = 4; + // Filters by jobs that were run as part of the build + repeated string run_jobs = 5; + // Filters by the ID of the want that triggered the build + repeated string triggering_want_ids = 6; + // Filters by contains match against build request comment + string comment_contains = 7; +} + +// Ordered and paginated by build start time +message ListBuildsResponse { + // Resulting builds + repeated BuildSummary builds = 1; + // Paging bounds for requesting next page + uint64 min_started = 2; + // Indicates if there are more to request + bool has_more = 3; +} + +message BuildSummaryRequest { + string build_id = 1; +} + 
+message BuildSummaryResponse { + string build_id = 1; + + // Overall status of the build + BuildRequestStatusCode status = 2; + // Summary of the build + BuildSummary summary = 3; + // Partitions produced by the build + repeated PartitionBuildStatus partitions = 4; + +} + +message PartitionBuildStatus { + PartitionRef ref = 1; + PartitionStatus status = 2; +} + +message BuildMermaidResponse { + string build_id = 1; + string mermaid = 2; +} + +// PARTITIONS + +message ListPartitionsRequest { + // Optional regex filter + string ref_pattern = 1; + // Optional ORing partition status filter + repeated PartitionStatus partition_status = 2; + // Basic pagination mechanism - returns partitions sorted after the provided ref + string last_partition = 3; +} + +message ListPartitionsResponse { + repeated PartitionSummaryV2 refs = 1; +} + +message PartitionStatusV2 { + PartitionStatus code = 1; + string name = 2; +} + +message PartitionSummaryV2 { + PartitionRef partition_ref = 1; + PartitionStatusV2 status = 2; + uint64 last_updated = 4; + uint64 last_invalidated_at = 6; + repeated string past_build_request = 7; +} + +message PartitionSummaryRequest { + PartitionRef ref = 1; +} + +message PartitionSummaryResponse { + PartitionSummaryV2 partition = 1; +} + +// JOBS + +// No query params - if you need to paginate here something is insane or you're google +message ListJobsRequest {} + +message ListJobsResponse { + repeated JobSummary jobs = 1; +} + +message JobSummaryRequest { + string job_label = 1; +} + +message JobSummaryResponse { + JobSummary job = 1; +} + +// JOB RUNS + +// Paginates backwards +message ListJobRunsRequest { + // Filters to job runs started until this point + uint64 started_until = 1; + // ORing filter matching job run IDs + repeated string job_run_ids = 2; + // ORing filters to job runs that are defined by one of these job labels + repeated string job_labels = 3; + // ORing filters to job runs that were involved in one of these build requests + repeated 
string build_request_ids = 4; + // ORing filters to partitions produced by these job runs + repeated string built_partition_refs = 5; +} + +message ListJobRunsResponse { + repeated JobRunSummary job_runs = 1; + uint64 min_start_at = 2; +} + +message JobRunSummaryRequest { + string job_run_id = 1; +} + +message JobRunSummaryResponse { + JobRunSummary job_run = 1; +} + +// WANTS + +message ListWantsRequest { + // Filters to wants requested until this point + uint64 requested_until = 1; + // Filters to wants whose ttl expires after ttl_until (allows querying "currently wanted") + uint64 ttl_until = 2; +} + +message ListWantsResponse { + repeated PartitionWantSummary wants = 1; + uint64 min_requested_at = 2; +} + +message LabeledPartitionBuildStatus { + PartitionRef ref = 1; + PartitionBuildStatus status = 2; +} + +message PartitionWantSummary { + PartitionWant want = 1; + repeated PartitionSummary partitions = 2; + repeated LabeledPartitionBuildStatus external_partitions = 3; + string comment = 4; +} + +message WantSummaryRequest { + string want_id = 1; +} + +message WantSummaryResponse { + PartitionWantSummary want = 1; +} + +message IndividualWantRequest { + PartitionRef ref = 1; + uint64 data_timestamp = 2; + uint64 ttl_seconds = 3; + uint64 sla_seconds = 4; +} + +message PutWantsRequest { + repeated IndividualWantRequest wants = 1; + WantSource source = 2; + string comment = 3; +} + +message CreatedWant { + PartitionRef ref = 1; + string want_id = 2; +} + +message PutWantsResponse { + repeated CreatedWant wants = 1; +} + +// TAINTS + +message ListTaintsRequest { + uint64 tainted_at_until = 1; +} + +message ListTaintsResponse { + repeated PartitionTaintSummary taints = 1; + uint64 min_tainted_at = 2; +} + +message PartitionTaintSummary { + string taint_id = 1; + repeated PartitionRef refs = 2; + uint64 tainted_at = 3; + SourceType source = 4; + string comment = 5; +} + +message TaintSummaryRequest { + string taint_id = 1; +} + +message 
TaintSummaryResponse { + PartitionTaintSummary taint = 1; +} + +message PutTaintsRequest { + repeated PartitionRef refs = 1; + SourceType source = 2; + string comment = 3; +} + +message PutTaintsResponse { + string taint_id = 1; } diff --git a/design/questions.md b/design/questions.md new file mode 100644 index 0000000..3a70b41 --- /dev/null +++ b/design/questions.md @@ -0,0 +1,5 @@ + +# Questions + +- What happens when we deploy a new graph, and nothing builds a wanted partition? + - Is the interaction model between graph_a -> graph_b actually graph_a registering a want in graph_b? diff --git a/plans/19-client-server-cli.md b/plans/19-client-server-cli.md index 5f1ea4a..7337445 100644 --- a/plans/19-client-server-cli.md +++ b/plans/19-client-server-cli.md @@ -42,7 +42,6 @@ This creates several limitations: - Add new endpoints to handle CLI build requests - Move analysis and execution logic from CLI to service - Service maintains orchestrator state and coordinates builds - - Add real-time progress streaming for CLI consumption 2. **Add CLI-Specific API Endpoints** - `/api/v1/cli/build` - Handle build requests from CLI