databuild/databuild/service/handlers.rs
use super::*;
use crate::event_log::{current_timestamp_nanos, create_build_event};
use crate::orchestration::{BuildOrchestrator, BuildResult};
use crate::mermaid_utils;
use axum::{
extract::{Path, State},
http::StatusCode,
};
use axum_jsonschema::Json;
use log::{error, info};
use serde::Deserialize;
use schemars::JsonSchema;
use std::process::Command;
use std::env;
// Simple base64 URL-safe decoding function for job labels
fn base64_url_decode(encoded: &str) -> Result<String, Box<dyn std::error::Error>> {
// Convert URL-safe base64 back to regular base64
let mut padded = encoded.replace('-', "+").replace('_', "/");
// Add padding if needed
match padded.len() % 4 {
2 => padded.push_str("=="),
3 => padded.push('='),
_ => {}
}
// Manual base64 decoding (simplified)
let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
let mut result = Vec::new();
let mut buffer = 0u32;
let mut bits = 0;
for c in padded.chars() {
if c == '=' { break; }
// Reject characters outside the base64 alphabet instead of silently skipping them.
let index = alphabet.find(c)
.ok_or_else(|| format!("invalid base64 character: {:?}", c))?;
buffer = (buffer << 6) | (index as u32);
bits += 6;
if bits >= 8 {
result.push(((buffer >> (bits - 8)) & 0xFF) as u8);
bits -= 8;
}
}
String::from_utf8(result).map_err(|e| e.into())
}
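// A minimal, hedged round-trip check for base64_url_decode above. The sample inputs are
// illustrative ASCII strings, not real job labels or partition refs from this codebase.
#[cfg(test)]
mod base64_url_decode_tests {
    use super::base64_url_decode;

    #[test]
    fn decodes_unpadded_and_url_safe_input() {
        // "hello" is "aGVsbG8=" in standard base64; the URL-safe form drops the padding.
        assert_eq!(base64_url_decode("aGVsbG8").unwrap(), "hello");
        // "???" is "Pz8/" in standard base64; the URL-safe form uses '_' in place of '/'.
        assert_eq!(base64_url_decode("Pz8_").unwrap(), "???");
    }
}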
pub async fn submit_build_request(
State(service): State<ServiceState>,
Json(request): Json<BuildRequest>,
) -> Result<Json<BuildRequestResponse>, (StatusCode, Json<ErrorResponse>)> {
let build_request_id = BuildGraphService::generate_build_request_id();
let timestamp = current_timestamp_nanos();
info!("Received build request {} for partitions: {:?}", build_request_id, request.partitions);
// Create build request state
let build_state = BuildRequestState {
build_request_id: build_request_id.clone(),
status: BuildRequestStatusCode::BuildRequestReceived.status(),
requested_partitions: request.partitions.clone(),
created_at: timestamp,
updated_at: timestamp,
};
// Store in active builds
{
let mut active_builds = service.active_builds.write().await;
active_builds.insert(build_request_id.clone(), build_state);
}
// Create orchestrator and emit build request received event
let requested_partitions: Vec<PartitionRef> = request.partitions.iter()
.map(|p| PartitionRef { str: p.clone() })
.collect();
let orchestrator = BuildOrchestrator::new(
service.query_engine.clone(),
build_request_id.clone(),
requested_partitions,
);
if let Err(e) = orchestrator.start_build().await {
error!("Failed to log build request received event: {}", e);
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to log build request: {}", e),
}),
));
}
// Start build execution in background
let service_clone = service.clone();
let build_request_id_clone = build_request_id.clone();
let partitions_clone = request.partitions.clone();
tokio::spawn(async move {
if let Err(e) = execute_build_request(
service_clone,
build_request_id_clone,
partitions_clone,
).await {
error!("Build request execution failed: {}", e);
}
});
Ok(Json(BuildRequestResponse { build_request_id }))
}
#[derive(Deserialize, JsonSchema)]
pub struct BuildStatusRequest {
pub build_request_id: String,
}
pub async fn get_build_status(
State(service): State<ServiceState>,
Path(BuildStatusRequest { build_request_id }): Path<BuildStatusRequest>,
) -> Result<Json<BuildDetailResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = crate::repositories::builds::BuildsRepository::new(service.query_engine.clone());
match repository.show_protobuf(&build_request_id).await {
Ok(Some(build_detail)) => {
Ok(Json(build_detail))
}
Ok(None) => {
Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: "Build request not found".to_string(),
}),
))
}
Err(e) => {
error!("Failed to get build status: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get build status: {}", e),
}),
))
}
}
}
#[derive(Deserialize, JsonSchema)]
pub struct CancelBuildRequest {
pub build_request_id: String,
}
pub async fn cancel_build_request(
State(service): State<ServiceState>,
Path(CancelBuildRequest { build_request_id }): Path<CancelBuildRequest>,
) -> Result<Json<BuildCancelResponse>, (StatusCode, Json<ErrorResponse>)> {
// Update build request state
{
let mut active_builds = service.active_builds.write().await;
if let Some(build_state) = active_builds.get_mut(&build_request_id) {
build_state.status = BuildRequestStatusCode::BuildRequestCancelled.status();
build_state.updated_at = current_timestamp_nanos();
} else {
return Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: "Build request not found".to_string(),
}),
));
}
}
// Log cancellation event
let event = create_build_event(
build_request_id.clone(),
crate::build_event::EventType::BuildRequestEvent(BuildRequestEvent {
status: Some(BuildRequestStatusCode::BuildRequestCancelled.status()),
requested_partitions: vec![],
message: "Build request cancelled".to_string(),
comment: None,
want_id: None,
}),
);
if let Err(e) = service.query_engine.append_event(event).await {
error!("Failed to log build request cancelled event: {}", e);
}
info!("Build request {} cancelled", build_request_id);
Ok(Json(BuildCancelResponse {
cancelled: true,
build_request_id,
}))
}
#[derive(Deserialize, JsonSchema)]
pub struct PartitionStatusRequest {
pub partition_ref: String,
}
pub async fn get_partition_status(
State(service): State<ServiceState>,
Path(PartitionStatusRequest { partition_ref }): Path<PartitionStatusRequest>,
) -> Result<Json<PartitionStatusResponse>, (StatusCode, Json<ErrorResponse>)> {
// Get latest partition status
let (status, last_updated) = match service.query_engine.get_latest_partition_status(&partition_ref).await {
Ok(Some((status, timestamp))) => (status, Some(timestamp)),
Ok(None) => {
// No partition events found - this is a legitimate 404
return Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: format!("Partition not found: {}", partition_ref),
}),
));
},
Err(e) => {
error!("Failed to get partition status: {}", e);
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get partition status: {}", e),
}),
));
}
};
// Get active builds for this partition
let build_requests = match service.query_engine.get_active_builds_for_partition(&partition_ref).await {
Ok(builds) => builds,
Err(e) => {
error!("Failed to get active builds for partition: {}", e);
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get active builds for partition: {}", e),
}),
));
}
};
Ok(Json(PartitionStatusResponse {
partition_ref,
status_code: status as i32,
status_name: status.to_display_string(),
last_updated,
build_requests,
}))
}
#[derive(Deserialize, JsonSchema)]
pub struct PartitionEventsRequest {
pub partition_ref: String,
}
pub async fn get_partition_events(
State(service): State<ServiceState>,
Path(PartitionEventsRequest { partition_ref }): Path<PartitionEventsRequest>,
) -> Result<Json<PartitionEventsResponse>, (StatusCode, Json<ErrorResponse>)> {
// Decode the URL-safe base64 partition ref; reject malformed input instead of panicking.
let decoded_partition_ref = match base64_url_decode(&partition_ref) {
Ok(decoded) => decoded,
Err(_) => {
return Err((
StatusCode::BAD_REQUEST,
Json(ErrorResponse {
error: "Invalid partition reference encoding".to_string(),
}),
));
}
};
let events = match service.query_engine.get_partition_events(&decoded_partition_ref, None).await {
Ok(events) => events.into_iter().filter(|e| e.build_request_id.is_some()).map(|e| {
let (job_label, partition_ref, delegated_build_id) = extract_navigation_data(&e.event_type);
BuildEventSummary {
event_id: e.event_id,
timestamp: e.timestamp,
event_type: event_type_to_string(&e.event_type),
message: event_to_message(&e.event_type),
build_request_id: e.build_request_id.clone().unwrap(),
job_label,
partition_ref,
delegated_build_id,
}
}).collect(),
Err(e) => {
error!("Failed to get partition events: {}", e);
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get partition events: {}", e),
}),
));
}
};
Ok(Json(PartitionEventsResponse {
partition_ref: decoded_partition_ref,
events,
}))
}
pub async fn analyze_build_graph(
State(service): State<ServiceState>,
Json(request): Json<AnalyzeRequest>,
) -> Result<Json<AnalyzeResponse>, (StatusCode, Json<ErrorResponse>)> {
// Call the analyze command (use temporary ID for analyze-only requests)
let temp_build_request_id = BuildGraphService::generate_build_request_id();
let analyze_result = run_analyze_command(&service, &temp_build_request_id, &request.partitions).await;
match analyze_result {
Ok(job_graph) => {
let job_graph_json = match serde_json::to_value(&job_graph) {
Ok(json) => json,
Err(e) => {
error!("Failed to serialize job graph: {}", e);
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to serialize job graph: {}", e),
}),
));
}
};
Ok(Json(AnalyzeResponse {
job_graph: job_graph_json,
}))
}
Err(e) => {
error!("Failed to analyze build graph: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to analyze build graph: {}", e),
}),
))
}
}
}
async fn execute_build_request(
service: ServiceState,
build_request_id: String,
partitions: Vec<String>,
) -> Result<(), String> {
info!("Starting build execution for request {}", build_request_id);
// Create orchestrator for this build request
let requested_partitions: Vec<PartitionRef> = partitions.iter()
.map(|p| PartitionRef { str: p.clone() })
.collect();
let orchestrator = BuildOrchestrator::new(
service.query_engine.clone(),
build_request_id.clone(),
requested_partitions,
);
// Update status to planning
update_build_request_status(&service, &build_request_id, BuildRequestStatusCode::BuildRequestPlanning.status()).await;
// Log planning event
if let Err(e) = orchestrator.start_planning().await {
error!("Failed to log planning event: {}", e);
}
// Analyze the build graph
let job_graph = match run_analyze_command(&service, &build_request_id, &partitions).await {
Ok(graph) => graph,
Err(e) => {
error!("Failed to analyze build graph: {}", e);
update_build_request_status(&service, &build_request_id, BuildRequestStatusCode::BuildRequestFailed.status()).await;
// Log failure event
if let Err(log_err) = orchestrator.complete_build(BuildResult::Failed { jobs_completed: 0, jobs_failed: 1 }).await {
error!("Failed to log failure event: {}", log_err);
}
return Err(e);
}
};
// Update status to executing
update_build_request_status(&service, &build_request_id, BuildRequestStatusCode::BuildRequestExecuting.status()).await;
// Log executing event
if let Err(e) = orchestrator.start_execution().await {
error!("Failed to log executing event: {}", e);
}
// Execute the build graph
match run_execute_command(&service, &build_request_id, &job_graph).await {
Ok(_) => {
info!("Build request {} completed successfully", build_request_id);
update_build_request_status(&service, &build_request_id, BuildRequestStatusCode::BuildRequestCompleted.status()).await;
// Log completion event
if let Err(e) = orchestrator.complete_build(BuildResult::Success { jobs_completed: 0 }).await {
error!("Failed to log completion event: {}", e);
}
Ok(())
}
Err(e) => {
error!("Build request {} failed: {}", build_request_id, e);
update_build_request_status(&service, &build_request_id, BuildRequestStatusCode::BuildRequestFailed.status()).await;
// Log failure event
if let Err(log_err) = orchestrator.complete_build(BuildResult::Failed { jobs_completed: 0, jobs_failed: 1 }).await {
error!("Failed to log failure event: {}", log_err);
}
Err(e)
}
}
}
async fn update_build_request_status(
service: &ServiceState,
build_request_id: &str,
status: BuildRequestStatus,
) {
let mut active_builds = service.active_builds.write().await;
if let Some(build_state) = active_builds.get_mut(build_request_id) {
build_state.status = status;
build_state.updated_at = current_timestamp_nanos();
}
}
async fn run_analyze_command(
service: &ServiceState,
build_request_id: &str,
partitions: &[String],
) -> Result<JobGraph, String> {
// Run analyze command
let analyze_binary = env::var("DATABUILD_ANALYZE_BINARY")
.unwrap_or_else(|_| "databuild_analyze".to_string());
let output = Command::new(&analyze_binary)
.args(partitions)
.env("DATABUILD_JOB_LOOKUP_PATH", &service.job_lookup_path)
.env("DATABUILD_CANDIDATE_JOBS", serde_json::to_string(&service.candidate_jobs).unwrap())
.env("DATABUILD_BUILD_EVENT_LOG", &service.event_log_uri)
.env("DATABUILD_BUILD_REQUEST_ID", build_request_id)
.output()
.map_err(|e| format!("Failed to execute analyze command: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Analyze command failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let job_graph: JobGraph = serde_json::from_str(&stdout)
.map_err(|e| format!("Failed to parse analyze result: {}", e))?;
Ok(job_graph)
}
async fn run_execute_command(
service: &ServiceState,
build_request_id: &str,
job_graph: &JobGraph,
) -> Result<(), String> {
// Serialize job graph
let job_graph_json = serde_json::to_string(job_graph)
.map_err(|e| format!("Failed to serialize job graph: {}", e))?;
// Run execute command
let execute_binary = env::var("DATABUILD_EXECUTE_BINARY")
.unwrap_or_else(|_| "databuild_execute".to_string());
let mut child = Command::new(&execute_binary)
.env("DATABUILD_JOB_LOOKUP_PATH", &service.job_lookup_path)
.env("DATABUILD_CANDIDATE_JOBS", serde_json::to_string(&service.candidate_jobs).unwrap())
.env("DATABUILD_BUILD_EVENT_LOG", &service.event_log_uri)
.env("DATABUILD_BUILD_REQUEST_ID", build_request_id)
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| format!("Failed to spawn execute command: {}", e))?;
// Write the job graph to the child's stdin; dropping the handle at the end of this
// block closes the pipe so the child sees EOF.
if let Some(mut stdin) = child.stdin.take() {
use std::io::Write;
stdin.write_all(job_graph_json.as_bytes())
.map_err(|e| format!("Failed to write job graph to stdin: {}", e))?;
}
// Wait for completion
let output = child.wait_with_output()
.map_err(|e| format!("Failed to wait for execute command: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Execute command failed: {}", stderr));
}
Ok(())
}
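// Hedged sketch, not wired into the handlers: run_analyze_command and run_execute_command
// above block a tokio worker thread on std::process::Command for the lifetime of the child
// process. A non-blocking variant could use tokio::process instead, assuming tokio's
// "process" and "io-util" features are enabled; the function name here is illustrative and
// the env-var contract mirrors run_execute_command.
#[allow(dead_code)]
async fn run_execute_command_nonblocking_sketch(
    service: &ServiceState,
    build_request_id: &str,
    job_graph_json: &str,
) -> Result<(), String> {
    use tokio::io::AsyncWriteExt;
    let execute_binary = env::var("DATABUILD_EXECUTE_BINARY")
        .unwrap_or_else(|_| "databuild_execute".to_string());
    let candidate_jobs = serde_json::to_string(&service.candidate_jobs)
        .map_err(|e| format!("Failed to serialize candidate jobs: {}", e))?;
    let mut child = tokio::process::Command::new(&execute_binary)
        .env("DATABUILD_JOB_LOOKUP_PATH", &service.job_lookup_path)
        .env("DATABUILD_CANDIDATE_JOBS", candidate_jobs)
        .env("DATABUILD_BUILD_EVENT_LOG", &service.event_log_uri)
        .env("DATABUILD_BUILD_REQUEST_ID", build_request_id)
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .map_err(|e| format!("Failed to spawn execute command: {}", e))?;
    if let Some(mut stdin) = child.stdin.take() {
        // Write the job graph, then drop the handle so the child sees EOF.
        stdin.write_all(job_graph_json.as_bytes()).await
            .map_err(|e| format!("Failed to write job graph to stdin: {}", e))?;
    }
    let output = child.wait_with_output().await
        .map_err(|e| format!("Failed to wait for execute command: {}", e))?;
    if !output.status.success() {
        return Err(format!(
            "Execute command failed: {}",
            String::from_utf8_lossy(&output.stderr)
        ));
    }
    Ok(())
}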
fn event_type_to_string(event_type: &Option<crate::build_event::EventType>) -> String {
match event_type {
Some(crate::build_event::EventType::BuildRequestEvent(_)) => "build_request".to_string(),
Some(crate::build_event::EventType::PartitionEvent(_)) => "partition".to_string(),
Some(crate::build_event::EventType::JobEvent(_)) => "job".to_string(),
Some(crate::build_event::EventType::DelegationEvent(_)) => "delegation".to_string(),
Some(crate::build_event::EventType::JobGraphEvent(_)) => "job_graph".to_string(),
Some(crate::build_event::EventType::PartitionInvalidationEvent(_)) => "partition_invalidation".to_string(),
Some(crate::build_event::EventType::JobRunCancelEvent(_)) => "task_cancel".to_string(),
Some(crate::build_event::EventType::BuildCancelEvent(_)) => "build_cancel".to_string(),
Some(crate::build_event::EventType::WantEvent(_)) => "want".to_string(),
Some(crate::build_event::EventType::TaintEvent(_)) => "taint".to_string(),
None => "INVALID_EVENT_TYPE".to_string(),
}
}
fn event_to_message(event_type: &Option<crate::build_event::EventType>) -> String {
match event_type {
Some(crate::build_event::EventType::BuildRequestEvent(event)) => event.message.clone(),
Some(crate::build_event::EventType::PartitionEvent(event)) => event.message.clone(),
Some(crate::build_event::EventType::JobEvent(event)) => event.message.clone(),
Some(crate::build_event::EventType::DelegationEvent(event)) => event.message.clone(),
Some(crate::build_event::EventType::JobGraphEvent(event)) => event.message.clone(),
Some(crate::build_event::EventType::PartitionInvalidationEvent(event)) => event.reason.clone(),
Some(crate::build_event::EventType::JobRunCancelEvent(event)) => event.reason.clone(),
Some(crate::build_event::EventType::BuildCancelEvent(event)) => event.reason.clone(),
Some(crate::build_event::EventType::WantEvent(event)) => event.comment.clone(),
Some(crate::build_event::EventType::TaintEvent(event)) => event.comment.clone(),
None => "INVALID_EVENT_NO_MESSAGE".to_string(),
}
}
fn extract_navigation_data(event_type: &Option<crate::build_event::EventType>) -> (Option<String>, Option<String>, Option<String>) {
match event_type {
Some(crate::build_event::EventType::JobEvent(event)) => {
let job_label = event.job_label.as_ref().map(|l| l.label.clone());
(job_label, None, None)
},
Some(crate::build_event::EventType::PartitionEvent(event)) => {
let partition_ref = event.partition_ref.as_ref().map(|r| r.str.clone());
(None, partition_ref, None)
},
Some(crate::build_event::EventType::DelegationEvent(event)) => {
let delegated_build_id = Some(event.delegated_to_build_request_id.clone());
(None, None, delegated_build_id)
},
Some(crate::build_event::EventType::BuildRequestEvent(_)) => {
// Build request events don't need navigation links (self-referential)
(None, None, None)
},
Some(crate::build_event::EventType::JobGraphEvent(_)) => {
// Job graph events don't need navigation links
(None, None, None)
},
Some(crate::build_event::EventType::PartitionInvalidationEvent(event)) => {
let partition_ref = event.partition_ref.as_ref().map(|r| r.str.clone());
(None, partition_ref, None)
},
Some(crate::build_event::EventType::JobRunCancelEvent(_event)) => {
// Task cancel events reference job run IDs, which we could potentially navigate to
(None, None, None)
},
Some(crate::build_event::EventType::BuildCancelEvent(_)) => {
// Build cancel events don't need navigation links
(None, None, None)
},
Some(crate::build_event::EventType::WantEvent(_)) => {
(None, None, None)
},
Some(crate::build_event::EventType::TaintEvent(_)) => {
(None, None, None)
},
None => (None, None, None),
}
}
// New handlers for list endpoints
use axum::extract::Query;
use std::collections::HashMap;
pub async fn list_build_requests(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<crate::BuildsListResponse>, (StatusCode, Json<ErrorResponse>)> {
let limit = params.get("limit")
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(20)
.min(100); // Cap at 100
// Use repository with protobuf format
let builds_repo = BuildsRepository::new(service.query_engine.clone());
match builds_repo.list_protobuf(Some(limit as usize)).await {
Ok(builds) => {
let total_count = builds.len() as u32;
let response = crate::BuildsListResponse {
builds,
total_count, // TODO: implement proper total count with pagination
has_more: false, // TODO: implement proper pagination
};
Ok(Json(response))
},
Err(e) => {
error!("Failed to list build requests: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list build requests: {}", e),
}),
))
}
}
}
pub async fn list_partitions(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<crate::PartitionsListResponse>, (StatusCode, Json<ErrorResponse>)> {
let limit = params.get("limit")
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(20)
.min(100); // Cap at 100
// Use repository with protobuf format
// TODO: Update PartitionsRepository to work with BELQueryEngine
// let partitions_repo = PartitionsRepository::new(service.query_engine.clone());
// Built for the pending PartitionsRepository migration; not consumed yet.
let _request = PartitionsListRequest {
limit: Some(limit),
offset: None,
status_filter: None,
};
// TODO: Implement with PartitionsRepository using BELQueryEngine
let response = PartitionsListResponse {
partitions: vec![],
total_count: 0,
has_more: false,
};
Ok(Json(response))
}
// New unified protobuf-based handler for future migration
pub async fn list_partitions_unified(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<crate::PartitionsListResponse>, (StatusCode, Json<ErrorResponse>)> {
let limit = params.get("limit")
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(20)
.min(100); // Cap at 100
let offset = params.get("offset")
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(0);
let status_filter = params.get("status")
.and_then(|s| crate::PartitionStatus::from_display_string(s));
// Use repository with protobuf response format
// TODO: Update PartitionsRepository to work with BELQueryEngine
// let repository = crate::repositories::partitions::PartitionsRepository::new(service.query_engine.clone());
// Built for the pending PartitionsRepository migration; not consumed yet.
let _request = crate::PartitionsListRequest {
limit: Some(limit),
offset: Some(offset),
status_filter: status_filter.map(|s| s.to_display_string()),
};
// TODO: Implement with PartitionsRepository using BELQueryEngine
let response = PartitionsListResponse {
partitions: vec![],
total_count: 0,
has_more: false,
};
Ok(Json(response))
}
pub async fn get_activity_summary(
State(service): State<ServiceState>,
) -> Result<Json<ActivityApiResponse>, (StatusCode, Json<ErrorResponse>)> {
// Build activity response using repositories to get dual status fields
let builds_repo = BuildsRepository::new(service.query_engine.clone());
// TODO: Update PartitionsRepository to work with BELQueryEngine
let partitions_repo = PartitionsRepository::new(service.query_engine.clone());
// Get recent builds and partitions with dual status fields
let recent_builds = builds_repo.list_protobuf(Some(5)).await.unwrap_or_else(|_| vec![]);
let recent_partitions_request = PartitionsListRequest {
limit: Some(10),
offset: None,
status_filter: None
};
let recent_partitions_response = partitions_repo.list_protobuf(recent_partitions_request).await
.unwrap_or_else(|_| crate::PartitionsListResponse {
partitions: vec![],
total_count: 0,
has_more: false
});
// Get activity counts (fallback to event log method for now)
let summary = service.query_engine.get_activity_summary().await.unwrap_or_else(|_| {
crate::event_log::ActivitySummary {
active_builds_count: 0,
recent_builds: vec![],
recent_partitions: vec![],
total_partitions_count: 0,
}
});
// Simple system status logic
let system_status = if summary.active_builds_count > 10 {
"degraded".to_string()
} else {
"healthy".to_string()
};
// Build protobuf activity response with dual status fields
let protobuf_response = crate::ActivityResponse {
active_builds_count: summary.active_builds_count,
recent_builds,
recent_partitions: recent_partitions_response.partitions,
total_partitions_count: summary.total_partitions_count,
system_status,
graph_name: service.graph_label.clone(),
};
let api_response = ActivityApiResponse {
data: protobuf_response,
request_id: None,
};
Ok(Json(api_response))
}
#[derive(Deserialize, JsonSchema)]
pub struct JobMetricsRequest {
pub label: String,
}
pub async fn list_jobs(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<crate::JobsListResponse>, (StatusCode, Json<ErrorResponse>)> {
let limit = params.get("limit")
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(20)
.min(100); // Cap at 100
let search = params.get("search").map(|s| s.to_string());
// Use repository with protobuf format
let jobs_repo = JobsRepository::new(service.query_engine.clone());
let request = JobsListRequest {
limit: Some(limit),
search,
};
match jobs_repo.list_protobuf(request).await {
Ok(response) => {
Ok(Json(response))
},
Err(e) => {
error!("Failed to list jobs: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list jobs: {}", e),
}),
))
}
}
}
pub async fn get_job_metrics(
State(service): State<ServiceState>,
Path(JobMetricsRequest { label }): Path<JobMetricsRequest>,
) -> Result<Json<JobMetricsResponse>, (StatusCode, Json<ErrorResponse>)> {
// Decode the base64-encoded job label
let decoded_label = match base64_url_decode(&label) {
Ok(decoded) => decoded,
Err(_) => {
return Err((
StatusCode::BAD_REQUEST,
Json(ErrorResponse {
error: "Invalid job label encoding".to_string(),
}),
));
}
};
log::info!("get_job_metrics: encoded='{}', decoded='{}'", label, decoded_label);
// Get overall job metrics
let metrics_query = "
WITH job_run_durations AS (
SELECT
be.build_request_id,
(MAX(be.timestamp) - MIN(be.timestamp)) / 1000000 as duration_ms
FROM job_events je
JOIN build_events be ON je.event_id = be.event_id
WHERE je.job_label = ?
GROUP BY be.build_request_id
HAVING MAX(CASE WHEN je.status IN ('3', '4', '5', '6') THEN 1 ELSE 0 END) = 1
)
SELECT
COUNT(CASE WHEN je.status IN ('3', '6') THEN 1 END) as completed_count,
COUNT(CASE WHEN je.status IN ('3', '4', '5', '6') THEN 1 END) as total_count,
COALESCE(AVG(jrd.duration_ms), 0) as avg_duration_ms
FROM job_events je
JOIN build_events be ON je.event_id = be.event_id
LEFT JOIN job_run_durations jrd ON be.build_request_id = jrd.build_request_id
WHERE je.job_label = ?";
// str::replace substitutes every "?" placeholder in a single pass, so the label only needs
// to be interpolated once. Note the label is inlined into the SQL rather than bound as a
// parameter, which assumes job labels are well-formed.
let metrics_sql = metrics_query.replace("?", &format!("'{}'", decoded_label));
let (success_rate, total_runs, avg_duration_ms) = match service.query_engine.execute_query(&metrics_sql).await {
Ok(result) if !result.rows.is_empty() => {
let row = &result.rows[0];
let completed_count: u32 = row[0].parse().unwrap_or(0);
let total_count: u32 = row[1].parse().unwrap_or(0);
let avg_duration: Option<i64> = row[2].parse::<f64>().ok().map(|f| f as i64);
let success_rate = if total_count > 0 {
completed_count as f64 / total_count as f64
} else {
0.0
};
(success_rate, total_count, avg_duration)
}
_ => (0.0, 0, None),
};
// Get recent runs - consolidated by build request to show final status per job run
let recent_runs_query = "
SELECT
be.build_request_id,
je.target_partitions,
je.status,
MIN(be.timestamp) as started_at,
MAX(be.timestamp) as completed_at
FROM job_events je
JOIN build_events be ON je.event_id = be.event_id
WHERE je.job_label = ?
GROUP BY be.build_request_id, je.target_partitions
HAVING je.status = (
SELECT je2.status
FROM job_events je2
JOIN build_events be2 ON je2.event_id = be2.event_id
WHERE je2.job_label = ?
AND be2.build_request_id = be.build_request_id
ORDER BY be2.timestamp DESC
LIMIT 1
)
ORDER BY started_at DESC
LIMIT 50";
let recent_runs_sql = recent_runs_query.replace("?", &format!("'{}'", decoded_label));
let recent_runs = match service.query_engine.execute_query(&recent_runs_sql).await {
Ok(result) => {
result.rows.into_iter().map(|row| {
let build_request_id = row[0].clone();
let partitions_json: String = row[1].clone();
let status_code: String = row[2].clone();
let started_at: i64 = row[3].parse().unwrap_or(0);
let completed_at: i64 = row[4].parse().unwrap_or(started_at);
let duration_ms: Option<i64> = if completed_at > started_at {
Some(completed_at - started_at)
} else {
None
};
let partitions: Vec<String> = serde_json::from_str::<Vec<serde_json::Value>>(&partitions_json)
.unwrap_or_default()
.into_iter()
.filter_map(|v| {
v.get("str").and_then(|s| s.as_str()).map(|s| s.to_string())
})
.collect();
let (status_code_int, status_name) = match status_code.as_str() {
"1" => (1, "scheduled"),
"2" => (2, "running"),
"3" => (3, "completed"),
"4" => (4, "failed"),
"5" => (5, "cancelled"),
"6" => (6, "skipped"),
_ => (0, "unknown"),
};
JobRunSummary {
build_request_id,
partitions,
status_code: status_code_int,
status_name: status_name.to_string(),
duration_ms,
started_at,
}
}).collect()
}
Err(_) => Vec::new(),
};
// Get daily stats (simplified - just recent days)
let daily_stats_query = "
WITH daily_job_durations AS (
SELECT
date(be.timestamp/1000000000, 'unixepoch') as date,
be.build_request_id,
(MAX(be.timestamp) - MIN(be.timestamp)) / 1000000 as duration_ms
FROM job_events je
JOIN build_events be ON je.event_id = be.event_id
WHERE je.job_label = ?
AND be.timestamp > (strftime('%s', 'now', '-30 days') * 1000000000)
GROUP BY date(be.timestamp/1000000000, 'unixepoch'), be.build_request_id
HAVING MAX(CASE WHEN je.status IN ('3', '4', '5', '6') THEN 1 ELSE 0 END) = 1
)
SELECT
date(be.timestamp/1000000000, 'unixepoch') as date,
COUNT(CASE WHEN je.status IN ('3', '6') THEN 1 END) as completed_count,
COUNT(CASE WHEN je.status IN ('3', '4', '5', '6') THEN 1 END) as total_count,
COALESCE(AVG(djd.duration_ms), 0) as avg_duration_ms
FROM job_events je
JOIN build_events be ON je.event_id = be.event_id
LEFT JOIN daily_job_durations djd ON date(be.timestamp/1000000000, 'unixepoch') = djd.date
WHERE je.job_label = ?
AND be.timestamp > (strftime('%s', 'now', '-30 days') * 1000000000)
GROUP BY date(be.timestamp/1000000000, 'unixepoch')
ORDER BY date DESC";
let daily_stats_sql = daily_stats_query.replace("?", &format!("'{}'", decoded_label));
let daily_stats = match service.query_engine.execute_query(&daily_stats_sql).await {
Ok(result) => {
result.rows.into_iter().map(|row| {
let date = row[0].clone();
let completed_count: u32 = row[1].parse().unwrap_or(0);
let total_count: u32 = row[2].parse().unwrap_or(0);
let avg_duration: Option<i64> = row[3].parse::<f64>().ok().map(|f| f as i64);
let success_rate = if total_count > 0 {
completed_count as f64 / total_count as f64
} else {
0.0
};
JobDailyStats {
date,
success_rate,
avg_duration_ms: avg_duration,
total_runs: total_count,
}
}).collect()
}
Err(_) => Vec::new(),
};
Ok(Json(JobMetricsResponse {
job_label: decoded_label,
success_rate,
avg_duration_ms,
total_runs,
recent_runs,
daily_stats,
}))
}
// Repository-based handlers for the new shared core functionality
use crate::repositories::{
partitions::PartitionsRepository,
jobs::JobsRepository,
tasks::TasksRepository,
builds::BuildsRepository,
};
/// Request for partition detail endpoint
#[derive(Deserialize, JsonSchema)]
pub struct PartitionDetailRequest {
pub partition_ref: String,
}
/// Get detailed partition information with timeline
pub async fn get_partition_detail(
State(service): State<ServiceState>,
Path(PartitionDetailRequest { partition_ref }): Path<PartitionDetailRequest>,
) -> Result<Json<PartitionDetailResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = PartitionsRepository::new(service.query_engine.clone());
// Decode the URL-safe base64 partition ref; reject malformed input instead of panicking.
let decoded_partition_ref = match base64_url_decode(&partition_ref) {
Ok(decoded) => decoded,
Err(_) => {
return Err((
StatusCode::BAD_REQUEST,
Json(ErrorResponse {
error: "Invalid partition reference encoding".to_string(),
}),
));
}
};
match repository.show_protobuf(&decoded_partition_ref).await {
Ok(Some(protobuf_response)) => {
let timeline_events: Vec<PartitionTimelineEvent> = protobuf_response.timeline.into_iter().map(|event| {
PartitionTimelineEvent {
timestamp: event.timestamp,
status_code: event.status_code,
status_name: event.status_name,
message: event.message,
build_request_id: event.build_request_id,
job_run_id: event.job_run_id,
}
}).collect();
Ok(Json(PartitionDetailResponse {
partition_ref: protobuf_response.partition_ref,
status_code: protobuf_response.status_code,
status_name: protobuf_response.status_name,
last_updated: protobuf_response.last_updated,
builds_count: protobuf_response.builds_count,
last_successful_build: protobuf_response.last_successful_build,
invalidation_count: protobuf_response.invalidation_count,
timeline: timeline_events,
}))
}
Ok(None) => Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: format!("Partition '{}' not found", partition_ref),
}),
)),
Err(e) => {
error!("Failed to get partition detail: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get partition detail: {}", e),
}),
))
}
}
}
/// Invalidate a partition
#[derive(Deserialize, JsonSchema)]
pub struct InvalidatePartitionRequest {
pub reason: String,
pub build_request_id: String,
}
/// Request for partition invalidation endpoint path
#[derive(Deserialize, JsonSchema)]
pub struct PartitionInvalidatePathRequest {
pub partition_ref: String,
}
pub async fn invalidate_partition(
State(service): State<ServiceState>,
Path(PartitionInvalidatePathRequest { partition_ref }): Path<PartitionInvalidatePathRequest>,
Json(request): Json<InvalidatePartitionRequest>,
) -> Result<Json<PartitionInvalidateResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = PartitionsRepository::new(service.query_engine.clone());
match repository.invalidate(&partition_ref, request.reason.clone(), request.build_request_id).await {
Ok(()) => Ok(Json(PartitionInvalidateResponse {
invalidated: true,
partition_ref,
reason: request.reason,
})),
Err(e) => {
error!("Failed to invalidate partition: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to invalidate partition: {}", e),
}),
))
}
}
}
/// List partitions using repository
pub async fn list_partitions_repository(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<PartitionsListApiResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = PartitionsRepository::new(service.query_engine.clone());
let limit = params.get("limit").and_then(|s| s.parse().ok());
let request = PartitionsListRequest {
limit,
offset: None,
status_filter: None,
};
match repository.list_protobuf(request).await {
Ok(protobuf_response) => {
let total_count = protobuf_response.total_count;
let has_more = protobuf_response.has_more;
let api_response = PartitionsListApiResponse {
data: protobuf_response,
request_id: None, // TODO: add request ID tracking
pagination: Some(PaginationInfo {
total_count,
has_more,
limit: limit.map(|l| l as u32),
offset: None,
}),
};
Ok(Json(api_response))
},
Err(e) => {
error!("Failed to list partitions: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list partitions: {}", e),
}),
))
}
}
}
/// List tasks using repository
pub async fn list_tasks_repository(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<JobRunsListApiResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = TasksRepository::new(service.query_engine.clone());
let limit = params.get("limit").and_then(|s| s.parse().ok());
let request = JobRunsListRequest { limit };
match repository.list_protobuf(request).await {
Ok(protobuf_response) => {
let total_count = protobuf_response.total_count;
let api_response = JobRunsListApiResponse {
data: protobuf_response,
request_id: None, // TODO: add request ID tracking
pagination: Some(PaginationInfo {
total_count,
has_more: false, // Tasks list doesn't implement has_more yet
limit: limit.map(|l| l as u32),
offset: None,
}),
};
Ok(Json(api_response))
},
Err(e) => {
error!("Failed to list tasks: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list tasks: {}", e),
}),
))
}
}
}
/// List jobs using repository
pub async fn list_jobs_repository(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<JobsListApiResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = JobsRepository::new(service.query_engine.clone());
let limit = params.get("limit").and_then(|s| s.parse().ok());
let search = params.get("search").map(|s| s.to_string());
let request = JobsListRequest {
limit,
search,
};
match repository.list_protobuf(request).await {
Ok(protobuf_response) => {
let total_count = protobuf_response.total_count;
let api_response = JobsListApiResponse {
data: protobuf_response,
request_id: None, // TODO: add request ID tracking
pagination: Some(PaginationInfo {
total_count,
has_more: false, // Jobs list doesn't implement has_more yet
limit: limit.map(|l| l as u32),
offset: None,
}),
};
Ok(Json(api_response))
},
Err(e) => {
error!("Failed to list jobs: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list jobs: {}", e),
}),
))
}
}
}
/// Request for job detail endpoint
#[derive(Deserialize, JsonSchema)]
pub struct JobDetailRequest {
pub label: String,
}
/// Get detailed job information
pub async fn get_job_detail(
State(service): State<ServiceState>,
Path(JobDetailRequest { label }): Path<JobDetailRequest>,
) -> Result<Json<JobDetailResponse>, (StatusCode, Json<ErrorResponse>)> {
// Decode the URL-safe base64 job label; reject malformed input instead of panicking.
let job_label = match base64_url_decode(&label) {
Ok(decoded) => decoded,
Err(_) => {
return Err((
StatusCode::BAD_REQUEST,
Json(ErrorResponse {
error: "Invalid job label encoding".to_string(),
}),
));
}
};
let repository = JobsRepository::new(service.query_engine.clone());
match repository.show_protobuf(&job_label).await {
Ok(Some(protobuf_response)) => {
let run_summaries: Vec<JobRunDetail> = protobuf_response.runs.into_iter().map(|run| {
JobRunDetail {
job_run_id: run.job_run_id,
build_request_id: run.build_request_id,
target_partitions: run.target_partitions,
status_code: run.status_code,
status_name: run.status_name,
started_at: run.started_at,
completed_at: run.completed_at,
duration_ms: run.duration_ms,
message: run.message,
}
}).collect();
Ok(Json(JobDetailResponse {
job_label: protobuf_response.job_label,
total_runs: protobuf_response.total_runs,
successful_runs: protobuf_response.successful_runs,
failed_runs: protobuf_response.failed_runs,
cancelled_runs: protobuf_response.cancelled_runs,
average_partitions_per_run: protobuf_response.average_partitions_per_run,
last_run_timestamp: protobuf_response.last_run_timestamp,
last_run_status_code: protobuf_response.last_run_status_code,
last_run_status_name: protobuf_response.last_run_status_name,
recent_builds: protobuf_response.recent_builds,
runs: run_summaries,
}))
}
Ok(None) => Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: format!("Job '{}' not found", job_label),
}),
)),
Err(e) => {
error!("Failed to get job detail: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get job detail: {}", e),
}),
))
}
}
}
/// List tasks using repository
pub async fn list_tasks(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<crate::JobRunsListResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = TasksRepository::new(service.query_engine.clone());
let limit = params.get("limit").and_then(|s| s.parse().ok());
let request = JobRunsListRequest { limit };
match repository.list_protobuf(request).await {
Ok(response) => {
Ok(Json(response))
}
Err(e) => {
error!("Failed to list tasks: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list tasks: {}", e),
}),
))
}
}
}
/// Request for task detail endpoint
#[derive(Deserialize, JsonSchema)]
pub struct TaskDetailRequest {
pub job_run_id: String,
}
/// Get detailed task information
pub async fn get_task_detail(
State(service): State<ServiceState>,
Path(TaskDetailRequest { job_run_id }): Path<TaskDetailRequest>,
) -> Result<Json<JobRunDetailResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = TasksRepository::new(service.query_engine.clone());
match repository.show_protobuf(&job_run_id).await {
Ok(Some(protobuf_response)) => {
let timeline_events: Vec<JobRunTimelineEvent> = protobuf_response.timeline.into_iter().map(|event| {
JobRunTimelineEvent {
timestamp: event.timestamp,
status_code: event.status_code,
status_name: event.status_name,
message: event.message,
event_type: event.event_type,
cancel_reason: event.cancel_reason,
}
}).collect();
Ok(Json(JobRunDetailResponse {
job_run_id: protobuf_response.job_run_id,
job_label: protobuf_response.job_label,
build_request_id: protobuf_response.build_request_id,
status_code: protobuf_response.status_code,
status_name: protobuf_response.status_name,
target_partitions: protobuf_response.target_partitions,
scheduled_at: protobuf_response.scheduled_at,
started_at: protobuf_response.started_at,
completed_at: protobuf_response.completed_at,
duration_ms: protobuf_response.duration_ms,
cancelled: protobuf_response.cancelled,
cancel_reason: protobuf_response.cancel_reason,
message: protobuf_response.message,
timeline: timeline_events,
}))
}
Ok(None) => Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: format!("Task '{}' not found", job_run_id),
}),
)),
Err(e) => {
error!("Failed to get task detail: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get task detail: {}", e),
}),
))
}
}
}
/// Cancel a task
#[derive(Deserialize, JsonSchema)]
pub struct CancelTaskRequest {
pub reason: String,
pub build_request_id: String,
}
/// Request for task cancel endpoint path
#[derive(Deserialize, JsonSchema)]
pub struct TaskCancelPathRequest {
pub job_run_id: String,
}
pub async fn cancel_task(
State(service): State<ServiceState>,
Path(TaskCancelPathRequest { job_run_id }): Path<TaskCancelPathRequest>,
Json(request): Json<CancelTaskRequest>,
) -> Result<Json<TaskCancelResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = TasksRepository::new(service.query_engine.clone());
match repository.cancel(&job_run_id, request.reason.clone(), request.build_request_id).await {
Ok(()) => Ok(Json(TaskCancelResponse {
cancelled: true,
job_run_id,
reason: request.reason,
})),
Err(e) => {
error!("Failed to cancel task: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to cancel task: {}", e),
}),
))
}
}
}
/// List builds using repository
pub async fn list_builds_repository(
State(service): State<ServiceState>,
Query(params): Query<HashMap<String, String>>,
) -> Result<Json<BuildsListApiResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = BuildsRepository::new(service.query_engine.clone());
let limit = params.get("limit").and_then(|s| s.parse().ok());
match repository.list_protobuf(limit).await {
Ok(builds) => {
let total_count = builds.len() as u32;
let protobuf_response = crate::BuildsListResponse {
builds,
total_count,
has_more: false, // TODO: implement proper pagination
};
let api_response = BuildsListApiResponse {
data: protobuf_response,
request_id: None, // TODO: add request ID tracking
pagination: Some(PaginationInfo {
total_count,
has_more: false,
limit: limit.map(|l| l as u32),
offset: None,
}),
};
Ok(Json(api_response))
},
Err(e) => {
error!("Failed to list builds: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list builds: {}", e),
}),
))
}
}
}
/// Request for build detail endpoint
#[derive(Deserialize, JsonSchema)]
pub struct BuildDetailRequest {
pub build_request_id: String,
}
/// Get detailed build information
pub async fn get_build_detail(
State(service): State<ServiceState>,
Path(BuildDetailRequest { build_request_id }): Path<BuildDetailRequest>,
) -> Result<Json<BuildDetailResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = BuildsRepository::new(service.query_engine.clone());
match repository.show_protobuf(&build_request_id).await {
Ok(Some(protobuf_response)) => {
// Convert protobuf response to service response (with dual status fields)
let timeline_events: Vec<BuildTimelineEvent> = protobuf_response.timeline.into_iter().map(|event| {
BuildTimelineEvent {
timestamp: event.timestamp,
status: event.status,
message: event.message,
event_type: event.event_type,
cancel_reason: event.cancel_reason,
}
}).collect();
Ok(Json(BuildDetailResponse {
build_request_id: protobuf_response.build_request_id,
status: protobuf_response.status,
requested_partitions: protobuf_response.requested_partitions,
total_jobs: protobuf_response.total_jobs,
completed_jobs: protobuf_response.completed_jobs,
failed_jobs: protobuf_response.failed_jobs,
cancelled_jobs: protobuf_response.cancelled_jobs,
requested_at: protobuf_response.requested_at,
started_at: protobuf_response.started_at,
completed_at: protobuf_response.completed_at,
duration_ms: protobuf_response.duration_ms,
cancelled: protobuf_response.cancelled,
cancel_reason: protobuf_response.cancel_reason,
timeline: timeline_events,
}))
}
Ok(None) => Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: format!("Build '{}' not found", build_request_id),
}),
)),
Err(e) => {
error!("Failed to get build detail: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get build detail: {}", e),
}),
))
}
}
}
/// Request for build cancel endpoint path
#[derive(Deserialize, JsonSchema)]
pub struct BuildCancelPathRequest {
pub build_request_id: String,
}
/// Cancel a build using repository
pub async fn cancel_build_repository(
State(service): State<ServiceState>,
Path(BuildCancelPathRequest { build_request_id }): Path<BuildCancelPathRequest>,
Json(request): Json<CancelBuildRepositoryRequest>,
) -> Result<Json<BuildCancelRepositoryResponse>, (StatusCode, Json<ErrorResponse>)> {
let repository = BuildsRepository::new(service.query_engine.clone());
match repository.cancel(&build_request_id, request.reason.clone()).await {
Ok(()) => Ok(Json(BuildCancelRepositoryResponse {
cancelled: true,
build_request_id,
})),
Err(e) => {
error!("Failed to cancel build: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to cancel build: {}", e),
}),
))
}
}
}
#[derive(Deserialize, JsonSchema)]
pub struct CancelBuildRepositoryRequest {
pub reason: String,
}
// === Job Logs and Metrics Endpoints ===
use crate::{log_access::LogReader, metrics_aggregator::{MetricsAggregator, MetricsConfig}, JobLogsRequest};
use serde::Serialize;
/// Path parameter for job logs endpoint
#[derive(Deserialize, JsonSchema)]
pub struct JobLogsPathRequest {
pub job_run_id: String,
}
/// Query parameters for job logs endpoint
#[derive(Deserialize, JsonSchema)]
pub struct JobLogsQueryRequest {
#[serde(default)]
pub since_timestamp: i64,
#[serde(default)]
pub min_level: i32,
#[serde(default = "default_logs_limit")]
pub limit: u32,
}
fn default_logs_limit() -> u32 {
1000
}
/// Response for job logs endpoint
#[derive(Serialize, JsonSchema)]
pub struct JobLogsApiResponse {
pub entries: Vec<crate::JobLogEntry>,
pub has_more: bool,
}
/// Get job logs for a specific job run ID
pub async fn get_job_logs(
Path(JobLogsPathRequest { job_run_id }): Path<JobLogsPathRequest>,
axum::extract::Query(query): axum::extract::Query<JobLogsQueryRequest>,
) -> Result<Json<JobLogsApiResponse>, (StatusCode, Json<ErrorResponse>)> {
let log_reader = LogReader::default();
let request = JobLogsRequest {
job_run_id,
since_timestamp: query.since_timestamp,
min_level: query.min_level,
limit: query.limit,
};
match log_reader.get_job_logs(&request) {
Ok(response) => Ok(Json(JobLogsApiResponse {
entries: response.entries,
has_more: response.has_more,
})),
Err(e) => {
error!("Failed to get job logs: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get job logs: {}", e),
}),
))
}
}
}
/// List available job run IDs
#[derive(Deserialize, JsonSchema)]
pub struct ListJobsQueryRequest {
pub start_date: Option<String>,
pub end_date: Option<String>,
}
/// Response for list jobs endpoint
#[derive(Serialize, JsonSchema)]
pub struct ListJobsResponse {
pub job_run_ids: Vec<String>,
}
pub async fn list_available_jobs(
axum::extract::Query(query): axum::extract::Query<ListJobsQueryRequest>,
) -> Result<Json<ListJobsResponse>, (StatusCode, Json<ErrorResponse>)> {
let log_reader = LogReader::default();
let date_range = if let (Some(start), Some(end)) = (query.start_date, query.end_date) {
Some((start, end))
} else {
None
};
match log_reader.list_available_jobs(date_range) {
Ok(job_ids) => Ok(Json(ListJobsResponse {
job_run_ids: job_ids,
})),
Err(e) => {
error!("Failed to list available jobs: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to list available jobs: {}", e),
}),
))
}
}
}
/// Query parameters for metrics endpoint
#[derive(Deserialize, JsonSchema)]
pub struct MetricsQueryRequest {
#[serde(default = "default_time_range_hours")]
pub time_range_hours: u64,
#[serde(default)]
pub include_job_id_labels: bool,
#[serde(default = "default_max_cardinality")]
pub max_cardinality_per_metric: usize,
}
fn default_time_range_hours() -> u64 {
24
}
fn default_max_cardinality() -> usize {
1000
}
/// Get Prometheus metrics from job logs
pub async fn get_prometheus_metrics(
axum::extract::Query(query): axum::extract::Query<MetricsQueryRequest>,
) -> Result<String, (StatusCode, Json<ErrorResponse>)> {
let config = MetricsConfig {
max_cardinality_per_metric: query.max_cardinality_per_metric,
time_range_hours: query.time_range_hours,
include_job_id_labels: query.include_job_id_labels,
max_jobs_per_metric: 100,
};
let aggregator = MetricsAggregator::new(
crate::log_collector::LogCollector::default_logs_dir(),
config
);
match aggregator.to_prometheus_format() {
Ok(prometheus_output) => Ok(prometheus_output),
Err(e) => {
error!("Failed to generate Prometheus metrics: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to generate Prometheus metrics: {}", e),
}),
))
}
}
}
/// Get log-based metrics for a specific job run
pub async fn get_job_run_metrics(
Path(JobLogsPathRequest { job_run_id }): Path<JobLogsPathRequest>,
) -> Result<Json<Vec<crate::MetricPoint>>, (StatusCode, Json<ErrorResponse>)> {
let log_reader = LogReader::default();
match log_reader.get_job_metrics(&job_run_id) {
Ok(metrics) => Ok(Json(metrics)),
Err(e) => {
error!("Failed to get job metrics: {}", e);
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get job metrics: {}", e),
}),
))
}
}
}
/// Request for build mermaid diagram endpoint
#[derive(Deserialize, JsonSchema)]
pub struct BuildMermaidRequest {
pub build_request_id: String,
}
/// Response for build mermaid diagram endpoint
#[derive(serde::Serialize, JsonSchema)]
pub struct BuildMermaidResponse {
pub mermaid: String,
}
/// Get Mermaid diagram for a specific build request ID
pub async fn get_build_mermaid_diagram(
State(service): State<ServiceState>,
Path(BuildMermaidRequest { build_request_id }): Path<BuildMermaidRequest>,
) -> Result<Json<BuildMermaidResponse>, (StatusCode, Json<ErrorResponse>)> {
info!("Generating mermaid diagram for build request {}", build_request_id);
// Get build events for this build request
let events = match service.query_engine.get_build_request_events(&build_request_id, None).await {
Ok(events) => events,
Err(e) => {
error!("Failed to get build events for {}: {}", build_request_id, e);
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(ErrorResponse {
error: format!("Failed to get build events: {}", e),
}),
));
}
};
if events.is_empty() {
return Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: "Build request not found".to_string(),
}),
));
}
// Find job graph event to get the graph structure
let job_graph = events.iter()
.find_map(|event| {
match &event.event_type {
Some(crate::build_event::EventType::JobGraphEvent(graph_event)) => {
graph_event.job_graph.as_ref()
}
_ => None,
}
});
match job_graph {
Some(graph) => {
// Generate mermaid diagram with current status
let mermaid_diagram = mermaid_utils::generate_mermaid_with_status(graph, &events);
Ok(Json(BuildMermaidResponse {
mermaid: mermaid_diagram,
}))
}
None => {
Err((
StatusCode::NOT_FOUND,
Json(ErrorResponse {
error: "No job graph found for this build request".to_string(),
}),
))
}
}
}