implement job canceling
This commit is contained in:
parent
7debea96a2
commit
cfcb201285
3 changed files with 72 additions and 17 deletions
|
|
@ -197,7 +197,8 @@ enum JobRunStatusCode {
|
||||||
JobRunQueued = 0;
|
JobRunQueued = 0;
|
||||||
JobRunRunning = 1;
|
JobRunRunning = 1;
|
||||||
JobRunFailed = 2;
|
JobRunFailed = 2;
|
||||||
JobRunSucceeded = 3;
|
JobRunCanceled = 3;
|
||||||
|
JobRunSucceeded = 4;
|
||||||
}
|
}
|
||||||
message JobRunDetail {
|
message JobRunDetail {
|
||||||
string id = 1;
|
string id = 1;
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::util::current_timestamp;
|
use crate::util::current_timestamp;
|
||||||
use crate::{JobRunStatus, JobRunStatusCode, PartitionRef, PartitionStatus, PartitionStatusCode, WantCancelEventV1, WantCreateEventV1, WantDetail, WantStatus, WantStatusCode};
|
use crate::{event_source, EventSource, JobRunStatus, JobRunStatusCode, ManuallyTriggeredEvent, PartitionRef, PartitionStatus, PartitionStatusCode, WantCancelEventV1, WantCreateEventV1, WantDetail, WantStatus, WantStatusCode};
|
||||||
use crate::data_build_event::Event;
|
use crate::data_build_event::Event;
|
||||||
|
|
||||||
impl From<&WantCreateEventV1> for WantDetail {
|
impl From<&WantCreateEventV1> for WantDetail {
|
||||||
|
|
@ -68,3 +68,9 @@ impl From<JobRunStatusCode> for JobRunStatus {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<ManuallyTriggeredEvent> for EventSource {
|
||||||
|
fn from(value: ManuallyTriggeredEvent) -> Self {
|
||||||
|
Self { source: Some(event_source::Source::ManuallyTriggered(value)) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use crate::build_event_log::{BELStorage, MemoryBELStorage};
|
use crate::build_event_log::{BELStorage, MemoryBELStorage};
|
||||||
use crate::data_build_event::Event;
|
use crate::data_build_event::Event;
|
||||||
use crate::{JobRunHeartbeatEventV1, JobRunStatus, JobRunStatusCode, JobRunSuccessEventV1, JobRunFailureEventV1};
|
use crate::{JobRunHeartbeatEventV1, JobRunStatus, JobRunStatusCode, JobRunSuccessEventV1, JobRunFailureEventV1, JobRunCancelEventV1, EventSource};
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
use std::process::{Child, Command, Stdio};
|
use std::process::{Child, Command, Stdio};
|
||||||
|
|
@ -16,7 +16,7 @@ pub trait JobRun {
|
||||||
Visit is responsible for observing the state of the job run,
|
Visit is responsible for observing the state of the job run,
|
||||||
*/
|
*/
|
||||||
fn visit(&mut self) -> Result<JobRunPollResult, Box<dyn Error>>;
|
fn visit(&mut self) -> Result<JobRunPollResult, Box<dyn Error>>;
|
||||||
fn cancel(&mut self) -> Result<(), Box<dyn Error>>;
|
fn cancel(&mut self, source: EventSource) -> Result<JobRunCancelEventV1, Box<dyn Error>>;
|
||||||
fn run_with_env(&mut self, env: Option<HashMap<String, String>>) -> Result<JobRunHeartbeatEventV1, Box<dyn Error>>;
|
fn run_with_env(&mut self, env: Option<HashMap<String, String>>) -> Result<JobRunHeartbeatEventV1, Box<dyn Error>>;
|
||||||
fn run(&mut self) -> Result<JobRunHeartbeatEventV1, Box<dyn Error>> {
|
fn run(&mut self) -> Result<JobRunHeartbeatEventV1, Box<dyn Error>> {
|
||||||
self.run_with_env(None)
|
self.run_with_env(None)
|
||||||
|
|
@ -43,6 +43,7 @@ pub struct SubProcessJobRun {
|
||||||
pub args: Vec<String>,
|
pub args: Vec<String>,
|
||||||
pub stdout_buffer: Vec<String>, // Buffered stdout lines
|
pub stdout_buffer: Vec<String>, // Buffered stdout lines
|
||||||
pub exit_code: Option<i32>, // Cached exit code
|
pub exit_code: Option<i32>, // Cached exit code
|
||||||
|
pub cancel_source: Option<EventSource>, // Source of cancellation if canceled
|
||||||
}
|
}
|
||||||
|
|
||||||
impl JobRun for SubProcessJobRun {
|
impl JobRun for SubProcessJobRun {
|
||||||
|
|
@ -70,12 +71,14 @@ impl JobRun for SubProcessJobRun {
|
||||||
if let Some(stdout) = child.stdout.take() {
|
if let Some(stdout) = child.stdout.take() {
|
||||||
let reader = BufReader::new(stdout);
|
let reader = BufReader::new(stdout);
|
||||||
for line in reader.lines() {
|
for line in reader.lines() {
|
||||||
|
// TODO we should write lines to the job's file logs
|
||||||
if let Ok(line) = line {
|
if let Ok(line) = line {
|
||||||
self.stdout_buffer.push(line);
|
self.stdout_buffer.push(line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Check exit status
|
// Check exit status
|
||||||
match exit_status.code() {
|
match exit_status.code() {
|
||||||
Some(0) => {
|
Some(0) => {
|
||||||
|
|
@ -122,8 +125,27 @@ impl JobRun for SubProcessJobRun {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cancel(&mut self) -> Result<(), Box<dyn Error>> {
|
fn cancel(&mut self, source: EventSource) -> Result<JobRunCancelEventV1, Box<dyn Error>> {
|
||||||
todo!()
|
if !self.running {
|
||||||
|
return Err("cancel() called on non-running job".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let child = self.process.as_mut()
|
||||||
|
.ok_or("cancel() called but no process present")?;
|
||||||
|
|
||||||
|
// Kill the process
|
||||||
|
child.kill()?;
|
||||||
|
|
||||||
|
// Wait for it to actually terminate
|
||||||
|
child.wait()?;
|
||||||
|
|
||||||
|
self.running = false;
|
||||||
|
|
||||||
|
Ok(JobRunCancelEventV1 {
|
||||||
|
job_run_id: self.job_run_id.to_string(),
|
||||||
|
source: Some(source),
|
||||||
|
comment: Some("Job was canceled".to_string()),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mostly for test purposes
|
/// Mostly for test purposes
|
||||||
|
|
@ -151,6 +173,7 @@ impl SubProcessJobRun {
|
||||||
args,
|
args,
|
||||||
stdout_buffer: Vec::new(),
|
stdout_buffer: Vec::new(),
|
||||||
exit_code: None,
|
exit_code: None,
|
||||||
|
cancel_source: None,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -163,8 +186,9 @@ pub struct JobRunPollResult {
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use crate::data_build_event::Event;
|
use crate::data_build_event::Event;
|
||||||
use crate::job_run::SubProcessJobRun;
|
use crate::job_run::{JobRun, SubProcessJobRun};
|
||||||
use crate::JobRunStatusCode;
|
use crate::{JobRunStatusCode, ManuallyTriggeredEvent};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
fn test_helper_path() -> String {
|
fn test_helper_path() -> String {
|
||||||
std::env::var("TEST_SRCDIR")
|
std::env::var("TEST_SRCDIR")
|
||||||
|
|
@ -208,13 +232,6 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Job that runs for more than 1 heartbeat interval should emit a JobRunHeartbeatEventV1 event
|
|
||||||
#[test]
|
|
||||||
#[ignore]
|
|
||||||
fn test_running_job_run_poll_returns_heartbeat() {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Job run that fails should emit a JobRunFailureEventV1
|
/// Job run that fails should emit a JobRunFailureEventV1
|
||||||
#[test]
|
#[test]
|
||||||
fn test_job_run_failure_returns_job_run_failure_event() {
|
fn test_job_run_failure_returns_job_run_failure_event() {
|
||||||
|
|
@ -261,9 +278,40 @@ mod tests {
|
||||||
/// - Stop the actual subprocess (e.g. no output file should be written)
|
/// - Stop the actual subprocess (e.g. no output file should be written)
|
||||||
/// - Emitting a JobRunCancelEventV1 event
|
/// - Emitting a JobRunCancelEventV1 event
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
|
||||||
fn test_job_run_cancel_returns_job_run_cancel_event() {
|
fn test_job_run_cancel_returns_job_run_cancel_event() {
|
||||||
todo!()
|
use std::fs;
|
||||||
|
use crate::ManuallyTriggeredEvent;
|
||||||
|
|
||||||
|
// Create a temp file path for the test
|
||||||
|
let temp_file = format!("/tmp/databuild_test_cancel_{}", Uuid::new_v4());
|
||||||
|
|
||||||
|
// Spawn a job run that will sleep for 1 second and write a file
|
||||||
|
let mut job_run = SubProcessJobRun::spawn(test_helper_path(), vec![]).unwrap();
|
||||||
|
|
||||||
|
let env: HashMap<String, String> = HashMap::from([
|
||||||
|
("DATABUILD_TEST_SLEEP_MS".to_string(), "1000".to_string()),
|
||||||
|
("DATABUILD_TEST_OUTPUT_FILE".to_string(), temp_file.clone()),
|
||||||
|
("DATABUILD_TEST_FILE_CONTENT".to_string(), "completed".to_string()),
|
||||||
|
]);
|
||||||
|
job_run.run_with_env(Some(env)).unwrap();
|
||||||
|
|
||||||
|
// Give it a tiny bit of time to start
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(10));
|
||||||
|
|
||||||
|
// Cancel the job before it can complete - this returns the cancel event
|
||||||
|
let cancel_event = job_run.cancel(ManuallyTriggeredEvent { user: "test_user".into() }.into()).unwrap();
|
||||||
|
|
||||||
|
// Verify we got the cancel event
|
||||||
|
assert_eq!(cancel_event.job_run_id, job_run.id().to_string());
|
||||||
|
assert!(cancel_event.source.is_some());
|
||||||
|
assert_eq!(cancel_event.comment, Some("Job was canceled".to_string()));
|
||||||
|
|
||||||
|
// Verify the output file was NOT written (process was killed before it could complete)
|
||||||
|
assert!(!std::path::Path::new(&temp_file).exists(),
|
||||||
|
"Output file should not exist - process should have been killed");
|
||||||
|
|
||||||
|
// Cleanup just in case
|
||||||
|
let _ = fs::remove_file(&temp_file);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Job run that fails and emits a recognized "dep miss" statement should emit a JobRunMissingDepsEventV1
|
/// Job run that fails and emits a recognized "dep miss" statement should emit a JobRunMissingDepsEventV1
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue