add job run basics

parent bc61d8f530
commit 873f766aa0

4 changed files with 64 additions and 167 deletions
@@ -188,9 +188,23 @@ message TaintDetail {
   // TODO
 }
 
-message JobRunDetail {
-  // TODO
+message JobRunStatus {
+  JobRunStatusCode code = 1;
+  string name = 2;
+}
+
+enum JobRunStatusCode {
+  JobRunQueued = 0;
+  JobRunRunning = 1;
+  JobRunFailed = 2;
+  JobRunSucceeded = 3;
+}
+
+message JobRunDetail {
+  string id = 1;
+  JobRunStatus status = 2;
+  optional uint64 last_heartbeat_at = 3;
+  repeated WantAttributedPartitions servicing_wants = 4;
 }
 
 message EventFilter {
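For illustration only (not in the commit): assuming prost-style generated Rust types for these messages (nested message fields become Option, enum-typed fields are carried as i32, as the orchestrator change below relies on), a JobRunDetail for a running job might be assembled like this; the helper name and values are hypothetical.

use crate::{JobRunDetail, JobRunStatus, JobRunStatusCode};
use uuid::Uuid;

// Hypothetical helper: describe a job run that is currently executing.
fn running_job_run_detail(id: Uuid) -> JobRunDetail {
    JobRunDetail {
        id: id.to_string(),
        status: Some(JobRunStatus {
            code: JobRunStatusCode::JobRunRunning as i32,
            name: "JobRunRunning".to_string(),
        }),
        last_heartbeat_at: None, // no heartbeat observed yet
        servicing_wants: vec![], // attributed wants get attached by the orchestrator
    }
}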
@@ -1,4 +1,4 @@
-use crate::job_run::JobRun;
+use crate::job_run::{spawn_job_run, JobRun, JobRunConfig};
 use crate::{PartitionRef, WantDetail};
 use regex::Regex;
 use std::error::Error;
@@ -7,16 +7,17 @@ use std::error::Error;
 pub struct JobConfiguration {
     pub label: String,
     pub pattern: String,
-    pub entrypoint: String,
+    pub entry_point: String,
 }
 
 impl JobConfiguration {
     /** Launch job to build the partitions specified by the provided wants. */
-    pub fn spawn(&self, wants: Vec<WantDetail>) -> Result<JobRun, Box<dyn Error>> {
+    pub fn spawn(&self, wants: Vec<WantDetail>) -> Result<Box<dyn JobRun>, Box<dyn Error>> {
         let wanted_refs: Vec<PartitionRef> =
             wants.iter().flat_map(|want| want.partitions.clone()).collect();
         let args: Vec<String> = wanted_refs.iter().map(|pref| pref.r#ref.clone()).collect();
-        JobRun::spawn(self.entrypoint.clone(), args)
+        let config = JobRunConfig::SubProcess { entry_point: self.entry_point.clone(), args };
+        spawn_job_run(config)
     }
 
     pub fn matches(&self, refs: &PartitionRef) -> bool {
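Usage sketch (not in the commit): with entry_point renamed and spawn() returning a boxed trait object, launching a job against matched wants could look like the following; the label, pattern, and entry point are hypothetical values.

use crate::job::JobConfiguration;
use crate::WantDetail;
use std::error::Error;

fn launch(wants: Vec<WantDetail>) -> Result<(), Box<dyn Error>> {
    let job = JobConfiguration {
        label: "daily_rollup".to_string(),           // hypothetical job label
        pattern: "metrics/daily/.*".to_string(),     // hypothetical partition pattern
        entry_point: "./build_daily.sh".to_string(), // hypothetical executable
    };
    // spawn() flattens the wants into partition-ref args, wraps them in
    // JobRunConfig::SubProcess, and defers to spawn_job_run for a Box<dyn JobRun>.
    let job_run = job.spawn(wants)?;
    println!("started job run {}", job_run.id());
    Ok(())
}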
@@ -1,164 +1,62 @@
 use crate::build_event_log::{BELStorage, MemoryBELStorage};
 use crate::data_build_event::Event;
-use crate::data_build_event::Event::{JobRunFailureV1, JobRunSuccessV1};
-use crate::{DataBuildEvent, JobRunFailureEventV1, JobRunHeartbeatEventV1, JobRunSuccessEventV1};
+use crate::JobRunStatus;
 use std::error::Error;
-use std::io::{BufRead, BufReader};
-use std::process::{Child, Command, ExitStatus, Stdio};
+use std::io::BufRead;
+use std::process::Child;
 use uuid::Uuid;
 
-/** Wrapper type that can be mocked */
-trait JobRunChild {
-    fn exit_status(&mut self) -> Option<ExitStatus>;
-    fn stdout_lines(&mut self) -> Vec<String>;
+// TODO log to /var/log/databuild/jobruns/$JOB_RUN_ID/, and rotate over max size (e.g. only ever use 1GB for logs)
+// Leave door open to background log processor that tails job logs, but don't include in jobrun concept
+
+pub trait JobRun {
+    fn id(&self) -> Uuid;
+    fn visit(&mut self) -> Result<JobRunPollResult, Box<dyn Error>>;
+    fn cancel(&mut self) -> Result<(), Box<dyn Error>>;
 }
 
-#[derive(Debug)]
-struct WrappedProcessChild(Child);
-
-impl JobRunChild for WrappedProcessChild {
-    fn exit_status(&mut self) -> Option<ExitStatus> {
-        self.0.try_wait().expect("coudn't wait")
-    }
-
-    fn stdout_lines(&mut self) -> Vec<String> {
-        let mut stdout_lines = Vec::new();
-        let stdout = self.0.stdout.take().expect("stdout not piped");
-        let reader = BufReader::new(stdout);
-        for line in reader.lines() {
-            stdout_lines.push(line.expect("stdout not piped"));
-        }
-        stdout_lines
+pub enum JobRunConfig {
+    SubProcess { entry_point: String, args: Vec<String> },
+}
+
+pub fn spawn_job_run(config: JobRunConfig) -> Result<Box<dyn JobRun>, Box<dyn Error>> {
+    match config {
+        JobRunConfig::SubProcess { entry_point, args } => Ok(SubProcessJobRun::spawn(entry_point, args)?),
+        _ => Err("No impl for this job config type".into()),
     }
 }
 
-impl From<Child> for WrappedProcessChild {
-    fn from(child: Child) -> Self {
-        Self { 0: child }
-    }
+pub struct SubProcessJobRun {
+    pub job_run_id: Uuid,
+    pub process: Child,
+    pub storage: MemoryBELStorage,
 }
 
-pub struct JobRun {
-    job_run_id: Uuid,
-    events: MemoryBELStorage,
-    child: Box<dyn JobRunChild>,
-    unhandled_lines: Vec<String>,
-}
-
-const EVENT_SIZE_LIMIT: u64 = 1000000;
-
-impl JobRun {
-    pub fn spawn(command: String, args: Vec<String>) -> Result<JobRun, Box<dyn Error>> {
-        Ok(JobRun {
-            job_run_id: Default::default(),
-            events: Default::default(),
-            child: Box::new(WrappedProcessChild::from(
-                Command::new(command)
-                    .args(args)
-                    .stdout(Stdio::piped())
-                    .spawn()?,
-            )),
-            unhandled_lines: Default::default(),
-        })
+impl JobRun for SubProcessJobRun {
+    fn id(&self) -> Uuid {
+        self.job_run_id
     }
 
-    pub fn visit(&mut self, since_idx: u64) -> Result<Vec<DataBuildEvent>, Box<dyn Error>> {
-        // Collect new lines from child process
-
-        // Parse BEL events from child process
-        let new_events = Self::process_lines(self.job_run_id, &self.unhandled_lines);
-        for event in new_events {
-            self.events.append_event(&event)?;
-        }
-        self.unhandled_lines.drain(..);
-
-        // Potentially react to job completion
-        match self.exit_status() {
-            None => {} // No exit -> no harm
-            Some(status) => {
-                if status.success() {
-                    self.events
-                        .append_event(&JobRunSuccessV1(JobRunSuccessEventV1 {
-                            job_run_id: self.job_run_id.into(),
-                        }))?;
-                } else {
-                    self.events
-                        .append_event(&JobRunFailureV1(JobRunFailureEventV1 {
-                            job_run_id: self.job_run_id.into(),
-                        }))?;
-                }
-            }
-        }
-
-        // Return BEL events since provided idx
-        self.events
-            .list_events(since_idx, EVENT_SIZE_LIMIT)
-            .and_then(|events| {
-                if events.len() as u64 == EVENT_SIZE_LIMIT {
-                    Err(format!(
-                        "Returned {} events - that's way too many.",
-                        EVENT_SIZE_LIMIT
-                    )
-                    .into())
-                } else {
-                    Ok(events)
-                }
-            })
-    }
-
-    pub fn cancel(&mut self) {
+    fn visit(&mut self) -> Result<JobRunPollResult, Box<dyn Error>> {
         todo!()
     }
 
-    pub fn event_for_line(line: String) -> Option<Event> {
-        // TODO parse missing data dep event
-        // TODO parse job state
-        None
+    fn cancel(&mut self) -> Result<(), Box<dyn Error>> {
+        todo!()
     }
+}
 
-    pub fn process_lines(job_run_id: Uuid, lines: &Vec<String>) -> Vec<Event> {
-        let mut events: Vec<Event> = Default::default();
-
-        if lines.len() > 0 {
-            // If any lines were written to stdout, we should heartbeat
-            events.push(Event::JobRunHeartbeatV1(JobRunHeartbeatEventV1 {
-                job_run_id: job_run_id.clone().into(),
-            }));
-        }
-
-        for event in lines
-            .iter()
-            .flat_map(|line| Self::event_for_line(line.clone()))
-        {
-            events.push(event);
-        }
-
-        events
+impl SubProcessJobRun {
+    pub fn spawn(entry_point: String, args: Vec<String>) -> Result<Box<dyn JobRun>, Box<dyn Error>> {
+        todo!()
     }
+}
 
-    pub fn exit_status(&mut self) -> Option<ExitStatus> {
-        self.child.exit_status()
-    }
+pub struct JobRunPollResult {
+    pub new_events: Vec<Event>, // Parsed BEL events, not raw lines
+    pub status: JobRunStatus,
 }
 
 mod tests {
-    use crate::job_run::JobRun;
-
-    #[test]
-    fn test_process_lines_empty() {
-        let lines = Vec::<String>::new();
-        let events = JobRun::process_lines(Default::default(), &lines);
-        assert_eq!(events.len(), 0);
-    }
-
-    #[test]
-    fn test_process_lines_heartbeat() {
-        let lines_1 = vec!["Hello, salem".to_string()];
-        let events_1 = JobRun::process_lines(Default::default(), &lines_1);
-        assert_eq!(events_1.len(), 1);
-
-        let lines_2 = vec!["Hello, salem".to_string(), "Hello, pippin".to_string()];
-        let events_2 = JobRun::process_lines(Default::default(), &lines_2);
-        assert_eq!(events_2.len(), 1);
-    }
 }
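SubProcessJobRun::spawn is left as todo!() in this commit. A plausible body, closely following the removed JobRun::spawn (same file scope, so JobRun, Uuid, MemoryBELStorage, and Error are already imported; assumes MemoryBELStorage: Default as the removed code relied on, and the uuid crate's v4 feature for fresh ids):

use std::process::{Command, Stdio};

impl SubProcessJobRun {
    pub fn spawn(entry_point: String, args: Vec<String>) -> Result<Box<dyn JobRun>, Box<dyn Error>> {
        Ok(Box::new(SubProcessJobRun {
            job_run_id: Uuid::new_v4(),
            process: Command::new(entry_point)
                .args(args)
                .stdout(Stdio::piped()) // visit() would tail this pipe for BEL events
                .spawn()?,
            storage: MemoryBELStorage::default(),
        }))
    }
}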
@@ -1,7 +1,7 @@
 use crate::build_event_log::{BELStorage, BuildEventLog, MemoryBELStorage};
 use crate::job::JobConfiguration;
 use crate::job_run::JobRun;
-use crate::{PartitionRef, WantDetail};
+use crate::{JobRunStatusCode, PartitionRef, WantDetail};
 use std::collections::HashMap;
 use std::error::Error;
 use std::fmt::Debug;
@@ -13,7 +13,7 @@ the visitor pattern to monitor job exec progress and liveness, and adds
 struct Orchestrator<S: BELStorage + Debug> {
     bel: BuildEventLog<S>,
-    job_runs: Vec<JobRunHandle>,
+    job_runs: Vec<Box<dyn JobRun>>,
     config: OrchestratorConfig,
 }
@@ -63,20 +63,6 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
     }
 }
 
-struct JobRunHandle {
-    job_run: JobRun,
-    bel_idx: u64,
-}
-
-impl From<JobRun> for JobRunHandle {
-    fn from(job_run: JobRun) -> Self {
-        Self {
-            job_run,
-            bel_idx: 0,
-        }
-    }
-}
-
 #[derive(Debug, Clone)]
 struct OrchestratorConfig {
     jobs: Vec<JobConfiguration>,
@@ -134,13 +120,11 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
         // Visit existing jobs, remove completed
         self.job_runs.retain_mut(|jr| {
             // Append emitted events
-            let events = jr
-                .job_run
-                .visit(jr.bel_idx.clone())
+            let result = jr
+                .visit()
                 .expect("Job visit failed");
-            events
+            result.new_events
                 .iter()
-                .filter_map(|event| event.event.clone())
                 .for_each(|event| {
                     self.bel
                         .append_event(&event)
@@ -148,7 +132,7 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
                 });
 
             // Retain job run if it doesn't yet have an exit code (still running)
-            jr.job_run.exit_status().is_none()
+            result.status.code == JobRunStatusCode::JobRunRunning as i32
         });
 
         Ok(())
@@ -179,7 +163,7 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
         } else {
             for wg in grouped_wants.want_groups {
                 let job_run = wg.job.spawn(wg.wants)?;
-                self.job_runs.push(JobRunHandle::from(job_run));
+                self.job_runs.push(job_run);
             }
 
             Ok(())
@@ -341,7 +325,7 @@ mod tests {
         JobConfiguration {
             label: label.to_string(),
             pattern: pattern.to_string(),
-            entrypoint: "test_entrypoint".to_string(),
+            entry_point: "test_entrypoint".to_string(),
         }
     }
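With job_runs now holding Box<dyn JobRun>, orchestrator tests could stub out process execution. A hypothetical minimal mock (assumes the prost-generated JobRunStatusCode derives Copy and Debug, which prost does by default):

use crate::data_build_event::Event;
use crate::job_run::{JobRun, JobRunPollResult};
use crate::{JobRunStatus, JobRunStatusCode};
use std::error::Error;
use uuid::Uuid;

// Hypothetical test double: always reports the same status and no new events.
struct StaticJobRun {
    id: Uuid,
    code: JobRunStatusCode,
}

impl JobRun for StaticJobRun {
    fn id(&self) -> Uuid {
        self.id
    }

    fn visit(&mut self) -> Result<JobRunPollResult, Box<dyn Error>> {
        Ok(JobRunPollResult {
            new_events: Vec::<Event>::new(),
            status: JobRunStatus {
                code: self.code as i32,
                name: format!("{:?}", self.code),
            },
        })
    }

    fn cancel(&mut self) -> Result<(), Box<dyn Error>> {
        Ok(())
    }
}

A test could push Box::new(StaticJobRun { .. }) into job_runs and drive the polling loop without spawning real processes.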