refactor job runs to have internal state objects
This commit is contained in:
parent
8208af6605
commit
55f51125c3
3 changed files with 312 additions and 238 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
use crate::job_run::{NotStartedJobRun, SubProcessBackend};
|
use crate::job_run::{JobRun, SubProcessBackend};
|
||||||
use crate::{JobConfig, PartitionRef, WantDetail};
|
use crate::{JobConfig, PartitionRef, WantDetail};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use crate::util::DatabuildError;
|
use crate::util::DatabuildError;
|
||||||
|
|
@ -12,11 +12,11 @@ pub struct JobConfiguration {
|
||||||
|
|
||||||
impl JobConfiguration {
|
impl JobConfiguration {
|
||||||
/** Launch job to build the partitions specified by the provided wants. */
|
/** Launch job to build the partitions specified by the provided wants. */
|
||||||
pub fn spawn(&self, wants: Vec<WantDetail>) -> Result<NotStartedJobRun<SubProcessBackend>, std::io::Error> {
|
pub fn spawn(&self, wants: Vec<WantDetail>) -> Result<JobRun<SubProcessBackend>, std::io::Error> {
|
||||||
let wanted_refs: Vec<PartitionRef> =
|
let wanted_refs: Vec<PartitionRef> =
|
||||||
wants.iter().flat_map(|want| want.partitions.clone()).collect();
|
wants.iter().flat_map(|want| want.partitions.clone()).collect();
|
||||||
let args: Vec<String> = wanted_refs.iter().map(|pref| pref.r#ref.clone()).collect();
|
let args: Vec<String> = wanted_refs.iter().map(|pref| pref.r#ref.clone()).collect();
|
||||||
Ok(NotStartedJobRun::spawn(self.entry_point.clone(), args))
|
Ok(JobRun::spawn(self.entry_point.clone(), args))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn matches(&self, refs: &PartitionRef) -> bool {
|
pub fn matches(&self, refs: &PartitionRef) -> bool {
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,6 @@ pub trait JobRunBackend: Sized {
|
||||||
/// Create a new not-started job run
|
/// Create a new not-started job run
|
||||||
fn create(entry_point: String, args: Vec<String>) -> Self::NotStartedState;
|
fn create(entry_point: String, args: Vec<String>) -> Self::NotStartedState;
|
||||||
|
|
||||||
/// Convenience method to spawn a new job run (calls create and wraps in JobRun)
|
|
||||||
fn spawn(entry_point: String, args: Vec<String>) -> NotStartedJobRun<Self> {
|
|
||||||
NotStartedJobRun::spawn(entry_point, args)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Transition from NotStarted to Running
|
/// Transition from NotStarted to Running
|
||||||
fn start(
|
fn start(
|
||||||
not_started: Self::NotStartedState,
|
not_started: Self::NotStartedState,
|
||||||
|
|
@ -59,103 +54,35 @@ pub enum PollResult<C, F, D> {
|
||||||
DepMiss(D),
|
DepMiss(D),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generic JobRun that works with any backend, parameterized by state
|
// ===== TYPE-SAFE STATE MACHINE PATTERN =====
|
||||||
pub struct JobRun<B: JobRunBackend, S> {
|
// Uses parameterized JobRunWithState wrapped in JobRun enum for storage
|
||||||
|
|
||||||
|
/// New JobRun with embedded state enum
|
||||||
|
/// Type-safe job run struct, parameterized by backend and state
|
||||||
|
/// This struct can only perform operations valid for its current state type
|
||||||
|
pub struct JobRunWithState<B: JobRunBackend, S> {
|
||||||
pub job_run_id: Uuid,
|
pub job_run_id: Uuid,
|
||||||
pub state: S,
|
pub state: S,
|
||||||
pub _backend: PhantomData<B>,
|
pub _backend: PhantomData<B>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Type aliases for specific states
|
/// Wrapper enum for storing job runs in a single collection
|
||||||
pub type NotStartedJobRun<B> = JobRun<B, <B as JobRunBackend>::NotStartedState>;
|
/// This allows us to store jobs in different states together while maintaining type safety
|
||||||
pub type RunningJobRun<B> = JobRun<B, <B as JobRunBackend>::RunningState>;
|
pub enum JobRun<B: JobRunBackend> {
|
||||||
pub type CompletedJobRun<B> = JobRun<B, <B as JobRunBackend>::CompletedState>;
|
NotStarted(JobRunWithState<B, B::NotStartedState>),
|
||||||
pub type FailedJobRun<B> = JobRun<B, <B as JobRunBackend>::FailedState>;
|
Running(JobRunWithState<B, B::RunningState>),
|
||||||
pub type CanceledJobRun<B> = JobRun<B, <B as JobRunBackend>::CanceledState>;
|
Completed(JobRunWithState<B, B::CompletedState>),
|
||||||
pub type DepMissJobRun<B> = JobRun<B, <B as JobRunBackend>::DepMissState>;
|
Failed(JobRunWithState<B, B::FailedState>),
|
||||||
|
Canceled(JobRunWithState<B, B::CanceledState>),
|
||||||
// Methods available on all JobRun states
|
DepMiss(JobRunWithState<B, B::DepMissState>),
|
||||||
impl<B: JobRunBackend, S> JobRun<B, S> {
|
|
||||||
pub fn id(&self) -> Uuid {
|
|
||||||
self.job_run_id
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Methods available only on NotStarted state
|
/// Result of visiting a running job - returns the typed states
|
||||||
impl<B: JobRunBackend> NotStartedJobRun<B> {
|
pub enum VisitResult<B: JobRunBackend> {
|
||||||
pub fn spawn(entry_point: String, args: Vec<String>) -> Self {
|
StillRunning(JobRunWithState<B, B::RunningState>),
|
||||||
JobRun {
|
Completed(JobRunWithState<B, B::CompletedState>),
|
||||||
job_run_id: Uuid::new_v4(),
|
Failed(JobRunWithState<B, B::FailedState>),
|
||||||
state: B::create(entry_point, args),
|
DepMiss(JobRunWithState<B, B::DepMissState>),
|
||||||
_backend: PhantomData,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn run(self) -> Result<RunningJobRun<B>, DatabuildError> {
|
|
||||||
self.run_with_env(None)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn run_with_env(
|
|
||||||
self,
|
|
||||||
env: Option<HashMap<String, String>>,
|
|
||||||
) -> Result<RunningJobRun<B>, DatabuildError> {
|
|
||||||
let running_state = B::start(self.state, env)?;
|
|
||||||
Ok(JobRun {
|
|
||||||
job_run_id: self.job_run_id,
|
|
||||||
state: running_state,
|
|
||||||
_backend: PhantomData,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Methods available only on Running state
|
|
||||||
impl<B: JobRunBackend> RunningJobRun<B> {
|
|
||||||
pub fn visit(&mut self) -> Result<JobRunVisitResult<B>, DatabuildError> {
|
|
||||||
match B::poll(&mut self.state)? {
|
|
||||||
PollResult::StillRunning => Ok(JobRunVisitResult::StillRunning),
|
|
||||||
PollResult::Completed(completed_state) => {
|
|
||||||
let job_run_id = self.job_run_id;
|
|
||||||
Ok(JobRunVisitResult::Completed(JobRun {
|
|
||||||
job_run_id,
|
|
||||||
state: completed_state,
|
|
||||||
_backend: PhantomData,
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
PollResult::Failed(failed_state) => {
|
|
||||||
let job_run_id = self.job_run_id;
|
|
||||||
Ok(JobRunVisitResult::Failed(JobRun {
|
|
||||||
job_run_id,
|
|
||||||
state: failed_state,
|
|
||||||
_backend: PhantomData,
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
PollResult::DepMiss(state) => {
|
|
||||||
let job_run_id = self.job_run_id;
|
|
||||||
Ok(JobRunVisitResult::DepMiss(JobRun {
|
|
||||||
job_run_id,
|
|
||||||
state,
|
|
||||||
_backend: PhantomData,
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn cancel(self, source: EventSource) -> Result<CanceledJobRun<B>, DatabuildError> {
|
|
||||||
let canceled_state = B::cancel_job(self.state, source)?;
|
|
||||||
Ok(JobRun {
|
|
||||||
job_run_id: self.job_run_id,
|
|
||||||
state: canceled_state,
|
|
||||||
_backend: PhantomData,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Result of visiting a running job
|
|
||||||
pub enum JobRunVisitResult<B: JobRunBackend> {
|
|
||||||
StillRunning,
|
|
||||||
Completed(CompletedJobRun<B>),
|
|
||||||
Failed(FailedJobRun<B>),
|
|
||||||
DepMiss(DepMissJobRun<B>),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum JobRunConfig {
|
pub enum JobRunConfig {
|
||||||
|
|
@ -368,40 +295,164 @@ pub struct JobRunPollResult {
|
||||||
pub status: JobRunStatus,
|
pub status: JobRunStatus,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ===== Type-Safe State Transition Implementation =====
|
||||||
|
|
||||||
|
// Factory and helper methods on the JobRun enum
|
||||||
|
impl<B: JobRunBackend> JobRun<B> {
|
||||||
|
/// Create a new job run in the NotStarted state
|
||||||
|
pub fn spawn(entry_point: String, args: Vec<String>) -> Self {
|
||||||
|
JobRun::NotStarted(JobRunWithState {
|
||||||
|
job_run_id: Uuid::new_v4(),
|
||||||
|
state: B::create(entry_point, args),
|
||||||
|
_backend: PhantomData,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the job run ID regardless of state
|
||||||
|
pub fn job_run_id(&self) -> &Uuid {
|
||||||
|
match self {
|
||||||
|
JobRun::NotStarted(j) => &j.job_run_id,
|
||||||
|
JobRun::Running(j) => &j.job_run_id,
|
||||||
|
JobRun::Completed(j) => &j.job_run_id,
|
||||||
|
JobRun::Failed(j) => &j.job_run_id,
|
||||||
|
JobRun::Canceled(j) => &j.job_run_id,
|
||||||
|
JobRun::DepMiss(j) => &j.job_run_id,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the job is in a terminal state
|
||||||
|
pub fn is_terminal(&self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self,
|
||||||
|
JobRun::Completed(_) | JobRun::Failed(_) | JobRun::Canceled(_) | JobRun::DepMiss(_)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-safe transition: NotStarted -> Running
|
||||||
|
// This method can ONLY be called on NotStarted jobs - compile error otherwise!
|
||||||
|
impl<B: JobRunBackend> JobRunWithState<B, B::NotStartedState> {
|
||||||
|
pub fn run(
|
||||||
|
self,
|
||||||
|
env: Option<HashMap<String, String>>,
|
||||||
|
) -> Result<JobRunWithState<B, B::RunningState>, DatabuildError> {
|
||||||
|
let running = B::start(self.state, env)?;
|
||||||
|
Ok(JobRunWithState {
|
||||||
|
job_run_id: self.job_run_id,
|
||||||
|
state: running,
|
||||||
|
_backend: PhantomData,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-safe transition: Running -> (Running | Completed | Failed | DepMiss)
|
||||||
|
// This method can ONLY be called on Running jobs - compile error otherwise!
|
||||||
|
impl<B: JobRunBackend> JobRunWithState<B, B::RunningState> {
|
||||||
|
pub fn visit(mut self) -> Result<VisitResult<B>, DatabuildError> {
|
||||||
|
match B::poll(&mut self.state)? {
|
||||||
|
PollResult::StillRunning => Ok(VisitResult::StillRunning(self)),
|
||||||
|
PollResult::Completed(completed) => Ok(VisitResult::Completed(JobRunWithState {
|
||||||
|
job_run_id: self.job_run_id,
|
||||||
|
state: completed,
|
||||||
|
_backend: PhantomData,
|
||||||
|
})),
|
||||||
|
PollResult::Failed(failed) => Ok(VisitResult::Failed(JobRunWithState {
|
||||||
|
job_run_id: self.job_run_id,
|
||||||
|
state: failed,
|
||||||
|
_backend: PhantomData,
|
||||||
|
})),
|
||||||
|
PollResult::DepMiss(dep_miss) => Ok(VisitResult::DepMiss(JobRunWithState {
|
||||||
|
job_run_id: self.job_run_id,
|
||||||
|
state: dep_miss,
|
||||||
|
_backend: PhantomData,
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn cancel(
|
||||||
|
self,
|
||||||
|
source: EventSource,
|
||||||
|
) -> Result<JobRunWithState<B, B::CanceledState>, DatabuildError> {
|
||||||
|
let canceled = B::cancel_job(self.state, source)?;
|
||||||
|
Ok(JobRunWithState {
|
||||||
|
job_run_id: self.job_run_id,
|
||||||
|
state: canceled,
|
||||||
|
_backend: PhantomData,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper trait for converting states to events
|
||||||
|
pub trait ToEvent {
|
||||||
|
fn to_event(&self, job_run_id: &Uuid) -> Event;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToEvent for SubProcessCompleted {
|
||||||
|
fn to_event(&self, job_run_id: &Uuid) -> Event {
|
||||||
|
Event::JobRunSuccessV1(JobRunSuccessEventV1 {
|
||||||
|
job_run_id: job_run_id.to_string(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToEvent for SubProcessFailed {
|
||||||
|
fn to_event(&self, job_run_id: &Uuid) -> Event {
|
||||||
|
Event::JobRunFailureV1(JobRunFailureEventV1 {
|
||||||
|
job_run_id: job_run_id.to_string(),
|
||||||
|
reason: self.reason.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToEvent for SubProcessDepMiss {
|
||||||
|
fn to_event(&self, job_run_id: &Uuid) -> Event {
|
||||||
|
Event::JobRunMissingDepsV1(JobRunMissingDepsEventV1 {
|
||||||
|
job_run_id: job_run_id.to_string(),
|
||||||
|
missing_deps: self.missing_deps.clone(),
|
||||||
|
read_deps: self.read_deps.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::data_build_event::Event;
|
use crate::data_build_event::Event;
|
||||||
use crate::data_deps::DATABUILD_MISSING_DEPS_JSON;
|
use crate::data_deps::DATABUILD_MISSING_DEPS_JSON;
|
||||||
use crate::job_run::{JobRunBackend, JobRunVisitResult, SubProcessBackend};
|
use crate::job_run::{JobRun, JobRunBackend, VisitResult, SubProcessBackend};
|
||||||
use crate::mock_job_run::MockJobRun;
|
use crate::mock_job_run::MockJobRun;
|
||||||
use crate::{JobRunMissingDeps, ManuallyTriggeredEvent, MissingDeps};
|
use crate::{JobRunMissingDeps, MissingDeps};
|
||||||
|
|
||||||
/// Happy path - run that succeeds should emit a JobRunSuccessEventV1
|
/// Happy path - run that succeeds should emit a JobRunSuccessEventV1
|
||||||
#[test]
|
#[test]
|
||||||
fn test_job_run_success_returns_job_run_success_event() {
|
fn test_job_run_success_returns_job_run_success_event() {
|
||||||
// Spawn a job run that will succeed (exit code 0)
|
// Spawn a job run that will succeed (exit code 0)
|
||||||
let job_run = SubProcessBackend::spawn(MockJobRun::bin_path(), vec![]);
|
let job_run = JobRun::<SubProcessBackend>::spawn(MockJobRun::bin_path(), vec![]);
|
||||||
|
|
||||||
// Start the job - this consumes the NotStarted and returns Running
|
// Start the job - this consumes the NotStarted and returns Running
|
||||||
let mut running_job = job_run.run().unwrap();
|
let running_job = match job_run {
|
||||||
|
JobRun::NotStarted(not_started) => not_started.run(None).unwrap(),
|
||||||
|
_ => panic!("Expected NotStarted job"),
|
||||||
|
};
|
||||||
|
|
||||||
// Poll until we get completion
|
// Poll until we get completion
|
||||||
|
let mut current_job = running_job;
|
||||||
loop {
|
loop {
|
||||||
match running_job.visit().unwrap() {
|
match current_job.visit().unwrap() {
|
||||||
JobRunVisitResult::Completed(completed) => {
|
VisitResult::Completed(completed) => {
|
||||||
// Generate the event from the completed state
|
// Generate the event from the completed state
|
||||||
let event = completed.state.to_event(&completed.id());
|
let event = completed.state.to_event(&completed.job_run_id);
|
||||||
assert!(matches!(event, Event::JobRunSuccessV1(_)));
|
assert!(matches!(event, Event::JobRunSuccessV1(_)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
JobRunVisitResult::Failed(failed) => {
|
VisitResult::Failed(failed) => {
|
||||||
panic!("Job failed unexpectedly: {}", failed.state.reason);
|
panic!("Job failed unexpectedly: {}", failed.state.reason);
|
||||||
}
|
}
|
||||||
JobRunVisitResult::StillRunning => {
|
VisitResult::StillRunning(still_running) => {
|
||||||
// Sleep briefly and poll again
|
// Sleep briefly and poll again
|
||||||
std::thread::sleep(std::time::Duration::from_millis(10));
|
std::thread::sleep(std::time::Duration::from_millis(10));
|
||||||
|
current_job = still_running;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
JobRunVisitResult::DepMiss(dep_miss) => {
|
VisitResult::DepMiss(_dep_miss) => {
|
||||||
panic!("Job dep miss unexpectedly");
|
panic!("Job dep miss unexpectedly");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -412,30 +463,35 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_job_run_failure_returns_job_run_failure_event() {
|
fn test_job_run_failure_returns_job_run_failure_event() {
|
||||||
// Spawn a job run
|
// Spawn a job run
|
||||||
let job_run = SubProcessBackend::spawn(MockJobRun::bin_path(), vec![]);
|
let job_run = JobRun::<SubProcessBackend>::spawn(MockJobRun::bin_path(), vec![]);
|
||||||
|
|
||||||
// Start the job with an exit code that indicates failure (non-zero)
|
// Start the job with an exit code that indicates failure (non-zero)
|
||||||
let env = MockJobRun::new().exit_code(1).to_env();
|
let env = MockJobRun::new().exit_code(1).to_env();
|
||||||
let mut running_job = job_run.run_with_env(Some(env)).unwrap();
|
let running_job = match job_run {
|
||||||
|
JobRun::NotStarted(not_started) => not_started.run(Some(env)).unwrap(),
|
||||||
|
_ => panic!("Expected NotStarted job"),
|
||||||
|
};
|
||||||
|
|
||||||
// Poll until we get completion
|
// Poll until we get completion
|
||||||
|
let mut current_job = running_job;
|
||||||
loop {
|
loop {
|
||||||
match running_job.visit().unwrap() {
|
match current_job.visit().unwrap() {
|
||||||
JobRunVisitResult::Completed(_) => {
|
VisitResult::Completed(_) => {
|
||||||
panic!("Job succeeded unexpectedly");
|
panic!("Job succeeded unexpectedly");
|
||||||
}
|
}
|
||||||
JobRunVisitResult::Failed(failed) => {
|
VisitResult::Failed(failed) => {
|
||||||
// Generate the event from the failed state
|
// Generate the event from the failed state
|
||||||
let event = failed.state.to_event(&failed.id());
|
let event = failed.state.to_event(&failed.job_run_id);
|
||||||
assert!(matches!(event, Event::JobRunFailureV1(_)));
|
assert!(matches!(event, Event::JobRunFailureV1(_)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
JobRunVisitResult::StillRunning => {
|
VisitResult::StillRunning(still_running) => {
|
||||||
// Sleep briefly and poll again
|
// Sleep briefly and poll again
|
||||||
std::thread::sleep(std::time::Duration::from_millis(10));
|
std::thread::sleep(std::time::Duration::from_millis(10));
|
||||||
|
current_job = still_running;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
JobRunVisitResult::DepMiss(dep_miss) => {
|
VisitResult::DepMiss(_dep_miss) => {
|
||||||
panic!("Job dep miss unexpectedly");
|
panic!("Job dep miss unexpectedly");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -455,14 +511,17 @@ mod tests {
|
||||||
let temp_file = format!("/tmp/databuild_test_cancel_{}", Uuid::new_v4());
|
let temp_file = format!("/tmp/databuild_test_cancel_{}", Uuid::new_v4());
|
||||||
|
|
||||||
// Spawn a job run that will sleep for 1 second and write a file
|
// Spawn a job run that will sleep for 1 second and write a file
|
||||||
let job_run = SubProcessBackend::spawn(MockJobRun::bin_path(), vec![]);
|
let job_run = JobRun::<SubProcessBackend>::spawn(MockJobRun::bin_path(), vec![]);
|
||||||
|
|
||||||
let env = MockJobRun::new()
|
let env = MockJobRun::new()
|
||||||
.sleep_ms(1000)
|
.sleep_ms(1000)
|
||||||
.output_file(&temp_file, &"completed".to_string())
|
.output_file(&temp_file, &"completed".to_string())
|
||||||
.exit_code(0)
|
.exit_code(0)
|
||||||
.to_env();
|
.to_env();
|
||||||
let running_job = job_run.run_with_env(Some(env)).unwrap();
|
let running_job = match job_run {
|
||||||
|
JobRun::NotStarted(not_started) => not_started.run(Some(env)).unwrap(),
|
||||||
|
_ => panic!("Expected NotStarted job"),
|
||||||
|
};
|
||||||
|
|
||||||
// Give it a tiny bit of time to start
|
// Give it a tiny bit of time to start
|
||||||
std::thread::sleep(std::time::Duration::from_millis(10));
|
std::thread::sleep(std::time::Duration::from_millis(10));
|
||||||
|
|
@ -478,10 +537,10 @@ mod tests {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// Generate the cancel event from the canceled state
|
// Generate the cancel event from the canceled state
|
||||||
let cancel_event = canceled_job.state.to_event(&canceled_job.id());
|
let cancel_event = canceled_job.state.to_event(&canceled_job.job_run_id);
|
||||||
|
|
||||||
// Verify we got the cancel event
|
// Verify we got the cancel event
|
||||||
assert_eq!(cancel_event.job_run_id, canceled_job.id().to_string());
|
assert_eq!(cancel_event.job_run_id, canceled_job.job_run_id.to_string());
|
||||||
assert!(cancel_event.source.is_some());
|
assert!(cancel_event.source.is_some());
|
||||||
assert_eq!(cancel_event.comment, Some("Job was canceled".to_string()));
|
assert_eq!(cancel_event.comment, Some("Job was canceled".to_string()));
|
||||||
|
|
||||||
|
|
@ -499,7 +558,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_job_run_fail_on_missing_deps_should_emit_missing_deps_event() {
|
fn test_job_run_fail_on_missing_deps_should_emit_missing_deps_event() {
|
||||||
// Spawn a job run that will sleep for 1 second and write a file
|
// Spawn a job run that will sleep for 1 second and write a file
|
||||||
let job_run = SubProcessBackend::spawn(MockJobRun::bin_path(), vec![]);
|
let job_run = JobRun::<SubProcessBackend>::spawn(MockJobRun::bin_path(), vec![]);
|
||||||
|
|
||||||
let expected_dep_miss = JobRunMissingDeps {
|
let expected_dep_miss = JobRunMissingDeps {
|
||||||
version: "1".into(),
|
version: "1".into(),
|
||||||
|
|
@ -515,24 +574,29 @@ mod tests {
|
||||||
.stdout_msg(&dep_miss_line)
|
.stdout_msg(&dep_miss_line)
|
||||||
.exit_code(1)
|
.exit_code(1)
|
||||||
.to_env();
|
.to_env();
|
||||||
let mut running_job = job_run.run_with_env(Some(env)).unwrap();
|
let running_job = match job_run {
|
||||||
|
JobRun::NotStarted(not_started) => not_started.run(Some(env)).unwrap(),
|
||||||
|
_ => panic!("Expected NotStarted job"),
|
||||||
|
};
|
||||||
|
|
||||||
// Poll until we get completion
|
// Poll until we get completion
|
||||||
|
let mut current_job = running_job;
|
||||||
loop {
|
loop {
|
||||||
match running_job.visit().unwrap() {
|
match current_job.visit().unwrap() {
|
||||||
JobRunVisitResult::Completed(_) => {
|
VisitResult::Completed(_) => {
|
||||||
panic!("Job succeeded unexpectedly");
|
panic!("Job succeeded unexpectedly");
|
||||||
}
|
}
|
||||||
JobRunVisitResult::Failed(failed) => {
|
VisitResult::Failed(_failed) => {
|
||||||
panic!("Job failed unexpectedly");
|
panic!("Job failed unexpectedly");
|
||||||
}
|
}
|
||||||
JobRunVisitResult::StillRunning => {
|
VisitResult::StillRunning(still_running) => {
|
||||||
// Sleep briefly and poll again
|
// Sleep briefly and poll again
|
||||||
std::thread::sleep(std::time::Duration::from_millis(10));
|
std::thread::sleep(std::time::Duration::from_millis(10));
|
||||||
|
current_job = still_running;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
JobRunVisitResult::DepMiss(backend) => {
|
VisitResult::DepMiss(dep_miss) => {
|
||||||
assert_eq!(backend.state.missing_deps, expected_dep_miss.missing_deps);
|
assert_eq!(dep_miss.state.missing_deps, expected_dep_miss.missing_deps);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,7 @@
|
||||||
use crate::build_event_log::{BELStorage, BuildEventLog, MemoryBELStorage};
|
use crate::build_event_log::{BELStorage, BuildEventLog, MemoryBELStorage};
|
||||||
use crate::data_build_event::Event;
|
use crate::data_build_event::Event;
|
||||||
use crate::job::JobConfiguration;
|
use crate::job::JobConfiguration;
|
||||||
use crate::job_run::{
|
use crate::job_run::SubProcessBackend;
|
||||||
CompletedJobRun, DepMissJobRun, FailedJobRun, JobRunVisitResult, NotStartedJobRun,
|
|
||||||
RunningJobRun, SubProcessBackend,
|
|
||||||
};
|
|
||||||
use crate::util::DatabuildError;
|
use crate::util::DatabuildError;
|
||||||
use crate::{JobRunBufferEventV1, PartitionRef, WantDetail};
|
use crate::{JobRunBufferEventV1, PartitionRef, WantDetail};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
@ -22,24 +19,16 @@ JTBDs:
|
||||||
*/
|
*/
|
||||||
struct Orchestrator<S: BELStorage + Debug> {
|
struct Orchestrator<S: BELStorage + Debug> {
|
||||||
pub bel: BuildEventLog<S>,
|
pub bel: BuildEventLog<S>,
|
||||||
pub not_started_jobs: Vec<NotStartedJobRun<SubProcessBackend>>,
|
|
||||||
pub running_jobs: Vec<RunningJobRun<SubProcessBackend>>,
|
|
||||||
pub completed_jobs: Vec<CompletedJobRun<SubProcessBackend>>,
|
|
||||||
pub failed_jobs: Vec<FailedJobRun<SubProcessBackend>>,
|
|
||||||
pub dep_miss_jobs: Vec<DepMissJobRun<SubProcessBackend>>,
|
|
||||||
pub config: OrchestratorConfig,
|
pub config: OrchestratorConfig,
|
||||||
|
pub job_runs: Vec<crate::job_run::JobRun<SubProcessBackend>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Orchestrator<MemoryBELStorage> {
|
impl Default for Orchestrator<MemoryBELStorage> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
bel: Default::default(),
|
bel: Default::default(),
|
||||||
not_started_jobs: Default::default(),
|
|
||||||
running_jobs: Default::default(),
|
|
||||||
completed_jobs: Default::default(),
|
|
||||||
failed_jobs: Default::default(),
|
|
||||||
dep_miss_jobs: Default::default(),
|
|
||||||
config: Default::default(),
|
config: Default::default(),
|
||||||
|
job_runs: Default::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -48,12 +37,8 @@ impl Orchestrator<MemoryBELStorage> {
|
||||||
fn copy(&self) -> Self {
|
fn copy(&self) -> Self {
|
||||||
Self {
|
Self {
|
||||||
bel: self.bel.clone(),
|
bel: self.bel.clone(),
|
||||||
not_started_jobs: Default::default(),
|
|
||||||
running_jobs: Default::default(),
|
|
||||||
completed_jobs: Default::default(),
|
|
||||||
failed_jobs: Default::default(),
|
|
||||||
dep_miss_jobs: Default::default(),
|
|
||||||
config: self.config.clone(),
|
config: self.config.clone(),
|
||||||
|
job_runs: Default::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -112,11 +97,6 @@ struct WantGroup {
|
||||||
wants: Vec<WantDetail>,
|
wants: Vec<WantDetail>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WantGroup {
|
|
||||||
pub fn spawn(&self) -> Result<NotStartedJobRun<SubProcessBackend>, std::io::Error> {
|
|
||||||
self.job.spawn(self.wants.clone())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct GroupedWants {
|
struct GroupedWants {
|
||||||
|
|
@ -140,63 +120,68 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
|
||||||
fn new(storage: S, config: OrchestratorConfig) -> Self {
|
fn new(storage: S, config: OrchestratorConfig) -> Self {
|
||||||
Self {
|
Self {
|
||||||
bel: BuildEventLog::new(storage, Default::default()),
|
bel: BuildEventLog::new(storage, Default::default()),
|
||||||
not_started_jobs: Vec::new(),
|
|
||||||
running_jobs: Vec::new(),
|
|
||||||
completed_jobs: Vec::new(),
|
|
||||||
failed_jobs: Vec::new(),
|
|
||||||
dep_miss_jobs: Vec::new(),
|
|
||||||
config,
|
config,
|
||||||
|
job_runs: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn schedule_queued_jobs(&mut self) -> Result<(), DatabuildError> {
|
fn schedule_queued_jobs(&mut self) -> Result<(), DatabuildError> {
|
||||||
// TODO need to incorporate concurrency limit/pools here, probably?
|
use crate::job_run::JobRun;
|
||||||
while let Some(job) = self.not_started_jobs.pop() {
|
|
||||||
self.running_jobs.push(job.run()?);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
let mut new_jobs = Vec::new();
|
||||||
|
for job in self.job_runs.drain(..) {
|
||||||
|
let transitioned = match job {
|
||||||
|
JobRun::NotStarted(not_started) => JobRun::Running(not_started.run(None)?),
|
||||||
|
other => other, // Pass through all other states unchanged
|
||||||
|
};
|
||||||
|
new_jobs.push(transitioned);
|
||||||
|
}
|
||||||
|
self.job_runs = new_jobs;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Visits individual job runs, appending resulting events, and moving runs between run status
|
/// Visits individual job runs, appending resulting events, and moving runs between run status
|
||||||
/// containers. Either jobs are still running, or they are moved to terminal states.
|
/// containers. Either jobs are still running, or they are moved to terminal states.
|
||||||
fn poll_job_runs(&mut self) -> Result<(), DatabuildError> {
|
fn poll_job_runs(&mut self) -> Result<(), DatabuildError> {
|
||||||
|
use crate::job_run::{JobRun, VisitResult};
|
||||||
|
|
||||||
self.schedule_queued_jobs()?;
|
self.schedule_queued_jobs()?;
|
||||||
|
|
||||||
// Visit running jobs and transition them to terminal states
|
// Visit all running jobs using type-safe transitions
|
||||||
let mut still_running = Vec::new();
|
let mut new_jobs = Vec::new();
|
||||||
// TODO make sure that failure in the middle can't mess up build state - likely need to
|
for job in self.job_runs.drain(..) {
|
||||||
// refactor here (e.g. turn state changes into data, commit them after all have been
|
let transitioned = match job {
|
||||||
// calculated and validated)
|
JobRun::Running(running) => {
|
||||||
for mut job in self.running_jobs.drain(..) {
|
match running.visit()? {
|
||||||
match job.visit()? {
|
VisitResult::StillRunning(still_running) => {
|
||||||
JobRunVisitResult::StillRunning => {
|
println!("Still running job: {:?}", still_running.job_run_id);
|
||||||
println!("Still running job: {:?}", job.id());
|
JobRun::Running(still_running)
|
||||||
still_running.push(job);
|
}
|
||||||
|
VisitResult::Completed(completed) => {
|
||||||
|
println!("Completed job: {:?}", completed.job_run_id);
|
||||||
|
let event = completed.state.to_event(&completed.job_run_id);
|
||||||
|
self.bel.append_event(&event)?;
|
||||||
|
JobRun::Completed(completed)
|
||||||
|
}
|
||||||
|
VisitResult::Failed(failed) => {
|
||||||
|
println!("Failed job: {:?}", failed.job_run_id);
|
||||||
|
let event = failed.state.to_event(&failed.job_run_id);
|
||||||
|
self.bel.append_event(&event)?;
|
||||||
|
JobRun::Failed(failed)
|
||||||
|
}
|
||||||
|
VisitResult::DepMiss(dep_miss) => {
|
||||||
|
println!("Dep miss job: {:?}", dep_miss.job_run_id);
|
||||||
|
let event = dep_miss.state.to_event(&dep_miss.job_run_id);
|
||||||
|
self.bel.append_event(&event)?;
|
||||||
|
JobRun::DepMiss(dep_miss)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
JobRunVisitResult::Completed(completed) => {
|
other => other, // Pass through all non-running states unchanged
|
||||||
// Emit success event
|
};
|
||||||
println!("Completed job: {:?}", completed.id());
|
new_jobs.push(transitioned);
|
||||||
self.bel
|
|
||||||
.append_event(&completed.state.to_event(&completed.id()))?;
|
|
||||||
self.completed_jobs.push(completed);
|
|
||||||
}
|
|
||||||
JobRunVisitResult::Failed(failed) => {
|
|
||||||
// Emit failure event
|
|
||||||
println!("Failed job: {:?}", failed.id());
|
|
||||||
let event: Event = failed.state.to_event(&failed.id());
|
|
||||||
self.bel.append_event(&event)?;
|
|
||||||
self.failed_jobs.push(failed);
|
|
||||||
}
|
|
||||||
JobRunVisitResult::DepMiss(dep_miss) => {
|
|
||||||
println!("Dep miss job: {:?}", dep_miss.job_run_id);
|
|
||||||
let event = dep_miss.state.to_event(&dep_miss.id());
|
|
||||||
self.bel.append_event(&event)?;
|
|
||||||
self.dep_miss_jobs.push(dep_miss);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
self.running_jobs = still_running;
|
self.job_runs = new_jobs;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
@ -243,12 +228,17 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn queue_job(&mut self, wg: WantGroup) -> Result<(), DatabuildError> {
|
fn queue_job(&mut self, wg: WantGroup) -> Result<(), DatabuildError> {
|
||||||
// Spawn job run (not started, but need only be `.run`'d)
|
use crate::job_run::JobRun;
|
||||||
let job_run = wg.spawn()?;
|
|
||||||
|
// Compute args from wants the same way JobConfiguration::spawn() does
|
||||||
|
let wanted_refs: Vec<crate::PartitionRef> =
|
||||||
|
wg.wants.iter().flat_map(|want| want.partitions.clone()).collect();
|
||||||
|
let args: Vec<String> = wanted_refs.iter().map(|pref| pref.r#ref.clone()).collect();
|
||||||
|
let job_run = JobRun::spawn(wg.job.entry_point.clone(), args);
|
||||||
|
|
||||||
// Create job run buffer event
|
// Create job run buffer event
|
||||||
let job_buffer_event = Event::JobRunBufferV1(JobRunBufferEventV1 {
|
let job_buffer_event = Event::JobRunBufferV1(JobRunBufferEventV1 {
|
||||||
job_run_id: job_run.job_run_id.into(),
|
job_run_id: job_run.job_run_id().to_string(),
|
||||||
job_label: wg.job.label,
|
job_label: wg.job.label,
|
||||||
building_partitions: wg
|
building_partitions: wg
|
||||||
.wants
|
.wants
|
||||||
|
|
@ -259,7 +249,7 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
|
||||||
want_attributed_partitions: wg.wants.iter().map(|w| w.into()).collect(),
|
want_attributed_partitions: wg.wants.iter().map(|w| w.into()).collect(),
|
||||||
});
|
});
|
||||||
self.bel.append_event(&job_buffer_event)?;
|
self.bel.append_event(&job_buffer_event)?;
|
||||||
self.not_started_jobs.push(job_run);
|
self.job_runs.push(job_run);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
@ -270,6 +260,36 @@ impl<S: BELStorage + Debug> Orchestrator<S> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper methods for tests to count jobs by state
|
||||||
|
#[cfg(test)]
|
||||||
|
fn count_running_jobs(&self) -> usize {
|
||||||
|
use crate::job_run::JobRun;
|
||||||
|
self.job_runs.iter().filter(|j| matches!(j, JobRun::Running(_))).count()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
fn count_terminal_jobs(&self) -> usize {
|
||||||
|
self.job_runs.iter().filter(|j| j.is_terminal()).count()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
fn count_not_started_jobs(&self) -> usize {
|
||||||
|
use crate::job_run::JobRun;
|
||||||
|
self.job_runs.iter().filter(|j| matches!(j, JobRun::NotStarted(_))).count()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
fn count_dep_miss_jobs(&self) -> usize {
|
||||||
|
use crate::job_run::JobRun;
|
||||||
|
self.job_runs.iter().filter(|j| matches!(j, JobRun::DepMiss(_))).count()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
fn count_completed_jobs(&self) -> usize {
|
||||||
|
use crate::job_run::JobRun;
|
||||||
|
self.job_runs.iter().filter(|j| matches!(j, JobRun::Completed(_))).count()
|
||||||
|
}
|
||||||
|
|
||||||
/** Entrypoint for running jobs */
|
/** Entrypoint for running jobs */
|
||||||
pub fn join(&mut self) -> Result<(), DatabuildError> {
|
pub fn join(&mut self) -> Result<(), DatabuildError> {
|
||||||
loop {
|
loop {
|
||||||
|
|
@ -376,14 +396,14 @@ mod tests {
|
||||||
fn test_empty_wants_noop() {
|
fn test_empty_wants_noop() {
|
||||||
let mut orchestrator = build_orchestrator();
|
let mut orchestrator = build_orchestrator();
|
||||||
// Should init with no work to do
|
// Should init with no work to do
|
||||||
assert!(orchestrator.not_started_jobs.is_empty());
|
assert_eq!(orchestrator.count_not_started_jobs(), 0);
|
||||||
assert!(orchestrator.running_jobs.is_empty());
|
assert_eq!(orchestrator.count_running_jobs(), 0);
|
||||||
orchestrator
|
orchestrator
|
||||||
.poll_wants()
|
.poll_wants()
|
||||||
.expect("shouldn't fail to poll empty wants");
|
.expect("shouldn't fail to poll empty wants");
|
||||||
// Should still be empty since no work to do
|
// Should still be empty since no work to do
|
||||||
assert!(orchestrator.not_started_jobs.is_empty());
|
assert_eq!(orchestrator.count_not_started_jobs(), 0);
|
||||||
assert!(orchestrator.running_jobs.is_empty());
|
assert_eq!(orchestrator.count_running_jobs(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use case: Some schedulable wants with jobs that can be matched should launch those jobs
|
// Use case: Some schedulable wants with jobs that can be matched should launch those jobs
|
||||||
|
|
@ -400,7 +420,7 @@ mod tests {
|
||||||
for e in events {
|
for e in events {
|
||||||
orchestrator.bel.append_event(&e).expect("append");
|
orchestrator.bel.append_event(&e).expect("append");
|
||||||
}
|
}
|
||||||
assert_eq!(orchestrator.not_started_jobs.len(), 0);
|
assert_eq!(orchestrator.count_not_started_jobs(), 0);
|
||||||
assert_eq!(orchestrator.bel.state.count_job_runs(), 0);
|
assert_eq!(orchestrator.bel.state.count_job_runs(), 0);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
|
|
@ -410,19 +430,13 @@ mod tests {
|
||||||
.expect("shouldn't fail to poll wants");
|
.expect("shouldn't fail to poll wants");
|
||||||
|
|
||||||
// Should schedule alpha job
|
// Should schedule alpha job
|
||||||
assert_eq!(orchestrator.not_started_jobs.len(), 1);
|
assert_eq!(orchestrator.count_not_started_jobs(), 1);
|
||||||
assert_eq!(
|
// Verify the job has the right args by checking the first NotStarted job
|
||||||
orchestrator
|
use crate::job_run::JobRun;
|
||||||
.not_started_jobs
|
let not_started_job = orchestrator.job_runs.iter().find(|j| matches!(j, JobRun::NotStarted(_))).unwrap();
|
||||||
.iter()
|
if let JobRun::NotStarted(job) = not_started_job {
|
||||||
.take(1)
|
assert_eq!(job.state.args, vec!["data/alpha"], "should have scheduled alpha job");
|
||||||
.last()
|
}
|
||||||
.unwrap()
|
|
||||||
.state
|
|
||||||
.args,
|
|
||||||
vec!["data/alpha"],
|
|
||||||
"should have scheduled alpha job"
|
|
||||||
);
|
|
||||||
assert_eq!(orchestrator.bel.state.count_job_runs(), 1);
|
assert_eq!(orchestrator.bel.state.count_job_runs(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -442,7 +456,7 @@ mod tests {
|
||||||
.expect("shouldn't fail to poll wants");
|
.expect("shouldn't fail to poll wants");
|
||||||
|
|
||||||
// Should not have scheduled any jobs
|
// Should not have scheduled any jobs
|
||||||
assert_eq!(orchestrator.not_started_jobs.len(), 0);
|
assert_eq!(orchestrator.count_not_started_jobs(), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -565,32 +579,29 @@ mod tests {
|
||||||
orchestrator
|
orchestrator
|
||||||
.poll_wants()
|
.poll_wants()
|
||||||
.expect("stage unscheduled jobs based on wants failed");
|
.expect("stage unscheduled jobs based on wants failed");
|
||||||
assert_eq!(orchestrator.not_started_jobs.len(), 1);
|
assert_eq!(orchestrator.count_not_started_jobs(), 1);
|
||||||
// poll job runs should start job run
|
// step should start job run
|
||||||
orchestrator.poll_job_runs().expect("should start run");
|
orchestrator.step().expect("should start run");
|
||||||
assert_eq!(orchestrator.running_jobs.len(), 1);
|
assert_eq!(orchestrator.count_running_jobs(), 1);
|
||||||
assert_eq!(orchestrator.bel.state.count_job_runs(), 1);
|
assert_eq!(orchestrator.bel.state.count_job_runs(), 1);
|
||||||
thread::sleep(Duration::from_millis(1));
|
thread::sleep(Duration::from_millis(1));
|
||||||
// Should still be running after 1ms
|
// Should still be running after 1ms
|
||||||
orchestrator
|
orchestrator
|
||||||
.poll_job_runs()
|
.step()
|
||||||
.expect("should still be running");
|
.expect("should still be running");
|
||||||
assert_eq!(orchestrator.running_jobs.len(), 1);
|
assert_eq!(orchestrator.count_running_jobs(), 1);
|
||||||
assert_eq!(orchestrator.bel.state.count_job_runs(), 1);
|
assert_eq!(orchestrator.bel.state.count_job_runs(), 1);
|
||||||
println!("STATE: {:?}", orchestrator.bel.state);
|
println!("STATE: {:?}", orchestrator.bel.state);
|
||||||
|
|
||||||
// Wait for it to complete
|
// Wait for it to complete
|
||||||
thread::sleep(Duration::from_millis(10));
|
thread::sleep(Duration::from_millis(10));
|
||||||
orchestrator
|
orchestrator
|
||||||
.poll_job_runs()
|
.step()
|
||||||
.expect("should be able to poll existing job run");
|
.expect("should be able to poll existing job run");
|
||||||
|
|
||||||
// Job run should have succeeded
|
// Job run should have succeeded
|
||||||
assert!(orchestrator.not_started_jobs.is_empty());
|
assert_eq!(orchestrator.count_not_started_jobs(), 0);
|
||||||
assert!(orchestrator.failed_jobs.is_empty());
|
assert_eq!(orchestrator.count_completed_jobs(), 1);
|
||||||
assert!(orchestrator.dep_miss_jobs.is_empty());
|
|
||||||
assert!(orchestrator.running_jobs.is_empty());
|
|
||||||
assert_eq!(orchestrator.completed_jobs.len(), 1);
|
|
||||||
|
|
||||||
// Build state should show partition as live
|
// Build state should show partition as live
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
@ -615,7 +626,7 @@ mod tests {
|
||||||
|
|
||||||
for _i in 0..max_steps {
|
for _i in 0..max_steps {
|
||||||
thread::sleep(Duration::from_millis(50));
|
thread::sleep(Duration::from_millis(50));
|
||||||
if orchestrator.running_jobs.is_empty() {
|
if orchestrator.count_running_jobs() == 0 {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
orchestrator
|
orchestrator
|
||||||
|
|
@ -702,7 +713,7 @@ echo 'Beta succeeded'
|
||||||
// Step 1: Should schedule beta job (want -> not_started_jobs)
|
// Step 1: Should schedule beta job (want -> not_started_jobs)
|
||||||
orchestrator.step().expect("step 1");
|
orchestrator.step().expect("step 1");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator.not_started_jobs.len(),
|
orchestrator.count_not_started_jobs(),
|
||||||
1,
|
1,
|
||||||
"beta job should be queued"
|
"beta job should be queued"
|
||||||
);
|
);
|
||||||
|
|
@ -710,7 +721,7 @@ echo 'Beta succeeded'
|
||||||
// Step 2: Should start beta job (not_started_jobs -> running_jobs)
|
// Step 2: Should start beta job (not_started_jobs -> running_jobs)
|
||||||
orchestrator.step().expect("step 2");
|
orchestrator.step().expect("step 2");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator.running_jobs.len(),
|
orchestrator.count_running_jobs(),
|
||||||
1,
|
1,
|
||||||
"beta job should be running"
|
"beta job should be running"
|
||||||
);
|
);
|
||||||
|
|
@ -719,7 +730,7 @@ echo 'Beta succeeded'
|
||||||
wait_for_jobs_to_complete(&mut orchestrator, 10).expect("beta job should complete");
|
wait_for_jobs_to_complete(&mut orchestrator, 10).expect("beta job should complete");
|
||||||
// (Beta should now be in dep_miss state, and a want for alpha should be created)
|
// (Beta should now be in dep_miss state, and a want for alpha should be created)
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator.dep_miss_jobs.len(),
|
orchestrator.count_dep_miss_jobs(),
|
||||||
1,
|
1,
|
||||||
"beta should have dep miss"
|
"beta should have dep miss"
|
||||||
);
|
);
|
||||||
|
|
@ -728,7 +739,7 @@ echo 'Beta succeeded'
|
||||||
// (dep miss handler created the alpha want, which will be picked up by poll_wants)
|
// (dep miss handler created the alpha want, which will be picked up by poll_wants)
|
||||||
orchestrator.step().expect("step 4");
|
orchestrator.step().expect("step 4");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator.running_jobs.len(),
|
orchestrator.count_running_jobs(),
|
||||||
1,
|
1,
|
||||||
"alpha job should be running"
|
"alpha job should be running"
|
||||||
);
|
);
|
||||||
|
|
@ -736,7 +747,7 @@ echo 'Beta succeeded'
|
||||||
// Step 6: Alpha completes successfully
|
// Step 6: Alpha completes successfully
|
||||||
wait_for_jobs_to_complete(&mut orchestrator, 10).expect("alpha job should complete");
|
wait_for_jobs_to_complete(&mut orchestrator, 10).expect("alpha job should complete");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator.completed_jobs.len(),
|
orchestrator.count_completed_jobs(),
|
||||||
1,
|
1,
|
||||||
"alpha should complete"
|
"alpha should complete"
|
||||||
);
|
);
|
||||||
|
|
@ -753,23 +764,22 @@ echo 'Beta succeeded'
|
||||||
|
|
||||||
// Step 7: Beta is rescheduled and started (want -> running_jobs)
|
// Step 7: Beta is rescheduled and started (want -> running_jobs)
|
||||||
orchestrator.step().expect("step 7");
|
orchestrator.step().expect("step 7");
|
||||||
assert_eq!(orchestrator.running_jobs.len(), 1, "beta should be running");
|
assert_eq!(orchestrator.count_running_jobs(), 1, "beta should be running");
|
||||||
|
|
||||||
// Step 8: Beta completes successfully
|
// Step 8: Beta completes successfully
|
||||||
wait_for_jobs_to_complete(&mut orchestrator, 10).expect("beta job should complete");
|
wait_for_jobs_to_complete(&mut orchestrator, 10).expect("beta job should complete");
|
||||||
|
|
||||||
// Then: Verify both partitions are live and both jobs completed
|
// Then: Verify both partitions are live and both jobs completed
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator.completed_jobs.len(),
|
orchestrator.count_completed_jobs(),
|
||||||
2,
|
2,
|
||||||
"both jobs should complete"
|
"both jobs should complete"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator.dep_miss_jobs.len(),
|
orchestrator.count_dep_miss_jobs(),
|
||||||
1,
|
1,
|
||||||
"should have one dep miss"
|
"should have one dep miss"
|
||||||
);
|
);
|
||||||
assert!(orchestrator.failed_jobs.is_empty(), "no jobs should fail");
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
orchestrator
|
orchestrator
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue