diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0a6afe78..3ec1bf7f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -197,6 +197,7 @@ jobs: run: > cargo mutants --no-shuffle -vV --in-diff git.diff --test-tool=${{matrix.test_tool}} --timeout=500 --build-timeout=500 + --exclude=windows.rs --exclude=console.rs - name: Archive mutants.out uses: actions/upload-artifact@v4 if: always() @@ -234,7 +235,8 @@ jobs: run: > cargo mutants --no-shuffle -vV --shard ${{ matrix.shard }}/10 --test-tool ${{ matrix.test_tool }} --baseline=skip --timeout=500 - --build-timeout=500 --in-place + --build-timeout=500 --in-place --exclude=windows.rs + --exclude=console.rs - name: Archive mutants.out uses: actions/upload-artifact@v4 if: always() diff --git a/src/cargo.rs b/src/cargo.rs index 5360017d..1cf59e67 100644 --- a/src/cargo.rs +++ b/src/cargo.rs @@ -16,7 +16,7 @@ use crate::options::{Options, TestTool}; use crate::outcome::{Phase, PhaseResult}; use crate::output::ScenarioOutput; use crate::package::PackageSelection; -use crate::process::{Process, ProcessStatus}; +use crate::process::{Exit, Process}; use crate::Result; /// Run cargo build, check, or test. @@ -56,7 +56,7 @@ pub fn run_cargo( )?; check_interrupted()?; debug!(?process_status, elapsed = ?start.elapsed()); - if let ProcessStatus::Failure(code) = process_status { + if let Exit::Failure(code) = process_status { // 100 "one or more tests failed" from ; // I'm not addind a dependency to just get one integer. if argv[1] == "nextest" && code != 100 { diff --git a/src/outcome.rs b/src/outcome.rs index 72413032..0ca5cab3 100644 --- a/src/outcome.rs +++ b/src/outcome.rs @@ -14,7 +14,7 @@ use serde::Serializer; use tracing::warn; use crate::console::plural; -use crate::process::ProcessStatus; +use crate::process::Exit; use crate::*; /// What phase of running a scenario. @@ -193,7 +193,7 @@ impl ScenarioOutcome { self.phase_results.last().unwrap().phase } - pub fn last_phase_result(&self) -> ProcessStatus { + pub fn last_phase_result(&self) -> Exit { self.phase_results.last().unwrap().process_status } @@ -284,7 +284,7 @@ pub struct PhaseResult { /// How long did it take? pub duration: Duration, /// Did it succeed? - pub process_status: ProcessStatus, + pub process_status: Exit, /// What command was run, as an argv list. pub argv: Vec, } @@ -324,7 +324,7 @@ pub enum SummaryOutcome { mod test { use std::time::Duration; - use crate::process::ProcessStatus; + use crate::process::Exit; use super::{Phase, PhaseResult, Scenario, ScenarioOutcome}; @@ -339,13 +339,13 @@ mod test { PhaseResult { phase: Phase::Build, duration: Duration::from_secs(2), - process_status: ProcessStatus::Success, + process_status: Exit::Success, argv: vec!["cargo".into(), "build".into()], }, PhaseResult { phase: Phase::Test, duration: Duration::from_secs(3), - process_status: ProcessStatus::Success, + process_status: Exit::Success, argv: vec!["cargo".into(), "test".into()], }, ], @@ -355,7 +355,7 @@ mod test { Some(&PhaseResult { phase: Phase::Build, duration: Duration::from_secs(2), - process_status: ProcessStatus::Success, + process_status: Exit::Success, argv: vec!["cargo".into(), "build".into()], }) ); diff --git a/src/process.rs b/src/process.rs index 25f0600e..17bbfb44 100644 --- a/src/process.rs +++ b/src/process.rs @@ -8,16 +8,14 @@ #![allow(clippy::option_map_unit_fn)] // I don't think it's clearer with if/let. use std::ffi::OsStr; -#[cfg(unix)] -use std::os::unix::process::{CommandExt, ExitStatusExt}; use std::process::{Child, Command, Stdio}; use std::thread::sleep; use std::time::{Duration, Instant}; -use anyhow::{bail, Context}; +use anyhow::Context; use camino::Utf8Path; use serde::Serialize; -use tracing::{debug, span, trace, warn, Level}; +use tracing::{debug, span, trace, Level}; use crate::console::Console; use crate::interrupt::check_interrupted; @@ -27,6 +25,16 @@ use crate::Result; /// How frequently to check if a subprocess finished. const WAIT_POLL_INTERVAL: Duration = Duration::from_millis(50); +#[cfg(windows)] +mod windows; +#[cfg(windows)] +use windows::{configure_command, terminate_child}; + +#[cfg(unix)] +mod unix; +#[cfg(unix)] +use unix::{configure_command, terminate_child}; + pub struct Process { child: Child, start: Instant, @@ -44,7 +52,7 @@ impl Process { jobserver: &Option, scenario_output: &mut ScenarioOutput, console: &Console, - ) -> Result { + ) -> Result { let mut child = Process::start(argv, env, cwd, timeout, jobserver, scenario_output)?; let process_status = loop { if let Some(exit_status) = child.poll()? { @@ -68,22 +76,21 @@ impl Process { scenario_output: &mut ScenarioOutput, ) -> Result { let start = Instant::now(); - let quoted_argv = cheap_shell_quote(argv); + let quoted_argv = quote_argv(argv); scenario_output.message("ed_argv)?; debug!(%quoted_argv, "start process"); let os_env = env.iter().map(|(k, v)| (OsStr::new(k), OsStr::new(v))); - let mut child = Command::new(&argv[0]); - child + let mut command = Command::new(&argv[0]); + command .args(&argv[1..]) .envs(os_env) .stdin(Stdio::null()) .stdout(scenario_output.open_log_append()?) .stderr(scenario_output.open_log_append()?) .current_dir(cwd); - jobserver.as_ref().map(|js| js.configure(&mut child)); - #[cfg(unix)] - child.process_group(0); - let child = child + jobserver.as_ref().map(|js| js.configure(&mut command)); + configure_command(&mut command); + let child = command .spawn() .with_context(|| format!("failed to spawn {}", argv.join(" ")))?; Ok(Process { @@ -95,28 +102,17 @@ impl Process { /// Check if the child process has finished; if so, return its status. #[mutants::skip] // It's hard to avoid timeouts if this never works... - pub fn poll(&mut self) -> Result> { + pub fn poll(&mut self) -> Result> { if self.timeout.is_some_and(|t| self.start.elapsed() > t) { debug!("timeout, terminating child process...",); self.terminate()?; - Ok(Some(ProcessStatus::Timeout)) + Ok(Some(Exit::Timeout)) } else if let Err(e) = check_interrupted() { debug!("interrupted, terminating child process..."); self.terminate()?; Err(e) } else if let Some(status) = self.child.try_wait()? { - if let Some(code) = status.code() { - if code == 0 { - return Ok(Some(ProcessStatus::Success)); - } else { - return Ok(Some(ProcessStatus::Failure(code as u32))); - } - } - #[cfg(unix)] - if let Some(signal) = status.signal() { - return Ok(Some(ProcessStatus::Signalled(signal as u8))); - } - Ok(Some(ProcessStatus::Other)) + Ok(Some(status.into())) } else { Ok(None) } @@ -126,12 +122,12 @@ impl Process { /// /// Blocks until the subprocess is terminated and then returns the exit status. /// - /// The status might not be Timeout if this raced with a normal exit. + /// The status might not be `Timeout` if this raced with a normal exit. #[mutants::skip] // would leak processes from tests if skipped fn terminate(&mut self) -> Result<()> { let _span = span!(Level::DEBUG, "terminate_child", pid = self.child.id()).entered(); debug!("terminating child process"); - terminate_child_impl(&mut self.child)?; + terminate_child(&mut self.child)?; trace!("wait for child after termination"); match self.child.wait() { Err(err) => debug!(?err, "Failed to wait for child after termination"), @@ -141,39 +137,9 @@ impl Process { } } -#[cfg(unix)] -#[allow(unknown_lints, clippy::needless_pass_by_ref_mut)] // To match Windows -#[mutants::skip] // hard to exercise the ESRCH edge case -fn terminate_child_impl(child: &mut Child) -> Result<()> { - use nix::errno::Errno; - use nix::sys::signal::{killpg, Signal}; - - let pid = nix::unistd::Pid::from_raw(child.id().try_into().unwrap()); - match killpg(pid, Signal::SIGTERM) { - Ok(()) => Ok(()), - Err(Errno::ESRCH) => { - Ok(()) // Probably already gone - } - Err(Errno::EPERM) if cfg!(target_os = "macos") => { - Ok(()) // If the process no longer exists then macos can return EPERM (maybe?) - } - Err(errno) => { - let message = format!("failed to terminate child: {errno}"); - warn!("{}", message); - bail!(message); - } - } -} - -#[cfg(windows)] -#[mutants::skip] // hard to exercise the ESRCH edge case -fn terminate_child_impl(child: &mut Child) -> Result<()> { - child.kill().context("Kill child") -} - /// The result of running a single child process. #[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)] -pub enum ProcessStatus { +pub enum Exit { /// Exited with status 0. Success, /// Exited with status non-0. @@ -181,53 +147,67 @@ pub enum ProcessStatus { /// Exceeded its timeout, and killed. Timeout, /// Killed by some signal. + #[cfg(unix)] Signalled(u8), /// Unknown or unexpected situation. Other, } -impl ProcessStatus { +impl Exit { pub fn is_success(&self) -> bool { - *self == ProcessStatus::Success + *self == Exit::Success } pub fn is_timeout(&self) -> bool { - *self == ProcessStatus::Timeout + *self == Exit::Timeout } pub fn is_failure(&self) -> bool { - matches!(self, ProcessStatus::Failure(_)) + matches!(self, Exit::Failure(_)) } } /// Quote an argv slice in Unix shell style. /// -/// This is not completely guaranteed, but is only for debug logs. -fn cheap_shell_quote, I: IntoIterator>(argv: I) -> String { - argv.into_iter() - .map(|s| { - s.as_ref() - .chars() - .flat_map(|c| match c { - ' ' | '\t' | '\n' | '\r' | '\\' | '\'' | '"' => vec!['\\', c], - _ => vec![c], - }) - .collect::() - }) - .collect::>() - .join(" ") +/// This isn't guaranteed to match the interpretation of a shell or to be safe. +/// It's just for debug logs. +fn quote_argv, I: IntoIterator>(argv: I) -> String { + let mut r = String::new(); + for s in argv { + if !r.is_empty() { + r.push(' '); + } + for c in s.as_ref().chars() { + match c { + '\t' => r.push_str(r"\t"), + '\n' => r.push_str(r"\n"), + '\r' => r.push_str(r"\r"), + ' ' | '\\' | '\'' | '"' => { + r.push('\\'); + r.push(c); + } + _ => r.push(c), + } + } + } + r } #[cfg(test)] mod test { - use super::cheap_shell_quote; + use super::quote_argv; #[test] fn shell_quoting() { - assert_eq!(cheap_shell_quote(["foo".to_string()]), "foo"); + assert_eq!(quote_argv(["foo".to_string()]), "foo"); + assert_eq!( + quote_argv(["foo bar", r#"\blah\x"#, r#""quoted""#]), + r#"foo\ bar \\blah\\x \"quoted\""# + ); + assert_eq!(quote_argv([""]), ""); assert_eq!( - cheap_shell_quote(["foo bar", r#"\blah\t"#, r#""quoted""#]), - r#"foo\ bar \\blah\\t \"quoted\""# + quote_argv(["with whitespace", "\r\n\t\t"]), + r#"with\ whitespace \r\n\t\t"# ); } } diff --git a/src/process/unix.rs b/src/process/unix.rs new file mode 100644 index 00000000..5243456f --- /dev/null +++ b/src/process/unix.rs @@ -0,0 +1,54 @@ +use std::os::unix::process::{CommandExt, ExitStatusExt}; +use std::process::{Child, Command, ExitStatus}; + +use anyhow::bail; +use nix::errno::Errno; +use nix::sys::signal::{killpg, Signal}; +use nix::unistd::Pid; +use tracing::warn; + +use crate::Result; + +use super::Exit; + +#[allow(unknown_lints, clippy::needless_pass_by_ref_mut)] // To match Windows +#[mutants::skip] // hard to exercise the ESRCH edge case +pub(super) fn terminate_child(child: &mut Child) -> Result<()> { + let pid = Pid::from_raw(child.id().try_into().unwrap()); + match killpg(pid, Signal::SIGTERM) { + Ok(()) => Ok(()), + Err(Errno::ESRCH) => { + Ok(()) // Probably already gone + } + Err(Errno::EPERM) if cfg!(target_os = "macos") => { + Ok(()) // If the process no longer exists then macos can return EPERM (maybe?) + } + Err(errno) => { + // TODO: Maybe strerror? + let message = format!("failed to terminate child: error {errno}"); + warn!("{}", message); + bail!(message); + } + } +} + +#[mutants::skip] +pub(super) fn configure_command(command: &mut Command) { + command.process_group(0); +} + +impl From for Exit { + fn from(status: ExitStatus) -> Self { + if let Some(code) = status.code() { + if code == 0 { + Exit::Success + } else { + Exit::Failure(code as u32) + } + } else if let Some(signal) = status.signal() { + return Exit::Signalled(signal as u8); + } else { + Exit::Other + } + } +} diff --git a/src/process/windows.rs b/src/process/windows.rs new file mode 100644 index 00000000..9bb913bf --- /dev/null +++ b/src/process/windows.rs @@ -0,0 +1,29 @@ +use std::process::{Child, Command, ExitStatus}; + +use anyhow::Context; + +use crate::Result; + +use super::Exit; + +#[mutants::skip] // hard to exercise the ESRCH edge case +pub(super) fn terminate_child(child: &mut Child) -> Result<()> { + child.kill().context("Kill child") +} + +#[mutants::skip] +pub(super) fn configure_command(_command: &mut Command) {} + +impl From for Exit { + fn from(status: ExitStatus) -> Self { + if let Some(code) = status.code() { + if code == 0 { + Exit::Success + } else { + Exit::Failure(code as u32) + } + } else { + Exit::Other + } + } +} diff --git a/tests/main.rs b/tests/main.rs index 9637a2ae..fd1bd89d 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -6,8 +6,6 @@ use std::collections::HashSet; use std::env; use std::fs::{self, create_dir, read_dir, read_to_string}; use std::path::Path; -use std::thread::sleep; -use std::time::Duration; use indoc::indoc; use itertools::Itertools; @@ -18,7 +16,7 @@ use pretty_assertions::assert_eq; use tempfile::TempDir; mod util; -use util::{copy_of_testdata, copy_testdata_to, run, MAIN_BINARY, OUTER_TIMEOUT}; +use util::{copy_of_testdata, copy_testdata_to, run, OUTER_TIMEOUT}; #[test] fn incorrect_cargo_subcommand() { @@ -657,90 +655,6 @@ fn timeout_when_unmutated_tree_test_hangs() { )); } -/// If the test hangs and the user (in this case the test suite) interrupts it, then -/// the `cargo test` child should be killed. -/// -/// This is a bit hard to directly observe: the property that we really most care -/// about is that _all_ grandchild processes are also killed and nothing is left -/// behind. (On Unix, this is accomplished by use of a pgroup.) However that's a bit -/// hard to mechanically check without reading and interpreting the process tree, which -/// seems likely to be a bit annoying to do portably and without flakes. -/// (But maybe we still should?) -/// -/// An easier thing to test is that the cargo-mutants process _thinks_ it has killed -/// the children, and we can observe this in the debug log. -/// -/// In this test cargo-mutants has a very long timeout, but the test driver has a -/// short timeout, so it should kill cargo-mutants. -#[test] -#[cfg(unix)] // Should in principle work on Windows, but does not at the moment. -fn interrupt_caught_and_kills_children() { - // Test a tree that has enough tests that we'll probably kill it before it completes. - - use std::process::{Command, Stdio}; - - use nix::libc::pid_t; - use nix::sys::signal::{kill, SIGTERM}; - use nix::unistd::Pid; - - let tmp_src_dir = copy_of_testdata("well_tested"); - // We can't use `assert_cmd` `timeout` here because that sends the child a `SIGKILL`, - // which doesn't give it a chance to clean up. And, `std::process::Command` only - // has an abrupt kill. - - // Drop RUST_BACKTRACE because the traceback mentions "panic" handler functions - // and we want to check that the process does not panic. - - // Skip baseline because firstly it should already pass but more importantly - // #333 exhibited only during non-baseline scenarios. - let args = [ - MAIN_BINARY.to_str().unwrap(), - "mutants", - "--timeout=300", - "--baseline=skip", - "--level=trace", - ]; - - println!("Running: {args:?}"); - let mut child = Command::new(args[0]) - .args(&args[1..]) - .current_dir(&tmp_src_dir) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .env_remove("RUST_BACKTRACE") - .spawn() - .expect("spawn child"); - - sleep(Duration::from_secs(2)); // Let it get started - assert!( - child.try_wait().expect("try to wait for child").is_none(), - "child exited early" - ); - - println!("Sending SIGTERM to cargo-mutants..."); - kill(Pid::from_raw(child.id() as pid_t), SIGTERM).expect("send SIGTERM"); - - println!("Wait for cargo-mutants to exit..."); - let output = child - .wait_with_output() - .expect("wait for child after SIGTERM"); - - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - println!("stdout:\n{stdout}"); - println!("stderr:\n{stderr}"); - - assert!(stderr.contains("interrupted")); - // We used to look here for some other trace messages about how it's interrupted, but - // that seems to be racy: sometimes the parent sees the child interrupted before it - // emits these messages? Anyhow, it's not essential. - - // This shouldn't cause a panic though (#333) - assert!(!stderr.contains("panic")); - // And we don't want duplicate messages about workers failing. - assert!(!stderr.contains("Worker thread failed")); -} - /// A tree that hangs when some functions are mutated does not hang cargo-mutants /// overall, because we impose a timeout. The timeout can be specified on the /// command line, with decimal seconds. diff --git a/tests/unix.rs b/tests/unix.rs new file mode 100644 index 00000000..84157a9f --- /dev/null +++ b/tests/unix.rs @@ -0,0 +1,91 @@ +#![cfg(unix)] + +use std::thread::sleep; +use std::time::Duration; + +mod util; +use util::{copy_of_testdata, MAIN_BINARY}; + +/// If the test hangs and the user (in this case the test suite) interrupts it, then +/// the `cargo test` child should be killed. +/// +/// This is a bit hard to directly observe: the property that we really most care +/// about is that _all_ grandchild processes are also killed and nothing is left +/// behind. (On Unix, this is accomplished by use of a pgroup.) However that's a bit +/// hard to mechanically check without reading and interpreting the process tree, which +/// seems likely to be a bit annoying to do portably and without flakes. +/// (But maybe we still should?) +/// +/// An easier thing to test is that the cargo-mutants process _thinks_ it has killed +/// the children, and we can observe this in the debug log. +/// +/// In this test cargo-mutants has a very long timeout, but the test driver has a +/// short timeout, so it should kill cargo-mutants. +// TODO: An equivalent test on Windows? +#[test] +fn interrupt_caught_and_kills_children() { + // Test a tree that has enough tests that we'll probably kill it before it completes. + + use std::process::{Command, Stdio}; + + use nix::libc::pid_t; + use nix::sys::signal::{kill, SIGTERM}; + use nix::unistd::Pid; + + let tmp_src_dir = copy_of_testdata("well_tested"); + // We can't use `assert_cmd` `timeout` here because that sends the child a `SIGKILL`, + // which doesn't give it a chance to clean up. And, `std::process::Command` only + // has an abrupt kill. + + // Drop RUST_BACKTRACE because the traceback mentions "panic" handler functions + // and we want to check that the process does not panic. + + // Skip baseline because firstly it should already pass but more importantly + // #333 exhibited only during non-baseline scenarios. + let args = [ + MAIN_BINARY.to_str().unwrap(), + "mutants", + "--timeout=300", + "--baseline=skip", + "--level=trace", + ]; + + println!("Running: {args:?}"); + let mut child = Command::new(args[0]) + .args(&args[1..]) + .current_dir(&tmp_src_dir) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .env_remove("RUST_BACKTRACE") + .spawn() + .expect("spawn child"); + + sleep(Duration::from_secs(2)); // Let it get started + assert!( + child.try_wait().expect("try to wait for child").is_none(), + "child exited early" + ); + + println!("Sending SIGTERM to cargo-mutants..."); + kill(Pid::from_raw(child.id() as pid_t), SIGTERM).expect("send SIGTERM"); + + println!("Wait for cargo-mutants to exit..."); + let output = child + .wait_with_output() + .expect("wait for child after SIGTERM"); + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + println!("stdout:\n{stdout}"); + println!("stderr:\n{stderr}"); + + assert!(stderr.contains("interrupted")); + // We used to look here for some other trace messages about how it's interrupted, but + // that seems to be racy: sometimes the parent sees the child interrupted before it + // emits these messages? Anyhow, it's not essential. + + // This shouldn't cause a panic though (#333) + assert!(!stderr.contains("panic")); + // And we don't want duplicate messages about workers failing. + assert!(!stderr.contains("Worker thread failed")); +}