diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0a6afe78..3ec1bf7f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -197,6 +197,7 @@ jobs: run: > cargo mutants --no-shuffle -vV --in-diff git.diff --test-tool=${{matrix.test_tool}} --timeout=500 --build-timeout=500 + --exclude=windows.rs --exclude=console.rs - name: Archive mutants.out uses: actions/upload-artifact@v4 if: always() @@ -234,7 +235,8 @@ jobs: run: > cargo mutants --no-shuffle -vV --shard ${{ matrix.shard }}/10 --test-tool ${{ matrix.test_tool }} --baseline=skip --timeout=500 - --build-timeout=500 --in-place + --build-timeout=500 --in-place --exclude=windows.rs + --exclude=console.rs - name: Archive mutants.out uses: actions/upload-artifact@v4 if: always() diff --git a/src/process.rs b/src/process.rs index 3078ea33..439c1f67 100644 --- a/src/process.rs +++ b/src/process.rs @@ -9,16 +9,14 @@ #![allow(clippy::redundant_else)] use std::ffi::OsStr; -#[cfg(unix)] -use std::os::unix::process::{CommandExt, ExitStatusExt}; use std::process::{Child, Command, Stdio}; use std::thread::sleep; use std::time::{Duration, Instant}; -use anyhow::{bail, Context}; +use anyhow::Context; use camino::Utf8Path; use serde::Serialize; -use tracing::{debug, span, trace, warn, Level}; +use tracing::{debug, span, trace, Level}; use crate::console::Console; use crate::interrupt::check_interrupted; @@ -28,6 +26,16 @@ use crate::Result; /// How frequently to check if a subprocess finished. const WAIT_POLL_INTERVAL: Duration = Duration::from_millis(50); +#[cfg(windows)] +mod windows; +#[cfg(windows)] +use windows::{configure_command, terminate_child}; + +#[cfg(unix)] +mod unix; +#[cfg(unix)] +use unix::{configure_command, terminate_child}; + pub struct Process { child: Child, start: Instant, @@ -68,12 +76,12 @@ impl Process { scenario_output: &mut ScenarioOutput, ) -> Result { let start = Instant::now(); - let quoted_argv = cheap_shell_quote(argv); + let quoted_argv = quote_argv(argv); scenario_output.message("ed_argv)?; debug!(%quoted_argv, "start process"); let os_env = env.iter().map(|(k, v)| (OsStr::new(k), OsStr::new(v))); - let mut child = Command::new(&argv[0]); - child + let mut command = Command::new(&argv[0]); + command .args(&argv[1..]) .envs(os_env) .stdin(Stdio::null()) @@ -81,11 +89,10 @@ impl Process { .stderr(scenario_output.open_log_append()?) .current_dir(cwd); if let Some(js) = jobserver { - js.configure(&mut child); + js.configure(&mut command); } - #[cfg(unix)] - child.process_group(0); - let child = child + configure_command(&mut command); + let child = command .spawn() .with_context(|| format!("failed to spawn {}", argv.join(" ")))?; Ok(Process { @@ -107,22 +114,7 @@ impl Process { self.terminate()?; Err(e) } else if let Some(status) = self.child.try_wait()? { - if let Some(code) = status.code() { - if code == 0 { - return Ok(Some(Exit::Success)); - } else { - return Ok(Some(Exit::Failure( - code.try_into().context("Read exit code as u32")?, - ))); - } - } - #[cfg(unix)] - if let Some(signal) = status.signal() { - return Ok(Some(Exit::Signalled( - signal.try_into().context("Read signal as u8")?, - ))); - } - Ok(Some(Exit::Other)) + Ok(Some(status.into())) } else { Ok(None) } @@ -132,12 +124,12 @@ impl Process { /// /// Blocks until the subprocess is terminated and then returns the exit status. /// - /// The status might not be Timeout if this raced with a normal exit. + /// The status might not be `Timeout` if this raced with a normal exit. #[mutants::skip] // would leak processes from tests if skipped fn terminate(&mut self) -> Result<()> { let _span = span!(Level::DEBUG, "terminate_child", pid = self.child.id()).entered(); debug!("terminating child process"); - terminate_child_impl(&mut self.child)?; + terminate_child(&mut self.child)?; trace!("wait for child after termination"); match self.child.wait() { Err(err) => debug!(?err, "Failed to wait for child after termination"), @@ -147,47 +139,18 @@ impl Process { } } -#[cfg(unix)] -#[allow(unknown_lints, clippy::needless_pass_by_ref_mut)] // To match Windows -#[mutants::skip] // hard to exercise the ESRCH edge case -fn terminate_child_impl(child: &mut Child) -> Result<()> { - use nix::errno::Errno; - use nix::sys::signal::{killpg, Signal}; - - let pid = nix::unistd::Pid::from_raw(child.id().try_into().unwrap()); - match killpg(pid, Signal::SIGTERM) { - Ok(()) => Ok(()), - Err(Errno::ESRCH) => { - Ok(()) // Probably already gone - } - Err(Errno::EPERM) if cfg!(target_os = "macos") => { - Ok(()) // If the process no longer exists then macos can return EPERM (maybe?) - } - Err(errno) => { - let message = format!("failed to terminate child: {errno}"); - warn!("{}", message); - bail!(message); - } - } -} - -#[cfg(windows)] -#[mutants::skip] // hard to exercise the ESRCH edge case -fn terminate_child_impl(child: &mut Child) -> Result<()> { - child.kill().context("Kill child") -} - /// The result of running a single child process. #[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)] pub enum Exit { /// Exited with status 0. Success, /// Exited with status non-0. - Failure(u32), + Failure(i32), /// Exceeded its timeout, and killed. Timeout, /// Killed by some signal. - Signalled(u8), + #[cfg(unix)] + Signalled(i32), /// Unknown or unexpected situation. Other, } @@ -208,32 +171,45 @@ impl Exit { /// Quote an argv slice in Unix shell style. /// -/// This is not completely guaranteed, but is only for debug logs. -fn cheap_shell_quote, I: IntoIterator>(argv: I) -> String { - argv.into_iter() - .map(|s| { - s.as_ref() - .chars() - .flat_map(|c| match c { - ' ' | '\t' | '\n' | '\r' | '\\' | '\'' | '"' => vec!['\\', c], - _ => vec![c], - }) - .collect::() - }) - .collect::>() - .join(" ") +/// This isn't guaranteed to match the interpretation of a shell or to be safe. +/// It's just for debug logs. +fn quote_argv, I: IntoIterator>(argv: I) -> String { + let mut r = String::new(); + for s in argv { + if !r.is_empty() { + r.push(' '); + } + for c in s.as_ref().chars() { + match c { + '\t' => r.push_str(r"\t"), + '\n' => r.push_str(r"\n"), + '\r' => r.push_str(r"\r"), + ' ' | '\\' | '\'' | '"' => { + r.push('\\'); + r.push(c); + } + _ => r.push(c), + } + } + } + r } #[cfg(test)] mod test { - use super::cheap_shell_quote; + use super::quote_argv; #[test] fn shell_quoting() { - assert_eq!(cheap_shell_quote(["foo".to_string()]), "foo"); + assert_eq!(quote_argv(["foo".to_string()]), "foo"); + assert_eq!( + quote_argv(["foo bar", r"\blah\x", r#""quoted""#]), + r#"foo\ bar \\blah\\x \"quoted\""# + ); + assert_eq!(quote_argv([""]), ""); assert_eq!( - cheap_shell_quote(["foo bar", r"\blah\t", r#""quoted""#]), - r#"foo\ bar \\blah\\t \"quoted\""# + quote_argv(["with whitespace", "\r\n\t\t"]), + r"with\ whitespace \r\n\t\t" ); } } diff --git a/src/process/unix.rs b/src/process/unix.rs new file mode 100644 index 00000000..d5f52aac --- /dev/null +++ b/src/process/unix.rs @@ -0,0 +1,54 @@ +use std::os::unix::process::{CommandExt, ExitStatusExt}; +use std::process::{Child, Command, ExitStatus}; + +use anyhow::bail; +use nix::errno::Errno; +use nix::sys::signal::{killpg, Signal}; +use nix::unistd::Pid; +use tracing::warn; + +use crate::Result; + +use super::Exit; + +#[allow(unknown_lints, clippy::needless_pass_by_ref_mut)] // To match Windows +#[mutants::skip] // hard to exercise the ESRCH edge case +pub(super) fn terminate_child(child: &mut Child) -> Result<()> { + let pid = Pid::from_raw(child.id().try_into().unwrap()); + match killpg(pid, Signal::SIGTERM) { + Ok(()) => Ok(()), + Err(Errno::ESRCH) => { + Ok(()) // Probably already gone + } + Err(Errno::EPERM) if cfg!(target_os = "macos") => { + Ok(()) // If the process no longer exists then macos can return EPERM (maybe?) + } + Err(errno) => { + // TODO: Maybe strerror? + let message = format!("failed to terminate child: error {errno}"); + warn!("{}", message); + bail!(message); + } + } +} + +#[mutants::skip] +pub(super) fn configure_command(command: &mut Command) { + command.process_group(0); +} + +impl From for Exit { + fn from(status: ExitStatus) -> Self { + if let Some(code) = status.code() { + if code == 0 { + Exit::Success + } else { + Exit::Failure(code) + } + } else if let Some(signal) = status.signal() { + return Exit::Signalled(signal); + } else { + Exit::Other + } + } +} diff --git a/src/process/windows.rs b/src/process/windows.rs new file mode 100644 index 00000000..c37950fd --- /dev/null +++ b/src/process/windows.rs @@ -0,0 +1,29 @@ +use std::process::{Child, Command, ExitStatus}; + +use anyhow::Context; + +use crate::Result; + +use super::Exit; + +#[mutants::skip] // hard to exercise the ESRCH edge case +pub(super) fn terminate_child(child: &mut Child) -> Result<()> { + child.kill().context("Kill child") +} + +#[mutants::skip] +pub(super) fn configure_command(_command: &mut Command) {} + +impl From for Exit { + fn from(status: ExitStatus) -> Self { + if let Some(code) = status.code() { + if code == 0 { + Exit::Success + } else { + Exit::Failure(code) + } + } else { + Exit::Other + } + } +} diff --git a/tests/main.rs b/tests/main.rs index 9637a2ae..fd1bd89d 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -6,8 +6,6 @@ use std::collections::HashSet; use std::env; use std::fs::{self, create_dir, read_dir, read_to_string}; use std::path::Path; -use std::thread::sleep; -use std::time::Duration; use indoc::indoc; use itertools::Itertools; @@ -18,7 +16,7 @@ use pretty_assertions::assert_eq; use tempfile::TempDir; mod util; -use util::{copy_of_testdata, copy_testdata_to, run, MAIN_BINARY, OUTER_TIMEOUT}; +use util::{copy_of_testdata, copy_testdata_to, run, OUTER_TIMEOUT}; #[test] fn incorrect_cargo_subcommand() { @@ -657,90 +655,6 @@ fn timeout_when_unmutated_tree_test_hangs() { )); } -/// If the test hangs and the user (in this case the test suite) interrupts it, then -/// the `cargo test` child should be killed. -/// -/// This is a bit hard to directly observe: the property that we really most care -/// about is that _all_ grandchild processes are also killed and nothing is left -/// behind. (On Unix, this is accomplished by use of a pgroup.) However that's a bit -/// hard to mechanically check without reading and interpreting the process tree, which -/// seems likely to be a bit annoying to do portably and without flakes. -/// (But maybe we still should?) -/// -/// An easier thing to test is that the cargo-mutants process _thinks_ it has killed -/// the children, and we can observe this in the debug log. -/// -/// In this test cargo-mutants has a very long timeout, but the test driver has a -/// short timeout, so it should kill cargo-mutants. -#[test] -#[cfg(unix)] // Should in principle work on Windows, but does not at the moment. -fn interrupt_caught_and_kills_children() { - // Test a tree that has enough tests that we'll probably kill it before it completes. - - use std::process::{Command, Stdio}; - - use nix::libc::pid_t; - use nix::sys::signal::{kill, SIGTERM}; - use nix::unistd::Pid; - - let tmp_src_dir = copy_of_testdata("well_tested"); - // We can't use `assert_cmd` `timeout` here because that sends the child a `SIGKILL`, - // which doesn't give it a chance to clean up. And, `std::process::Command` only - // has an abrupt kill. - - // Drop RUST_BACKTRACE because the traceback mentions "panic" handler functions - // and we want to check that the process does not panic. - - // Skip baseline because firstly it should already pass but more importantly - // #333 exhibited only during non-baseline scenarios. - let args = [ - MAIN_BINARY.to_str().unwrap(), - "mutants", - "--timeout=300", - "--baseline=skip", - "--level=trace", - ]; - - println!("Running: {args:?}"); - let mut child = Command::new(args[0]) - .args(&args[1..]) - .current_dir(&tmp_src_dir) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .env_remove("RUST_BACKTRACE") - .spawn() - .expect("spawn child"); - - sleep(Duration::from_secs(2)); // Let it get started - assert!( - child.try_wait().expect("try to wait for child").is_none(), - "child exited early" - ); - - println!("Sending SIGTERM to cargo-mutants..."); - kill(Pid::from_raw(child.id() as pid_t), SIGTERM).expect("send SIGTERM"); - - println!("Wait for cargo-mutants to exit..."); - let output = child - .wait_with_output() - .expect("wait for child after SIGTERM"); - - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); - println!("stdout:\n{stdout}"); - println!("stderr:\n{stderr}"); - - assert!(stderr.contains("interrupted")); - // We used to look here for some other trace messages about how it's interrupted, but - // that seems to be racy: sometimes the parent sees the child interrupted before it - // emits these messages? Anyhow, it's not essential. - - // This shouldn't cause a panic though (#333) - assert!(!stderr.contains("panic")); - // And we don't want duplicate messages about workers failing. - assert!(!stderr.contains("Worker thread failed")); -} - /// A tree that hangs when some functions are mutated does not hang cargo-mutants /// overall, because we impose a timeout. The timeout can be specified on the /// command line, with decimal seconds. diff --git a/tests/unix.rs b/tests/unix.rs new file mode 100644 index 00000000..84157a9f --- /dev/null +++ b/tests/unix.rs @@ -0,0 +1,91 @@ +#![cfg(unix)] + +use std::thread::sleep; +use std::time::Duration; + +mod util; +use util::{copy_of_testdata, MAIN_BINARY}; + +/// If the test hangs and the user (in this case the test suite) interrupts it, then +/// the `cargo test` child should be killed. +/// +/// This is a bit hard to directly observe: the property that we really most care +/// about is that _all_ grandchild processes are also killed and nothing is left +/// behind. (On Unix, this is accomplished by use of a pgroup.) However that's a bit +/// hard to mechanically check without reading and interpreting the process tree, which +/// seems likely to be a bit annoying to do portably and without flakes. +/// (But maybe we still should?) +/// +/// An easier thing to test is that the cargo-mutants process _thinks_ it has killed +/// the children, and we can observe this in the debug log. +/// +/// In this test cargo-mutants has a very long timeout, but the test driver has a +/// short timeout, so it should kill cargo-mutants. +// TODO: An equivalent test on Windows? +#[test] +fn interrupt_caught_and_kills_children() { + // Test a tree that has enough tests that we'll probably kill it before it completes. + + use std::process::{Command, Stdio}; + + use nix::libc::pid_t; + use nix::sys::signal::{kill, SIGTERM}; + use nix::unistd::Pid; + + let tmp_src_dir = copy_of_testdata("well_tested"); + // We can't use `assert_cmd` `timeout` here because that sends the child a `SIGKILL`, + // which doesn't give it a chance to clean up. And, `std::process::Command` only + // has an abrupt kill. + + // Drop RUST_BACKTRACE because the traceback mentions "panic" handler functions + // and we want to check that the process does not panic. + + // Skip baseline because firstly it should already pass but more importantly + // #333 exhibited only during non-baseline scenarios. + let args = [ + MAIN_BINARY.to_str().unwrap(), + "mutants", + "--timeout=300", + "--baseline=skip", + "--level=trace", + ]; + + println!("Running: {args:?}"); + let mut child = Command::new(args[0]) + .args(&args[1..]) + .current_dir(&tmp_src_dir) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .env_remove("RUST_BACKTRACE") + .spawn() + .expect("spawn child"); + + sleep(Duration::from_secs(2)); // Let it get started + assert!( + child.try_wait().expect("try to wait for child").is_none(), + "child exited early" + ); + + println!("Sending SIGTERM to cargo-mutants..."); + kill(Pid::from_raw(child.id() as pid_t), SIGTERM).expect("send SIGTERM"); + + println!("Wait for cargo-mutants to exit..."); + let output = child + .wait_with_output() + .expect("wait for child after SIGTERM"); + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + println!("stdout:\n{stdout}"); + println!("stderr:\n{stderr}"); + + assert!(stderr.contains("interrupted")); + // We used to look here for some other trace messages about how it's interrupted, but + // that seems to be racy: sometimes the parent sees the child interrupted before it + // emits these messages? Anyhow, it's not essential. + + // This shouldn't cause a panic though (#333) + assert!(!stderr.contains("panic")); + // And we don't want duplicate messages about workers failing. + assert!(!stderr.contains("Worker thread failed")); +}