diff --git a/Cargo.lock b/Cargo.lock index 1bb2f11b..17cb3713 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,9 +211,11 @@ dependencies = [ "indoc", "insta", "itertools 0.12.0", + "jobserver", "lazy_static", "mutants 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", "nix 0.28.0", + "num_cpus", "nutmeg", "patch", "path-slash", @@ -707,6 +709,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.69" @@ -870,6 +881,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + [[package]] name = "nutmeg" version = "0.1.4" diff --git a/Cargo.toml b/Cargo.toml index f4f63060..d54accb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,9 @@ humantime = "2.1.0" ignore = "0.4.20" indoc = "2.0.0" itertools = "0.12" +jobserver = "0.1" mutants = "0.0.3" +num_cpus ="1.16" patch = "0.7" path-slash = "0.2" quote = "1.0.35" diff --git a/NEWS.md b/NEWS.md index a5db1746..34da220f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # cargo-mutants changelog +## Unreleased + +- New: cargo-mutants starts a GNU jobserver, shared across all children, so that running multiple `--jobs` does not spawn an excessive number of compiler processes. The jobserver is on by default and can be turned off with `--jobserver false`. + ## 24.7.1 - Changed: No build timeouts by default. Previously, cargo-mutants set a default build timeout based on the baseline build, but experience showed that this would sometimes make builds flaky, because build times can be quite variable. If mutants cause builds to hang, then you can still set a timeout using `--build-timeout` or `--build-timeout-multiplier`. diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index c2035616..24dd1508 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -25,6 +25,7 @@ - [Error values](error-values.md) - [Improving performance](performance.md) - [Parallelism](parallelism.md) + - [Jobserver](jobserver.md) - [Sharding](shards.md) - [Testing code changed in a diff](in-diff.md) - [Integrations](integrations.md) diff --git a/book/src/jobserver.md b/book/src/jobserver.md new file mode 100644 index 00000000..08798f29 --- /dev/null +++ b/book/src/jobserver.md @@ -0,0 +1,12 @@ +# Jobserver + +The GNU Jobserver protocol enables a build system to limit the total number of concurrent jobs at any point in time. + +By default, cargo-mutants starts a jobserver configured to allow one job per CPU. This limit applies across all the subprocesses spawned by cargo-mutants, including all parallel jobs. This allows you to use `--jobs` to run multiple test suites in parallel, without causing excessive load on the system from running too many compiler tasks in parallel. + +`--jobserver=false` disables running the jobserver. + +`--jobserver-tasks=N` sets the number of tasks that the jobserver will allow to run concurrently. + +The Rust test framework does not currently use the jobserver protocol, so it won't affect tests, only builds. However, the jobserver can be observed by tests +and build scripts in `CARGO_MAKEFLAGS`. diff --git a/src/cargo.rs b/src/cargo.rs index c32dc30e..a0c3e704 100644 --- a/src/cargo.rs +++ b/src/cargo.rs @@ -13,8 +13,10 @@ use crate::process::{Process, ProcessStatus}; use crate::*; /// Run cargo build, check, or test. +#[allow(clippy::too_many_arguments)] // I agree it's a lot but I'm not sure wrapping in a struct would be better. pub fn run_cargo( build_dir: &BuildDir, + jobserver: &Option, packages: Option<&[&Package]>, phase: Phase, timeout: Option, @@ -33,7 +35,15 @@ pub fn run_cargo( ("INSTA_UPDATE".to_owned(), "no".to_owned()), ("INSTA_FORCE_PASS".to_owned(), "0".to_owned()), ]; - let process_status = Process::run(&argv, &env, build_dir.path(), timeout, log_file, console)?; + let process_status = Process::run( + &argv, + &env, + build_dir.path(), + timeout, + jobserver, + log_file, + console, + )?; check_interrupted()?; debug!(?process_status, elapsed = ?start.elapsed()); if let ProcessStatus::Failure(code) = process_status { @@ -147,7 +157,7 @@ fn rustflags(options: &Options) -> String { rustflags .to_str() .expect("CARGO_ENCODED_RUSTFLAGS is not valid UTF-8") - .split(|c| c == '\x1f') + .split('\x1f') .map(|s| s.to_owned()) .collect() } else if let Some(rustflags) = env::var_os("RUSTFLAGS") { diff --git a/src/lab.rs b/src/lab.rs index 959cdb23..70e78fce 100644 --- a/src/lab.rs +++ b/src/lab.rs @@ -42,6 +42,15 @@ pub fn test_mutants( .map_or(workspace_dir, |p| p.as_path()), )?; console.set_debug_log(output_dir.open_debug_log()?); + let jobserver = options + .jobserver + .then(|| { + let n_tasks = options.jobserver_tasks.unwrap_or_else(num_cpus::get); + debug!(n_tasks, "starting jobserver"); + jobserver::Client::new(n_tasks) + }) + .transpose() + .context("Start jobserver")?; if options.shuffle { fastrand::shuffle(&mut mutants); @@ -66,6 +75,7 @@ pub fn test_mutants( let outcome = test_scenario( &build_dir, &output_mutex, + &jobserver, &Scenario::Baseline, &mutant_packages, Timeouts::for_baseline(&options), @@ -133,6 +143,7 @@ pub fn test_mutants( test_scenario( &build_dir, &output_mutex, + &jobserver, &Scenario::Mutant(mutant), &[&package], timeouts, @@ -194,9 +205,11 @@ pub fn test_mutants( /// /// The [BuildDir] is passed as mutable because it's for the exclusive use of this function for the /// duration of the test. +#[allow(clippy::too_many_arguments)] // I agree it's a lot but I'm not sure wrapping in a struct would be better. fn test_scenario( build_dir: &BuildDir, output_mutex: &Mutex, + jobserver: &Option, scenario: &Scenario, test_packages: &[&Package], timeouts: Timeouts, @@ -234,6 +247,7 @@ fn test_scenario( }; let phase_result = run_cargo( build_dir, + jobserver, Some(test_packages), phase, timeout, diff --git a/src/main.rs b/src/main.rs index 18b11417..e699b95c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -207,6 +207,16 @@ pub struct Args { )] jobs: Option, + /// Use a GNU Jobserver to cap concurrency between child processes. + #[arg(long, action = ArgAction::Set, help_heading = "Execution", default_value_t = true)] + jobserver: bool, + + /// Allow this many jobserver tasks in parallel, across all child processes. + /// + /// By default, NCPUS. + #[arg(long, help_heading = "Execution")] + jobserver_tasks: Option, + /// Output json (only for --list). #[arg(long, help_heading = "Output")] json: bool, diff --git a/src/options.rs b/src/options.rs index 170851a7..11d32871 100644 --- a/src/options.rs +++ b/src/options.rs @@ -36,6 +36,12 @@ pub struct Options { /// Don't copy at all; run tests in the source directory. pub in_place: bool, + /// Run a jobserver to limit concurrency between child processes. + pub jobserver: bool, + + /// Allow this many concurrent jobs, across all child processes. None means NCPU. + pub jobserver_tasks: Option, + /// Don't delete scratch directories. pub leak_dirs: bool, @@ -215,6 +221,8 @@ impl Options { gitignore: args.gitignore, in_place: args.in_place, jobs: args.jobs, + jobserver: args.jobserver, + jobserver_tasks: args.jobserver_tasks, leak_dirs: args.leak_dirs, minimum_test_timeout, output_in_dir: args.output.clone(), @@ -415,6 +423,30 @@ mod test { assert!(!options.features.all_features); } + #[test] + fn default_jobserver_settings() { + let args = Args::parse_from(["mutants"]); + let options = Options::new(&args, &Config::default()).unwrap(); + assert!(options.jobserver); + assert_eq!(options.jobserver_tasks, None); + } + + #[test] + fn disable_jobserver() { + let args = Args::parse_from(["mutants", "--jobserver=false"]); + let options = Options::new(&args, &Config::default()).unwrap(); + assert!(!options.jobserver); + assert_eq!(options.jobserver_tasks, None); + } + + #[test] + fn jobserver_tasks() { + let args = Args::parse_from(["mutants", "--jobserver-tasks=13"]); + let options = Options::new(&args, &Config::default()).unwrap(); + assert!(options.jobserver); + assert_eq!(options.jobserver_tasks, Some(13)); + } + #[test] fn all_features_arg() { let args = Args::try_parse_from([ diff --git a/src/process.rs b/src/process.rs index 8fc66828..27b6ceee 100644 --- a/src/process.rs +++ b/src/process.rs @@ -5,6 +5,8 @@ //! On Unix, the subprocess runs as its own process group, so that any //! grandchild processes are also signalled if it's interrupted. +#![allow(clippy::option_map_unit_fn)] // I don't think it's clearer with if/let. + use std::ffi::OsStr; #[cfg(unix)] use std::os::unix::process::{CommandExt, ExitStatusExt}; @@ -39,10 +41,11 @@ impl Process { env: &[(String, String)], cwd: &Utf8Path, timeout: Option, + jobserver: &Option, log_file: &mut LogFile, console: &Console, ) -> Result { - let mut child = Process::start(argv, env, cwd, timeout, log_file)?; + let mut child = Process::start(argv, env, cwd, timeout, jobserver, log_file)?; let process_status = loop { if let Some(exit_status) = child.poll()? { break exit_status; @@ -61,6 +64,7 @@ impl Process { env: &[(String, String)], cwd: &Utf8Path, timeout: Option, + jobserver: &Option, log_file: &mut LogFile, ) -> Result { let start = Instant::now(); @@ -76,6 +80,7 @@ impl Process { .stdout(log_file.open_append()?) .stderr(log_file.open_append()?) .current_dir(cwd); + jobserver.as_ref().map(|js| js.configure(&mut child)); #[cfg(unix)] child.process_group(0); let child = child