From d9437e7082e7d8f6e52a13434e5b29996e1f29de Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Sun, 3 Mar 2024 12:48:32 -0800 Subject: [PATCH] Improved glob matching * in globs does not match path separators But, globs can match directories (and everything inside them) Split out glob.rs Add more tests --- NEWS.md | 6 + book/src/skip_files.md | 13 +- src/glob.rs | 185 ++++++++++++++++++ src/main.rs | 1 + src/options.rs | 23 +-- ..._expected_mutants_for_own_source_tree.snap | 8 +- tests/list.rs | 6 +- tests/windows.rs | 6 +- 8 files changed, 212 insertions(+), 36 deletions(-) create mode 100644 src/glob.rs diff --git a/NEWS.md b/NEWS.md index eeccd46c..40a647ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,12 @@ ## Unreleased +- Changed: In globs, `*` no longer matches path separators, only parts of a filename. For example, `src/*.rs` will now only match files directly in `src/`, not in subdirectories. To include subdirectories, use `**` as in `src/**/*.rs`. + + And, patterns that do not contain a path separator match directories at any level, and all files within them. For example, `-f db` will match `src/db.rs` and `src/db/mod.rs` and all files in `src/db/` or in `other/db`. + + This may break existing configurations but is considered a bug fix because it brings the behavior in line with other tools and allows more precise expressions. + - Changed: Minimum Rust version (to build cargo-mutants, not to use it) increased to 1.74. ## 24.2.1 diff --git a/book/src/skip_files.md b/book/src/skip_files.md index cfe083d4..84ca7d08 100644 --- a/book/src/skip_files.md +++ b/book/src/skip_files.md @@ -12,11 +12,14 @@ If any `-f` options are given, only source files that match are considered; otherwise all files are considered. This list is then further reduced by exclusions. -If the glob contains `/` (or on Windows, `\`), then it matches against the path from the root of the source -tree. For example, `src/*/*.rs` will exclude all files in subdirectories of `src`. +Globs are treated differently depending on whether they contain a path separator or not. +`/` matches the path separator on both Unix and Windows. `\\` matches the path separator on Windows and is an escape character on Unix. -If the glob does not contain a path separator, it matches against filenames -in any directory. `/` matches the path separator on both Unix and Windows. +If the glob contains a path separator then it matches against the path from the root of the source +tree. For example, `src/*/*.rs` will match (and exclude or exclude) all files in subdirectories of `src`. Matches on paths can use `**` to match zero or more directory components: `src/**/*.rs` will match all `.rs` files in `src` and its subdirectories. + +If the glob does not contain a path separator, it matches against file and directory names, in any directory. For example, `t*.rs` will match all files whose name start with `t` and ends with `.rs`, in any directory. +in any directory. `--exclude console` excludes all files within directories called "console", but not files called "console.rs". Note that the glob must contain `.rs` (or a matching wildcard) to match source files with that suffix. For example, `-f network` will match @@ -39,7 +42,7 @@ Examples: - `cargo mutants -e console.rs` -- test mutants in any file except `console.rs`. -- `cargo mutants -f src/db/*.rs` -- test mutants in any file in this directory. +- `cargo mutants -f src/db/*.rs` -- test mutants in any file in this directory. This could also be written as `-f src/db`, or (if all the source is in `src`) as `-f db`. ## Configuring filters by filename diff --git a/src/glob.rs b/src/glob.rs new file mode 100644 index 00000000..28765d8f --- /dev/null +++ b/src/glob.rs @@ -0,0 +1,185 @@ +// Copyright 2024 Martin Pool + +//! Build globsets. + +use std::borrow::Cow; + +use anyhow::Context; +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; + +use crate::Result; + +pub fn build_glob_set(globs: &[S]) -> Result> +where + S: AsRef, +{ + if globs.is_empty() { + return Ok(None); + } + let mut builder = GlobSetBuilder::new(); + for glob_str in globs { + let glob_str = glob_str.as_ref(); + let match_whole_path = if cfg!(windows) { + glob_str.contains(['/', '\\']) + } else { + glob_str.contains('/') + }; + let adjusted = if match_whole_path { + vec![Cow::Borrowed(glob_str)] + } else { + vec![ + Cow::Owned(format!("**/{glob_str}")), + Cow::Owned(format!("**/{glob_str}/**")), + ] + }; + for g in adjusted { + builder.add( + GlobBuilder::new(&g) + .literal_separator(true) // * does not match / + .build() + .with_context(|| format!("Failed to build glob from {glob_str:?}"))?, + ); + } + } + Ok(Some(builder.build().context("Failed to build glob set")?)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn empty_globs() { + assert!(build_glob_set(&[] as &[&str]) + .expect("build GlobSet") + .is_none()); + } + + #[test] + fn literal_filename_matches_anywhere() { + let set = build_glob_set(&["foo.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(set.is_match("foo.rs")); + assert!(set.is_match("src/foo.rs")); + assert!(set.is_match("src/bar/foo.rs")); + assert!(!set.is_match("src/bar/foo.rs~")); + assert!(!set.is_match("src/bar/bar.rs")); + } + + #[test] + fn filename_matches_directories_and_their_contents() { + let set = build_glob_set(&["console"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(set.is_match("console")); + assert!(set.is_match("src/console")); + assert!(set.is_match("src/bar/console")); + assert!(set.is_match("src/bar/console/mod.rs")); + assert!(set.is_match("src/console/ansi.rs")); + } + + #[test] + fn glob_without_slash_matches_filename_anywhere() { + let set = build_glob_set(&["*.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(set.is_match("foo.rs")); + assert!(set.is_match("src/foo.rs")); + assert!(set.is_match("src/bar/foo.rs")); + assert!(!set.is_match("src/bar/foo.rs~")); + assert!(set.is_match("src/bar/bar.rs")); + } + + #[test] + fn set_with_multiple_filenames() { + let set = build_glob_set(&["foo.rs", "bar.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(set.is_match("foo.rs")); + assert!(set.is_match("bar.rs")); + assert!(!set.is_match("baz.rs")); + } + + #[test] + fn glob_with_slashes_matches_whole_path() { + let set = build_glob_set(&["src/*.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(!set.is_match("foo.rs")); + assert!(set.is_match("src/foo.rs")); + assert!(!set.is_match("src/bar/foo.rs")); + assert!(!set.is_match("src/foo.rs~")); + assert!( + !set.is_match("other/src/bar.rs"), + "Glob with slashes anchors to whole path" + ); + } + + #[test] + fn starstar_at_start_of_path_matches_anywhere() { + let set = build_glob_set(&["**/foo.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(set.is_match("foo.rs")); + assert!(set.is_match("src/foo.rs")); + assert!(set.is_match("src/bar/foo.rs")); + assert!(set.is_match("some/other/src/bar/foo.rs")); + assert!(!set.is_match("src/bar/foo.rs~")); + assert!(!set.is_match("src/bar/bar.rs")); + assert!(!set.is_match("foo.rs/bar/bar.rs")); + } + + #[test] + fn starstar_within_path_matches_zero_or_more_directories() { + let set = build_glob_set(&["src/**/f*.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(!set.is_match("foo.rs"), "path must start with src"); + assert!( + set.is_match("src/foo.rs"), + "starstar can match zero directories" + ); + assert!(set.is_match("src/bar/foo.rs")); + assert!(set.is_match("src/bar/freq.rs")); + assert!( + !set.is_match("some/other/src/bar/foo.rs"), + "path must start with src" + ); + assert!(!set.is_match("src/bar/foo.rs~")); + assert!(!set.is_match("src/bar/bar.rs")); + assert!(!set.is_match("foo.rs/bar/bar.rs")); + } + + #[test] + #[cfg(unix)] + fn on_unix_backslash_is_escape() { + // weird glob but ok + let set = build_glob_set(&["src\\*.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!( + !set.is_match("src/foo.rs"), + "backslash is not a path separator on Unix" + ); + assert!( + set.is_match("src*.rs"), + "backslash escapes star (and is removed itself)" + ); + } + + #[test] + #[cfg(windows)] + fn on_windows_backslash_is_path_separator() { + let set = build_glob_set(&["src\\*.rs"]) + .expect("build GlobSet") + .expect("GlobSet should not be empty"); + assert!(set.is_match("src\\foo.rs")); + assert!(!set.is_match("src\\bar\\foo.rs")); + assert!(!set.is_match("src\\foo.rs~")); + assert!( + !set.is_match("other\\src\\bar.rs"), + "Glob with slashes anchors to whole path" + ); + } +} diff --git a/src/main.rs b/src/main.rs index 9ad4ee65..a9b4bb1d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ mod console; mod copy_tree; mod exit_code; mod fnvalue; +mod glob; mod in_diff; mod interrupt; mod lab; diff --git a/src/options.rs b/src/options.rs index 674968b9..4aef645e 100644 --- a/src/options.rs +++ b/src/options.rs @@ -9,13 +9,14 @@ use std::time::Duration; use anyhow::Context; use camino::Utf8PathBuf; -use globset::{Glob, GlobSet, GlobSetBuilder}; +use globset::GlobSet; use regex::RegexSet; use serde::Deserialize; use strum::{Display, EnumString}; use tracing::warn; use crate::config::Config; +use crate::glob::build_glob_set; use crate::*; /// Options for mutation testing, based on both command-line arguments and the @@ -243,26 +244,6 @@ fn or_slices<'a: 'c, 'b: 'c, 'c, T>(a: &'a [T], b: &'b [T]) -> &'c [T] { } } -fn build_glob_set, I: IntoIterator>( - glob_set: I, -) -> Result> { - let mut glob_set = glob_set.into_iter().peekable(); - if glob_set.peek().is_none() { - return Ok(None); - } - - let mut builder = GlobSetBuilder::new(); - for glob_str in glob_set { - let glob_str = glob_str.as_ref(); - if glob_str.contains('/') || glob_str.contains(std::path::MAIN_SEPARATOR) { - builder.add(Glob::new(glob_str)?); - } else { - builder.add(Glob::new(&format!("**/{glob_str}"))?); - } - } - Ok(Some(builder.build()?)) -} - #[cfg(test)] mod test { use std::io::Write; diff --git a/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap b/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap index 48a4b578..ab6148b5 100644 --- a/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap +++ b/src/snapshots/cargo_mutants__visit__test__expected_mutants_for_own_source_tree.snap @@ -77,6 +77,9 @@ src/fnvalue.rs: replace path_is_nonzero_unsigned -> bool with false src/fnvalue.rs: replace match_first_type_arg -> Option<&'p Type> with None src/fnvalue.rs: replace match_first_type_arg -> Option<&'p Type> with Some(&Default::default()) src/fnvalue.rs: replace == with != in match_first_type_arg +src/glob.rs: replace build_glob_set -> Result> with Ok(None) +src/glob.rs: replace build_glob_set -> Result> with Ok(Some(Default::default())) +src/glob.rs: replace build_glob_set -> Result> with Err(::anyhow::anyhow!("mutated!")) src/in_diff.rs: replace diff_filter -> Result> with Ok(vec![]) src/in_diff.rs: replace diff_filter -> Result> with Ok(vec![Default::default()]) src/in_diff.rs: replace diff_filter -> Result> with Err(::anyhow::anyhow!("mutated!")) @@ -220,10 +223,6 @@ src/options.rs: replace Colors::active_stdout -> bool with true src/options.rs: replace Colors::active_stdout -> bool with false src/options.rs: replace or_slices -> &'c[T] with Vec::leak(Vec::new()) src/options.rs: replace or_slices -> &'c[T] with Vec::leak(vec![Default::default()]) -src/options.rs: replace build_glob_set -> Result> with Ok(None) -src/options.rs: replace build_glob_set -> Result> with Ok(Some(Default::default())) -src/options.rs: replace build_glob_set -> Result> with Err(::anyhow::anyhow!("mutated!")) -src/options.rs: replace || with && in build_glob_set src/outcome.rs: replace Phase::name -> &'static str with "" src/outcome.rs: replace Phase::name -> &'static str with "xyzzy" src/outcome.rs: replace ::fmt -> fmt::Result with Ok(Default::default()) @@ -549,4 +548,3 @@ src/workspace.rs: replace || with && in should_mutate_target src/workspace.rs: replace == with != in should_mutate_target src/workspace.rs: replace locate_project -> Result with Ok(Default::default()) src/workspace.rs: replace locate_project -> Result with Err(::anyhow::anyhow!("mutated!")) - diff --git a/tests/list.rs b/tests/list.rs index 5eb7d5c5..59e3389e 100644 --- a/tests/list.rs +++ b/tests/list.rs @@ -190,7 +190,7 @@ fn list_mutants_well_tested_exclude_name_filter() { fn list_mutants_well_tested_exclude_folder_filter() { run() .arg("mutants") - .args(["--list", "--exclude", "*/module/*"]) + .args(["--list", "--exclude", "module"]) .current_dir("testdata/with_child_directories") .assert_insta("list_mutants_well_tested_exclude_folder_filter"); } @@ -202,7 +202,7 @@ fn list_mutants_well_tested_examine_and_exclude_name_filter_combined() { .args([ "--list", "--file", - "src/module/utils/*.rs", + "src/module/utils/**/*.rs", "--exclude", "nested_function.rs", ]) @@ -257,7 +257,7 @@ fn list_mutants_regex_filters_json() { fn list_mutants_well_tested_multiple_examine_and_exclude_name_filter_with_files_and_folders() { run() .arg("mutants") - .args(["--list", "--file", "module_methods.rs", "--file", "*/utils/*", "--exclude", "*/sub_utils/*", "--exclude", "nested_function.rs"]) + .args(["--list", "--file", "module_methods.rs", "--file", "utils", "--exclude", "**/sub_utils/**", "--exclude", "nested_function.rs"]) .current_dir("testdata/with_child_directories") .assert_insta("list_mutants_well_tested_multiple_examine_and_exclude_name_filter_with_files_and_folders"); } diff --git a/tests/windows.rs b/tests/windows.rs index 26649178..72951732 100644 --- a/tests/windows.rs +++ b/tests/windows.rs @@ -1,4 +1,4 @@ -// Copyright 2021-2023 Martin Pool +// Copyright 2021-2024 Martin Pool #![cfg(windows)] @@ -12,9 +12,11 @@ use util::run; /// Only on Windows, backslash can be used as a path separator in filters. #[test] fn list_mutants_well_tested_exclude_folder_containing_backslash_on_windows() { + // This could be written more simply as `--exclude module` but we want to + // test that backslash is accepted. run() .arg("mutants") - .args(["--list", "--exclude", "*\\module\\*"]) + .args(["--list", "--exclude", "**\\module\\**\\*.rs"]) .current_dir("testdata/with_child_directories") .assert() .stdout(