sourcefrog · cl3joly · Nov 29, 2023 · cl3joly · Dec 1, 2023 · sourcefrog
diff --git a/src/incremental.rs b/src/incremental.rs
@@ -0,0 +1,44 @@
+// Copyright 2023 Paxos
+
+//! Logic for incremantal runs
+use crate::{
+    mutate::{Mutant, MutantHash},
+    options::Options,
+    output::{PositiveOutcome, OUTDIR_NAME, POSITIVE_OUTCOMES_FILE},
+};
+use anyhow::{anyhow, Result};
+use camino::{Utf8Path, Utf8PathBuf};
+use std::{collections::HashSet, fs};
+
+pub fn filter_by_last_positive_outcomes(
+    mutants: Vec<Mutant>,
+    dir: &Utf8PathBuf,
+    options: &Options,
+) -> (Option<Vec<PositiveOutcome>>, Vec<Mutant>) {
+    let read_path: &Utf8Path = options.output_in_dir.as_ref().map_or(dir, |p| p.as_path());
+    // TODO: add logging here for error cases
+    let last_positive_outcomes = match read_last_positive_outcomes(read_path) {
+        Ok(outcomes) => Some(outcomes),
+        Err(_) => None,
+    };
+    // if last_positive_outcomes is none the hash set will be empty thereby allowing all mutants to be considered
+    let existing_mutants: HashSet<MutantHash> = last_positive_outcomes
+        .iter()
+        .flatten()
+        .map(|o| o.mutant_hash())
+        .collect();
+    let mutants = mutants
+        .into_iter()
+        .filter(|m| !existing_mutants.contains(&m.calculate_hash()))
+        .collect();
+    (last_positive_outcomes, mutants)
+}
+
+fn read_last_positive_outcomes(read_path: &Utf8Path) -> Result<Vec<PositiveOutcome>> {
+    let path = read_path.join(OUTDIR_NAME).join(POSITIVE_OUTCOMES_FILE);
+    fs::read_to_string(path.clone())
+        .map(|contents| serde_json::from_str(&contents).map_err(|e| anyhow!("{}", e)))
+        // If we can’t read the file, we assume that it doesn’t exist and we return an empty list.
+        // Later, the file will get written and any error will be surfaced to the user.
+        .unwrap_or(Ok(vec![]))
+}
diff --git a/src/lab.rs b/src/lab.rs
@@ -16,7 +16,7 @@ use tracing::{debug, debug_span, error, info, trace};
 use crate::cargo::run_cargo;
 use crate::console::Console;
 use crate::outcome::{LabOutcome, Phase, ScenarioOutcome};
-use crate::output::OutputDir;
+use crate::output::{OutputDir, PositiveOutcome};
 use crate::package::Package;
 use crate::*;
 
@@ -29,13 +29,14 @@ pub fn test_mutants(
     workspace_dir: &Utf8Path,
     options: Options,
     console: &Console,
+    last_positive_outcomes: Option<Vec<PositiveOutcome>>,
 ) -> Result<LabOutcome> {
     let start_time = Instant::now();
     let output_in_dir: &Utf8Path = options
         .output_in_dir
         .as_ref()
         .map_or(workspace_dir, |p| p.as_path());
-    let output_dir = OutputDir::new(output_in_dir)?;
+    let output_dir = OutputDir::new(output_in_dir, last_positive_outcomes)?;
     console.set_debug_log(output_dir.open_debug_log()?);
 
     if options.shuffle {

diff --git a/src/main.rs b/src/main.rs
@@ -10,6 +10,7 @@ mod copy_tree;
 mod exit_code;
 mod fnvalue;
 mod in_diff;
+mod incremental;
 mod interrupt;
 mod lab;
 mod list;
@@ -192,6 +193,10 @@ struct Args {
     #[arg(long, short = 'D')]
     in_diff: Option<Utf8PathBuf>,
 
+    /// Incremental run, testing only the mutants that weren’t caught in the previous run
+    #[arg(long, short = 'i')]
+    incremental: bool,
+
     /// only test mutants from these packages.
     #[arg(id = "package", long, short = 'p')]
     mutate_packages: Vec<String>,
@@ -297,10 +302,21 @@ fn main() -> Result<()> {
             &read_to_string(in_diff).context("Failed to read filter diff")?,
         )?;
     }
+    let mut last_positive_outcomes = None;
+    if args.incremental {
+        (last_positive_outcomes, mutants) =
+            incremental::filter_by_last_positive_outcomes(mutants, &workspace.dir, &options);
+    }
     if args.list {
         list_mutants(FmtToIoWrite::new(io::stdout()), &mutants, &options)?;
     } else {
-        let lab_outcome = test_mutants(mutants, &workspace.dir, options, &console)?;
+        let lab_outcome = test_mutants(
+            mutants,
+            &workspace.dir,
+            options,
+            &console,
+            last_positive_outcomes,
+        )?;
         exit(lab_outcome.exit_code());
     }
     Ok(())

diff --git a/src/mutate.rs b/src/mutate.rs
@@ -4,6 +4,9 @@
 
 use std::fmt;
 use std::fs;
+use std::hash::Hash;
+use std::hash::Hasher;
+use std::hash::SipHasher;
 use std::sync::Arc;
 
 use anyhow::{ensure, Context, Result};
@@ -19,7 +22,7 @@ use crate::span::Span;
 use crate::MUTATION_MARKER_COMMENT;
 
 /// Various broad categories of mutants.
-#[derive(Clone, Eq, PartialEq, Debug, Serialize)]
+#[derive(Clone, Eq, PartialEq, Debug, Serialize, Hash)]
 pub enum Genre {
     /// Replace the body of a function with a fixed value.
     FnValue,
@@ -28,7 +31,7 @@ pub enum Genre {
 }
 
 /// A mutation applied to source code.
-#[derive(Clone, Eq, PartialEq)]
+#[derive(Clone, Eq, Hash, PartialEq)]
 pub struct Mutant {
     /// Which file is being mutated.
     pub source_file: Arc<SourceFile>,
@@ -53,7 +56,7 @@ pub struct Mutant {
 /// The function containing a mutant.
 ///
 /// This is used for both mutations of the whole function, and smaller mutations within it.
-#[derive(Eq, PartialEq, Debug, Serialize)]
+#[derive(Eq, PartialEq, Debug, Hash, Serialize)]
 pub struct Function {
     /// The function that's being mutated.
     pub function_name: String,
@@ -198,8 +201,19 @@ impl Mutant {
             self.span.start.line,
         )
     }
+
+    pub fn calculate_hash(&self) -> MutantHash {
+        // We need the hashes to be stable across restarts, so that they can be serialized
+        // consistently.
+        let mut s = SipHasher::new();
+        self.hash(&mut s);
+        s.finish()
+    }
 }
 
+/// A hash of a mutant, used to identify it across runs.
+pub type MutantHash = u64;
+
 impl fmt::Debug for Mutant {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         // Custom implementation to show spans more concisely

diff --git a/src/outcome.rs b/src/outcome.rs
@@ -9,6 +9,7 @@ use std::time::Instant;
 
 use anyhow::Context;
 use serde::ser::SerializeStruct;
+use serde::Deserialize;
 use serde::Serialize;
 use serde::Serializer;
 
@@ -294,7 +295,7 @@ impl Serialize for PhaseResult {
 }
 
 /// Overall summary outcome for one mutant.
-#[derive(Debug, Clone, Eq, PartialEq, Serialize, Hash)]
+#[derive(Debug, Deserialize, Clone, Eq, PartialEq, Serialize, Hash)]
 pub enum SummaryOutcome {
     Success,
     CaughtMutant,

diff --git a/src/output.rs b/src/output.rs
@@ -12,16 +12,19 @@ use anyhow::{Context, Result};
 use camino::Utf8Path;
 use fs2::FileExt;
 use path_slash::PathExt;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use time::format_description::well_known::Rfc3339;
 use time::OffsetDateTime;
 use tracing::info;
 
+use crate::mutate::MutantHash;
 use crate::outcome::{LabOutcome, SummaryOutcome};
 use crate::*;
 
-const OUTDIR_NAME: &str = "mutants.out";
-const ROTATED_NAME: &str = "mutants.out.old";
+pub const OUTDIR_NAME: &str = "mutants.out";
+pub const ROTATED_NAME: &str = "mutants.out.old";
+pub const OUTCOMES_FILE: &str = "outcomes.json";
+pub const POSITIVE_OUTCOMES_FILE: &str = "positive_outcomes.json";
 const LOCK_JSON: &str = "lock.json";
 const LOCK_POLL: Duration = Duration::from_millis(100);
 
@@ -98,6 +101,7 @@ pub struct OutputDir {
     /// A file holding a list of mutants where testing timed out, as text, one per line.
     timeout_list: File,
     unviable_list: File,
+    last_positive_outcomes: Option<Vec<PositiveOutcome>>,
     /// The accumulated overall lab outcome.
     pub lab_outcome: LabOutcome,
 }
@@ -113,7 +117,10 @@ impl OutputDir {
     ///
     /// If the directory already exists and `lock.json` exists and is locked, this waits for
     /// the lock to be released. The returned `OutputDir` holds a lock for its lifetime.
-    pub fn new(in_dir: &Utf8Path) -> Result<OutputDir> {
+    pub fn new(
+        in_dir: &Utf8Path,
+        last_positive_outcomes: Option<Vec<PositiveOutcome>>,
+    ) -> Result<OutputDir> {
         if !in_dir.exists() {
             fs::create_dir(in_dir).context("create output parent directory {in_dir:?}")?;
         }
@@ -161,6 +168,7 @@ impl OutputDir {
             caught_list,
             timeout_list,
             unviable_list,
+            last_positive_outcomes,
         })
     }
 
@@ -178,21 +186,45 @@ impl OutputDir {
         &self.path
     }
 
+    /// Save positive outcomes for incremental runs
+    ///
+    /// Called multiple times as the lab runs.
+    pub fn maybe_write_positive_outcomes(&self) -> Result<()> {
+        if let Some(last_positive_outcomes) = &self.last_positive_outcomes {
+            let positive_outcomes = self
+                .lab_outcome
+                .outcomes
+                .iter()
+                .filter_map(|o: &ScenarioOutcome| PositiveOutcome::try_from(o).ok())
+                .chain(last_positive_outcomes.iter().cloned())
+                .collect::<Vec<PositiveOutcome>>();
+
+            serde_json::to_writer_pretty(
+                BufWriter::new(File::create(self.path.join(POSITIVE_OUTCOMES_FILE))?),
+                &positive_outcomes,
+            )
+            .context(format!("write {}", POSITIVE_OUTCOMES_FILE))
+        } else {
+            Ok(())
+        }
+    }
+
     /// Update the state of the overall lab.
     ///
     /// Called multiple times as the lab runs.
     pub fn write_lab_outcome(&self) -> Result<()> {
         serde_json::to_writer_pretty(
-            BufWriter::new(File::create(self.path.join("outcomes.json"))?),
+            BufWriter::new(File::create(self.path.join(OUTCOMES_FILE))?),
             &self.lab_outcome,
         )
-        .context("write outcomes.json")
+        .context(format!("write {}", OUTCOMES_FILE))
     }
 
     /// Add the result of testing one scenario.
     pub fn add_scenario_outcome(&mut self, scenario_outcome: &ScenarioOutcome) -> Result<()> {
         self.lab_outcome.add(scenario_outcome.to_owned());
         self.write_lab_outcome()?;
+        self.maybe_write_positive_outcomes()?;
         let scenario = &scenario_outcome.scenario;
         if let Scenario::Mutant(mutant) = scenario {
             let file = match scenario_outcome.summary() {
@@ -281,7 +313,7 @@ mod test {
         let tmp = minimal_source_tree();
         let tmp_path: &Utf8Path = tmp.path().try_into().unwrap();
         let workspace = Workspace::open(tmp_path).unwrap();
-        let output_dir = OutputDir::new(&workspace.dir).unwrap();
+        let output_dir = OutputDir::new(&workspace.dir, None).unwrap();
         assert_eq!(
             list_recursive(tmp.path()),
             &[
@@ -309,7 +341,7 @@ mod test {
         let temp_dir_path = Utf8Path::from_path(temp_dir.path()).unwrap();
 
         // Create an initial output dir with one log.
-        let output_dir = OutputDir::new(temp_dir_path).unwrap();
+        let output_dir = OutputDir::new(temp_dir_path, None).unwrap();
         output_dir.create_log(&Scenario::Baseline).unwrap();
         assert!(temp_dir
             .path()
@@ -318,7 +350,7 @@ mod test {
         drop(output_dir); // release the lock.
 
         // The second time we create it in the same directory, the old one is moved away.
-        let output_dir = OutputDir::new(temp_dir_path).unwrap();
+        let output_dir = OutputDir::new(temp_dir_path, None).unwrap();
         output_dir.create_log(&Scenario::Baseline).unwrap();
         assert!(temp_dir
             .path()
@@ -331,7 +363,7 @@ mod test {
         drop(output_dir);
 
         // The third time (and later), the .old directory is removed.
-        let output_dir = OutputDir::new(temp_dir_path).unwrap();
+        let output_dir = OutputDir::new(temp_dir_path, None).unwrap();
         output_dir.create_log(&Scenario::Baseline).unwrap();
         assert!(temp_dir
             .path()
@@ -347,3 +379,47 @@ mod test {
             .is_file());
     }
 }
+
+/// Caught and unviable scenario outcome
+#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
+pub struct PositiveOutcome {
+    pub mutant_hash: MutantHash,
+    /// This is always `Unviable` or `Caught`, because other outcomes aren’t psotive outcomes
+    pub summary: SummaryOutcome,
+}
+
+impl PositiveOutcome {
+    fn new(mutant: &Mutant, summary: SummaryOutcome) -> Result<Self> {
+        match summary {
+            SummaryOutcome::CaughtMutant | SummaryOutcome::Unviable => Ok(Self {
+                mutant_hash: mutant.calculate_hash(),
+                summary,
+            }),
+            _ => Err(anyhow::anyhow!(
+                "outcome {:?} isn’t a positive outcome",
+                summary
+            )),
+        }
+    }
+
+    /// Gets the hash of the associated mutant
+    pub fn mutant_hash(&self) -> MutantHash {
+        self.mutant_hash
+    }
+}
+
+impl TryFrom<&ScenarioOutcome> for PositiveOutcome {
+    type Error = anyhow::Error;
+
+    fn try_from(scenario_outcome: &ScenarioOutcome) -> Result<Self> {
+        let mutant = match &scenario_outcome.scenario {
+            Scenario::Mutant(mutant) => mutant,
+            Scenario::Baseline => {
+                return Err(anyhow::anyhow!(
+                    "baseline can’t be converted to positive outcome"
+                ))
+            }
+        };
+        Self::new(mutant, scenario_outcome.summary())
+    }
+}