Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incremental runs #174

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions src/incremental.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2023 Paxos

//! Logic for incremantal runs
use crate::{
mutate::{Mutant, MutantHash},
options::Options,
output::{PositiveOutcome, OUTDIR_NAME, POSITIVE_OUTCOMES_FILE},
};
use anyhow::{anyhow, Result};
use camino::{Utf8Path, Utf8PathBuf};
use std::{collections::HashSet, fs};

pub fn filter_by_last_positive_outcomes(
mutants: Vec<Mutant>,
dir: &Utf8PathBuf,
options: &Options,
) -> (Option<Vec<PositiveOutcome>>, Vec<Mutant>) {
let read_path: &Utf8Path = options.output_in_dir.as_ref().map_or(dir, |p| p.as_path());
// TODO: add logging here for error cases
let last_positive_outcomes = match read_last_positive_outcomes(read_path) {
Ok(outcomes) => Some(outcomes),
Err(_) => None,
};
// if last_positive_outcomes is none the hash set will be empty thereby allowing all mutants to be considered
let existing_mutants: HashSet<MutantHash> = last_positive_outcomes
.iter()
.flatten()
.map(|o| o.mutant_hash())
.collect();
let mutants = mutants
.into_iter()
.filter(|m| !existing_mutants.contains(&m.calculate_hash()))
.collect();
(last_positive_outcomes, mutants)
}

fn read_last_positive_outcomes(read_path: &Utf8Path) -> Result<Vec<PositiveOutcome>> {
let path = read_path.join(OUTDIR_NAME).join(POSITIVE_OUTCOMES_FILE);
fs::read_to_string(path.clone())
.map(|contents| serde_json::from_str(&contents).map_err(|e| anyhow!("{}", e)))
// If we can’t read the file, we assume that it doesn’t exist and we return an empty list.
// Later, the file will get written and any error will be surfaced to the user.
.unwrap_or(Ok(vec![]))
}
5 changes: 3 additions & 2 deletions src/lab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use tracing::{debug, debug_span, error, info, trace};
use crate::cargo::run_cargo;
use crate::console::Console;
use crate::outcome::{LabOutcome, Phase, ScenarioOutcome};
use crate::output::OutputDir;
use crate::output::{OutputDir, PositiveOutcome};
use crate::package::Package;
use crate::*;

Expand All @@ -29,13 +29,14 @@ pub fn test_mutants(
workspace_dir: &Utf8Path,
options: Options,
console: &Console,
last_positive_outcomes: Option<Vec<PositiveOutcome>>,
) -> Result<LabOutcome> {
let start_time = Instant::now();
let output_in_dir: &Utf8Path = options
.output_in_dir
.as_ref()
.map_or(workspace_dir, |p| p.as_path());
let output_dir = OutputDir::new(output_in_dir)?;
let output_dir = OutputDir::new(output_in_dir, last_positive_outcomes)?;
console.set_debug_log(output_dir.open_debug_log()?);

if options.shuffle {
Expand Down
18 changes: 17 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod copy_tree;
mod exit_code;
mod fnvalue;
mod in_diff;
mod incremental;
mod interrupt;
mod lab;
mod list;
Expand Down Expand Up @@ -192,6 +193,10 @@ struct Args {
#[arg(long, short = 'D')]
in_diff: Option<Utf8PathBuf>,

/// Incremental run, testing only the mutants that weren’t caught in the previous run
#[arg(long, short = 'i')]
incremental: bool,

/// only test mutants from these packages.
#[arg(id = "package", long, short = 'p')]
mutate_packages: Vec<String>,
Expand Down Expand Up @@ -297,10 +302,21 @@ fn main() -> Result<()> {
&read_to_string(in_diff).context("Failed to read filter diff")?,
)?;
}
let mut last_positive_outcomes = None;
if args.incremental {
(last_positive_outcomes, mutants) =
incremental::filter_by_last_positive_outcomes(mutants, &workspace.dir, &options);
}
if args.list {
list_mutants(FmtToIoWrite::new(io::stdout()), &mutants, &options)?;
} else {
let lab_outcome = test_mutants(mutants, &workspace.dir, options, &console)?;
let lab_outcome = test_mutants(
mutants,
&workspace.dir,
options,
&console,
last_positive_outcomes,
)?;
exit(lab_outcome.exit_code());
}
Ok(())
Expand Down
20 changes: 17 additions & 3 deletions src/mutate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

use std::fmt;
use std::fs;
use std::hash::Hash;
use std::hash::Hasher;
use std::hash::SipHasher;
use std::sync::Arc;

use anyhow::{ensure, Context, Result};
Expand All @@ -19,7 +22,7 @@ use crate::span::Span;
use crate::MUTATION_MARKER_COMMENT;

/// Various broad categories of mutants.
#[derive(Clone, Eq, PartialEq, Debug, Serialize)]
#[derive(Clone, Eq, PartialEq, Debug, Serialize, Hash)]
pub enum Genre {
/// Replace the body of a function with a fixed value.
FnValue,
Expand All @@ -28,7 +31,7 @@ pub enum Genre {
}

/// A mutation applied to source code.
#[derive(Clone, Eq, PartialEq)]
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct Mutant {
/// Which file is being mutated.
pub source_file: Arc<SourceFile>,
Expand All @@ -53,7 +56,7 @@ pub struct Mutant {
/// The function containing a mutant.
///
/// This is used for both mutations of the whole function, and smaller mutations within it.
#[derive(Eq, PartialEq, Debug, Serialize)]
#[derive(Eq, PartialEq, Debug, Hash, Serialize)]
pub struct Function {
/// The function that's being mutated.
pub function_name: String,
Expand Down Expand Up @@ -198,8 +201,19 @@ impl Mutant {
self.span.start.line,
)
}

pub fn calculate_hash(&self) -> MutantHash {
// We need the hashes to be stable across restarts, so that they can be serialized
// consistently.
let mut s = SipHasher::new();
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is deprecated and the warning says we should use DefaultHasher, but the doc says:

The internal algorithm is not specified, and so it and its hashes should not be relied upon over releases.
https://doc.rust-lang.org/std/collections/hash_map/struct.DefaultHasher.html

and in this case, we need the hashes to be stable since they are persisted on disk.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we could just avoid the serialized format depending on a hash algorithm: could you store them in the originally generated order or even just sorted by location and name?

self.hash(&mut s);
s.finish()
}
}

/// A hash of a mutant, used to identify it across runs.
pub type MutantHash = u64;

impl fmt::Debug for Mutant {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Custom implementation to show spans more concisely
Expand Down
3 changes: 2 additions & 1 deletion src/outcome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::time::Instant;

use anyhow::Context;
use serde::ser::SerializeStruct;
use serde::Deserialize;
use serde::Serialize;
use serde::Serializer;

Expand Down Expand Up @@ -294,7 +295,7 @@ impl Serialize for PhaseResult {
}

/// Overall summary outcome for one mutant.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Hash)]
#[derive(Debug, Deserialize, Clone, Eq, PartialEq, Serialize, Hash)]
pub enum SummaryOutcome {
Success,
CaughtMutant,
Expand Down
96 changes: 86 additions & 10 deletions src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,19 @@ use anyhow::{Context, Result};
use camino::Utf8Path;
use fs2::FileExt;
use path_slash::PathExt;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use tracing::info;

use crate::mutate::MutantHash;
use crate::outcome::{LabOutcome, SummaryOutcome};
use crate::*;

const OUTDIR_NAME: &str = "mutants.out";
const ROTATED_NAME: &str = "mutants.out.old";
pub const OUTDIR_NAME: &str = "mutants.out";
pub const ROTATED_NAME: &str = "mutants.out.old";
pub const OUTCOMES_FILE: &str = "outcomes.json";
pub const POSITIVE_OUTCOMES_FILE: &str = "positive_outcomes.json";
const LOCK_JSON: &str = "lock.json";
const LOCK_POLL: Duration = Duration::from_millis(100);

Expand Down Expand Up @@ -98,6 +101,7 @@ pub struct OutputDir {
/// A file holding a list of mutants where testing timed out, as text, one per line.
timeout_list: File,
unviable_list: File,
last_positive_outcomes: Option<Vec<PositiveOutcome>>,
/// The accumulated overall lab outcome.
pub lab_outcome: LabOutcome,
}
Expand All @@ -113,7 +117,10 @@ impl OutputDir {
///
/// If the directory already exists and `lock.json` exists and is locked, this waits for
/// the lock to be released. The returned `OutputDir` holds a lock for its lifetime.
pub fn new(in_dir: &Utf8Path) -> Result<OutputDir> {
pub fn new(
in_dir: &Utf8Path,
last_positive_outcomes: Option<Vec<PositiveOutcome>>,
) -> Result<OutputDir> {
if !in_dir.exists() {
fs::create_dir(in_dir).context("create output parent directory {in_dir:?}")?;
}
Expand Down Expand Up @@ -161,6 +168,7 @@ impl OutputDir {
caught_list,
timeout_list,
unviable_list,
last_positive_outcomes,
})
}

Expand All @@ -178,21 +186,45 @@ impl OutputDir {
&self.path
}

/// Save positive outcomes for incremental runs
///
/// Called multiple times as the lab runs.
pub fn maybe_write_positive_outcomes(&self) -> Result<()> {
if let Some(last_positive_outcomes) = &self.last_positive_outcomes {
let positive_outcomes = self
.lab_outcome
.outcomes
.iter()
.filter_map(|o: &ScenarioOutcome| PositiveOutcome::try_from(o).ok())
.chain(last_positive_outcomes.iter().cloned())
.collect::<Vec<PositiveOutcome>>();

serde_json::to_writer_pretty(
BufWriter::new(File::create(self.path.join(POSITIVE_OUTCOMES_FILE))?),
&positive_outcomes,
)
.context(format!("write {}", POSITIVE_OUTCOMES_FILE))
} else {
Ok(())
}
}

/// Update the state of the overall lab.
///
/// Called multiple times as the lab runs.
pub fn write_lab_outcome(&self) -> Result<()> {
serde_json::to_writer_pretty(
BufWriter::new(File::create(self.path.join("outcomes.json"))?),
BufWriter::new(File::create(self.path.join(OUTCOMES_FILE))?),
&self.lab_outcome,
)
.context("write outcomes.json")
.context(format!("write {}", OUTCOMES_FILE))
}

/// Add the result of testing one scenario.
pub fn add_scenario_outcome(&mut self, scenario_outcome: &ScenarioOutcome) -> Result<()> {
self.lab_outcome.add(scenario_outcome.to_owned());
self.write_lab_outcome()?;
self.maybe_write_positive_outcomes()?;
let scenario = &scenario_outcome.scenario;
if let Scenario::Mutant(mutant) = scenario {
let file = match scenario_outcome.summary() {
Expand Down Expand Up @@ -281,7 +313,7 @@ mod test {
let tmp = minimal_source_tree();
let tmp_path: &Utf8Path = tmp.path().try_into().unwrap();
let workspace = Workspace::open(tmp_path).unwrap();
let output_dir = OutputDir::new(&workspace.dir).unwrap();
let output_dir = OutputDir::new(&workspace.dir, None).unwrap();
assert_eq!(
list_recursive(tmp.path()),
&[
Expand Down Expand Up @@ -309,7 +341,7 @@ mod test {
let temp_dir_path = Utf8Path::from_path(temp_dir.path()).unwrap();

// Create an initial output dir with one log.
let output_dir = OutputDir::new(temp_dir_path).unwrap();
let output_dir = OutputDir::new(temp_dir_path, None).unwrap();
output_dir.create_log(&Scenario::Baseline).unwrap();
assert!(temp_dir
.path()
Expand All @@ -318,7 +350,7 @@ mod test {
drop(output_dir); // release the lock.

// The second time we create it in the same directory, the old one is moved away.
let output_dir = OutputDir::new(temp_dir_path).unwrap();
let output_dir = OutputDir::new(temp_dir_path, None).unwrap();
output_dir.create_log(&Scenario::Baseline).unwrap();
assert!(temp_dir
.path()
Expand All @@ -331,7 +363,7 @@ mod test {
drop(output_dir);

// The third time (and later), the .old directory is removed.
let output_dir = OutputDir::new(temp_dir_path).unwrap();
let output_dir = OutputDir::new(temp_dir_path, None).unwrap();
output_dir.create_log(&Scenario::Baseline).unwrap();
assert!(temp_dir
.path()
Expand All @@ -347,3 +379,47 @@ mod test {
.is_file());
}
}

/// Caught and unviable scenario outcome
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct PositiveOutcome {
pub mutant_hash: MutantHash,
/// This is always `Unviable` or `Caught`, because other outcomes aren’t psotive outcomes
pub summary: SummaryOutcome,
}

impl PositiveOutcome {
fn new(mutant: &Mutant, summary: SummaryOutcome) -> Result<Self> {
match summary {
SummaryOutcome::CaughtMutant | SummaryOutcome::Unviable => Ok(Self {
mutant_hash: mutant.calculate_hash(),
summary,
}),
_ => Err(anyhow::anyhow!(
"outcome {:?} isn’t a positive outcome",
summary
)),
}
}

/// Gets the hash of the associated mutant
pub fn mutant_hash(&self) -> MutantHash {
self.mutant_hash
}
}

impl TryFrom<&ScenarioOutcome> for PositiveOutcome {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This bit is currently broken, we are working to fix it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed now

type Error = anyhow::Error;

fn try_from(scenario_outcome: &ScenarioOutcome) -> Result<Self> {
let mutant = match &scenario_outcome.scenario {
Scenario::Mutant(mutant) => mutant,
Scenario::Baseline => {
return Err(anyhow::anyhow!(
"baseline can’t be converted to positive outcome"
))
}
};
Self::new(mutant, scenario_outcome.summary())
}
}
Loading
Loading