Skip to content

Commit

Permalink
add tests for distance calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
Johanna committed Sep 13, 2024
1 parent 6bd1e3f commit 32106ff
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 0 deletions.
160 changes: 160 additions & 0 deletions tests/test_files_in/distances.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
use predicates::prelude::*;
use snapbox::cmd::{cargo_bin, Command};
use std::path::Path;

pub mod common;
use crate::common::*;
use sketchlib::io::*;

use sketchlib::multisketch::MultiSketch;

use std::fs::File;
use std::io::{BufRead, BufReader};

#[cfg(test)]

mod tests {
use super::*;
use sketchlib::io;

fn compare_distances(a: f64, b: f64, rel_tol: f64) -> bool {
let abs_diff = (a - b).abs();
let larger = a.abs().max(b.abs());
abs_diff <= larger * rel_tol
}

#[test]
fn test_distances() {
let sandbox = TestSetup::setup();

//Test 1: One short sequence vs one short seqeunce (1 SNP apart)

//single k-mer
//Test 2: Jaccard Single k-mer
//Test 3: ANI Single k-mer

//core and accessory distance (4 kmer sizes)
//Test 4:

//Test 1 begin -------------
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.args(&["--k-vals", "5"])
.arg("--seq-files")
.arg(sandbox.file_string("short_sequence.fa", TestDir::Input))
.arg("-v")
.args(&["-o", "test1_part1"])
.assert()
.success();

Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.args(&["--k-vals", "5"])
.arg("--seq-files")
.arg(sandbox.file_string("short_sequence_SNP.fa", TestDir::Input))
.arg("-v")
.args(&["-o", "test1_part2"])
.assert()
.success();

Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("dist")
.arg("test1_part1")
.arg("test1_part2")
.args(&["-k", "5"])
.args(&["-o", "short_sequence_dist"])
.arg("-v")
.assert()
.success();

//Test 2 begin -------------
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.args(&["--k-vals", "17"])
.arg("--seq-files")
.arg(sandbox.file_string("14412_3#82.contigs_velvet.fa.gz", TestDir::Input))
.arg("-v")
.args(&["-o", "test2_part1"])
.assert()
.success();

// removed second to last contigs which is 3610 bps
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.args(&["--k-vals", "17"])
.arg("--seq-files")
.arg(sandbox.file_string("14412_3#82.contigs_velvet_removed_block.fa.gz", TestDir::Input))
.arg("-v")
.args(&["-o", "test2_part2"])
.assert()
.success();


//Test 3 begin -------------
// removed second to last contigs which is 3610 bps
Command::new(cargo_bin("sketchlib"))
.current_dir(sandbox.get_wd())
.arg("sketch")
.args(&["--k-vals", "17"])
.arg("--seq-files")
.arg(sandbox.file_string("14412_3#82.contigs_velvet_removed_block.fa.gz", TestDir::Input))
.arg("-v")
.args(&["-o", "test3_one_kmer"])
.assert()
.success();

// Command::new(cargo_bin("sketchlib"))
// .current_dir(sandbox.get_wd())
// .arg("sketch")
// .args(&["--k-vals", "17"])
// .arg("--seq-files")
// .arg(sandbox.file_string("14412_3#82.contigs_velvet.fa.gz", TestDir::Input))
// .arg(sandbox.file_string("14412_3#84.contigs_velvet.fa.gz", TestDir::Input))
// .arg(sandbox.file_string("R6.fa.gz", TestDir::Input))
// .arg(sandbox.file_string("TIGR4.fa.gz", TestDir::Input))
// .arg("-v")
// .args(&["-o", "test3_three_kmer"])
// .assert()
// .success();

// Test 1: Tests single SNP in short sequence
let c_sketchlib_distance = 0.753806;

// Read the output file
let file = File::open(sandbox.file_string("short_sequence_dist", TestDir::Output))
.expect("Failed to open output file");
let reader = BufReader::new(file);

// Parse the last number from the file
let mut parsed_distance = None;
for line in reader.lines() {
let line = line.expect("Failed to read line");
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() == 3 {
if let Ok(distance) = parts[2].parse::<f64>() {
parsed_distance = Some(distance);
}
}
}

// TEST 1 fails because the distances are off
// // Check if we found a valid distance
// if let Some(distance) = parsed_distance {
// // Compare the parsed distance with the expected distance, allowing 1% difference
// assert!(
// compare_distances(distance, c_sketchlib_distance, 0.01),
// "Distance {} is not within 1% of expected {}",
// distance,
// c_sketchlib_distance
// );
// } else {
// panic!("Failed to parse distance from output file");
// }

}
}
2 changes: 2 additions & 0 deletions tests/test_files_in/short_sequence.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
> short_seq_SNP
CTAGGGCCCTTTCCCGGATATAAACGCCAGGTTGAATCCGCATTTGGAGG
2 changes: 2 additions & 0 deletions tests/test_files_in/short_sequence_SNP.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
> short_seq_no_SNP
CTAGGGCCCTTTCCCGGATAAAAACGCCAGGTTGAATCCGCATTTGGAGG
Binary file removed tests/test_files_in/sketches1.skd
Binary file not shown.
Binary file removed tests/test_files_in/sketches1.skm
Binary file not shown.
Binary file removed tests/test_files_in/sketches2.skd
Binary file not shown.
Binary file removed tests/test_files_in/sketches2.skm
Binary file not shown.
Binary file removed tests/test_files_in/sketches3.skd
Binary file not shown.
Binary file removed tests/test_files_in/sketches3.skm
Binary file not shown.

0 comments on commit 32106ff

Please sign in to comment.