-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Johanna
committed
Sep 13, 2024
1 parent
6bd1e3f
commit 32106ff
Showing
9 changed files
with
164 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
use predicates::prelude::*; | ||
use snapbox::cmd::{cargo_bin, Command}; | ||
use std::path::Path; | ||
|
||
pub mod common; | ||
use crate::common::*; | ||
use sketchlib::io::*; | ||
|
||
use sketchlib::multisketch::MultiSketch; | ||
|
||
use std::fs::File; | ||
use std::io::{BufRead, BufReader}; | ||
|
||
#[cfg(test)] | ||
|
||
mod tests { | ||
use super::*; | ||
use sketchlib::io; | ||
|
||
fn compare_distances(a: f64, b: f64, rel_tol: f64) -> bool { | ||
let abs_diff = (a - b).abs(); | ||
let larger = a.abs().max(b.abs()); | ||
abs_diff <= larger * rel_tol | ||
} | ||
|
||
#[test] | ||
fn test_distances() { | ||
let sandbox = TestSetup::setup(); | ||
|
||
//Test 1: One short sequence vs one short seqeunce (1 SNP apart) | ||
|
||
//single k-mer | ||
//Test 2: Jaccard Single k-mer | ||
//Test 3: ANI Single k-mer | ||
|
||
//core and accessory distance (4 kmer sizes) | ||
//Test 4: | ||
|
||
//Test 1 begin ------------- | ||
Command::new(cargo_bin("sketchlib")) | ||
.current_dir(sandbox.get_wd()) | ||
.arg("sketch") | ||
.args(&["--k-vals", "5"]) | ||
.arg("--seq-files") | ||
.arg(sandbox.file_string("short_sequence.fa", TestDir::Input)) | ||
.arg("-v") | ||
.args(&["-o", "test1_part1"]) | ||
.assert() | ||
.success(); | ||
|
||
Command::new(cargo_bin("sketchlib")) | ||
.current_dir(sandbox.get_wd()) | ||
.arg("sketch") | ||
.args(&["--k-vals", "5"]) | ||
.arg("--seq-files") | ||
.arg(sandbox.file_string("short_sequence_SNP.fa", TestDir::Input)) | ||
.arg("-v") | ||
.args(&["-o", "test1_part2"]) | ||
.assert() | ||
.success(); | ||
|
||
Command::new(cargo_bin("sketchlib")) | ||
.current_dir(sandbox.get_wd()) | ||
.arg("dist") | ||
.arg("test1_part1") | ||
.arg("test1_part2") | ||
.args(&["-k", "5"]) | ||
.args(&["-o", "short_sequence_dist"]) | ||
.arg("-v") | ||
.assert() | ||
.success(); | ||
|
||
//Test 2 begin ------------- | ||
Command::new(cargo_bin("sketchlib")) | ||
.current_dir(sandbox.get_wd()) | ||
.arg("sketch") | ||
.args(&["--k-vals", "17"]) | ||
.arg("--seq-files") | ||
.arg(sandbox.file_string("14412_3#82.contigs_velvet.fa.gz", TestDir::Input)) | ||
.arg("-v") | ||
.args(&["-o", "test2_part1"]) | ||
.assert() | ||
.success(); | ||
|
||
// removed second to last contigs which is 3610 bps | ||
Command::new(cargo_bin("sketchlib")) | ||
.current_dir(sandbox.get_wd()) | ||
.arg("sketch") | ||
.args(&["--k-vals", "17"]) | ||
.arg("--seq-files") | ||
.arg(sandbox.file_string("14412_3#82.contigs_velvet_removed_block.fa.gz", TestDir::Input)) | ||
.arg("-v") | ||
.args(&["-o", "test2_part2"]) | ||
.assert() | ||
.success(); | ||
|
||
|
||
//Test 3 begin ------------- | ||
// removed second to last contigs which is 3610 bps | ||
Command::new(cargo_bin("sketchlib")) | ||
.current_dir(sandbox.get_wd()) | ||
.arg("sketch") | ||
.args(&["--k-vals", "17"]) | ||
.arg("--seq-files") | ||
.arg(sandbox.file_string("14412_3#82.contigs_velvet_removed_block.fa.gz", TestDir::Input)) | ||
.arg("-v") | ||
.args(&["-o", "test3_one_kmer"]) | ||
.assert() | ||
.success(); | ||
|
||
// Command::new(cargo_bin("sketchlib")) | ||
// .current_dir(sandbox.get_wd()) | ||
// .arg("sketch") | ||
// .args(&["--k-vals", "17"]) | ||
// .arg("--seq-files") | ||
// .arg(sandbox.file_string("14412_3#82.contigs_velvet.fa.gz", TestDir::Input)) | ||
// .arg(sandbox.file_string("14412_3#84.contigs_velvet.fa.gz", TestDir::Input)) | ||
// .arg(sandbox.file_string("R6.fa.gz", TestDir::Input)) | ||
// .arg(sandbox.file_string("TIGR4.fa.gz", TestDir::Input)) | ||
// .arg("-v") | ||
// .args(&["-o", "test3_three_kmer"]) | ||
// .assert() | ||
// .success(); | ||
|
||
// Test 1: Tests single SNP in short sequence | ||
let c_sketchlib_distance = 0.753806; | ||
|
||
// Read the output file | ||
let file = File::open(sandbox.file_string("short_sequence_dist", TestDir::Output)) | ||
.expect("Failed to open output file"); | ||
let reader = BufReader::new(file); | ||
|
||
// Parse the last number from the file | ||
let mut parsed_distance = None; | ||
for line in reader.lines() { | ||
let line = line.expect("Failed to read line"); | ||
let parts: Vec<&str> = line.split_whitespace().collect(); | ||
if parts.len() == 3 { | ||
if let Ok(distance) = parts[2].parse::<f64>() { | ||
parsed_distance = Some(distance); | ||
} | ||
} | ||
} | ||
|
||
// TEST 1 fails because the distances are off | ||
// // Check if we found a valid distance | ||
// if let Some(distance) = parsed_distance { | ||
// // Compare the parsed distance with the expected distance, allowing 1% difference | ||
// assert!( | ||
// compare_distances(distance, c_sketchlib_distance, 0.01), | ||
// "Distance {} is not within 1% of expected {}", | ||
// distance, | ||
// c_sketchlib_distance | ||
// ); | ||
// } else { | ||
// panic!("Failed to parse distance from output file"); | ||
// } | ||
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
> short_seq_SNP | ||
CTAGGGCCCTTTCCCGGATATAAACGCCAGGTTGAATCCGCATTTGGAGG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
> short_seq_no_SNP | ||
CTAGGGCCCTTTCCCGGATAAAAACGCCAGGTTGAATCCGCATTTGGAGG |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.