Skip to content

Commit

Permalink
update nanoq version; header option [#29]
Browse files Browse the repository at this point in the history
  • Loading branch information
esteinig committed Feb 27, 2022
2 parents 598872e + 5509020 commit b78c338
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ jobs:
- name: Run cargo-tarpaulin
uses: actions-rs/tarpaulin@v0.1
with:
version: '0.18.0'
version: '0.19.1'
timeout: '240'
args: '-- --test-threads 1'

Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Ultra-fast quality control and summary reports for nanopore reads
## Performance

`Nanoq` is as fast as `seqtk-fqchk` for summary statistics of small datasets (100,000 reads, + computes nanopore quality scores) and slightly faster on large datasets (3.5 million reads, 1.3x - 1.5x). In `fast` mode (no quality scores), `nanoq` is (~2-3x) faster than `rust-bio-tools` and `seqkit stats` for summary statistics and faster than other commonly used summary reporters (up to 442x) and read filtering methods (up to 297x). Memory consumption is consistent and tends to be lower than other tools (~5-10x).
`Nanoq` is as fast as `seqtk-fqchk` for summary statistics of small datasets (e.g. Zymo - 100,000 reads) and slightly faster on large datasets (e.g. Zymo - 3.5 million reads, 1.3x - 1.5x). In `fast` mode (no quality scores), `nanoq` is (~2-3x) faster than `rust-bio-tools` and `seqkit stats` for summary statistics and faster than other commonly used summary reporters (up to 442x) and read filtering methods (up to 297x). Memory consumption is consistent and tends to be lower than other tools (~5-10x).

## Tests

Expand Down Expand Up @@ -113,6 +113,8 @@ Read qualities may be excluded from filters and statistics to speed up read iter
nanoq -i test.fq.gz -f -s
```

> :warning: When using fast mode (`-f`), read quality scores are not computed.
`Nanoq` can be used to check on active sequencing runs and barcoded samples.

```bash
Expand Down
8 changes: 6 additions & 2 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,23 @@ pub struct Cli {
#[structopt(short = "w", long, value_name = "FLOAT", default_value = "0")]
pub max_qual: f32,

/// Verbose output statistics [multiple up to -vvv]
/// Verbose output statistics [multiple, up to -vvv]
#[structopt(
short,
long,
parse(from_occurrences = parse_verbosity)
)]
pub verbose: u64,

/// Header for summary output
#[structopt(short = "H", long)]
pub header: bool,

/// Number of top reads in verbose summary.
#[structopt(short, long, value_name = "INT", default_value = "5")]
pub top: usize,

/// Summary statistics report
/// Summary statistics report.
#[structopt(short, long)]
pub stats: bool,

Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fn main() -> Result<()> {
let mut read_set = ReadSet::new(read_lengths, read_qualities);

read_set
.summary(&cli.verbose, cli.top)
.summary(&cli.verbose, cli.top, cli.header)
.context("unable to get summary")?;

Ok(())
Expand Down
54 changes: 53 additions & 1 deletion src/needlecast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,17 +256,69 @@ mod tests {
}

#[test]
fn needlecast_filter_length_fq_ok() {
fn needlecast_filter_max_fq_ok() {
use structopt::StructOpt;

let cli = Cli::from_iter(&["nanoq", "-i", "tests/cases/test_ok.fq", "-o", "/dev/null"]);
let mut caster = NeedleCast::new(&cli);
let (read_lengths, read_quals) = caster.filter(0, 3, 0.0, 0.0).unwrap();

assert_eq!(read_lengths, vec![]);
assert_eq!(read_quals, vec![]);
}

/// Length-only filtering with both bounds set to 0 returns every read of
/// `tests/cases/test_len.fq` (two reads, 4 and 8 bases long) and no quality values.
#[test]
fn needlecast_filter_length_fq_ok() {
    use structopt::StructOpt;

    let args = ["nanoq", "-i", "tests/cases/test_len.fq", "-o", "/dev/null"];
    let cli = Cli::from_iter(&args);

    let mut needle = NeedleCast::new(&cli);
    // NOTE(review): a bound of 0 appears to mean "no limit" here — confirm against `filter_length`.
    let (lengths, quals) = needle.filter_length(0, 0).unwrap();

    assert_eq!(lengths, vec![4, 8]);
    assert_eq!(quals, vec![]);
}

/// Exercises the second argument of `filter_length` (the upper length bound, judging
/// by the expectations below) against `tests/cases/test_len.fq`, whose reads are
/// 4 and 8 bases long.
#[test]
fn needlecast_filter_length_max_fq_ok() {
    use structopt::StructOpt;

    let args = ["nanoq", "-i", "tests/cases/test_len.fq", "-o", "/dev/null"];
    let cli = Cli::from_iter(&args);

    // Upper bound of 3: both reads are longer, so nothing is retained.
    let mut needle = NeedleCast::new(&cli);
    let (lengths, quals) = needle.filter_length(0, 3).unwrap();

    assert_eq!(lengths, vec![]);
    assert_eq!(quals, vec![]);

    // A fresh NeedleCast is built for the second pass; per the original author this
    // is required to reset the length filter parameters.
    // Upper bound of 5: only the 4-base read is retained.
    let mut needle = NeedleCast::new(&cli);
    let (lengths, quals) = needle.filter_length(0, 5).unwrap();

    assert_eq!(lengths, vec![4]);
    assert_eq!(quals, vec![]);
}

/// Exercises the first argument of `filter_length` (the lower length bound, judging
/// by the expectations below) against `tests/cases/test_len.fq`, whose reads are
/// 4 and 8 bases long.
#[test]
fn needlecast_filter_length_min_fq_ok() {
    use structopt::StructOpt;

    let args = ["nanoq", "-i", "tests/cases/test_len.fq", "-o", "/dev/null"];
    let cli = Cli::from_iter(&args);

    // Lower bound of 5: only the 8-base read is retained.
    let mut needle = NeedleCast::new(&cli);
    let (lengths, quals) = needle.filter_length(5, 0).unwrap();

    assert_eq!(lengths, vec![8]);
    assert_eq!(quals, vec![]);

    // A fresh NeedleCast is built for the second pass; per the original author this
    // is required to reset the length filter parameters.
    // Lower bound of 4: the expectation shows the 4-base read is kept as well.
    let mut needle = NeedleCast::new(&cli);
    let (lengths, quals) = needle.filter_length(4, 0).unwrap();

    assert_eq!(lengths, vec![4, 8]);
    assert_eq!(quals, vec![]);
}

#[test]
fn needlecast_filter_fa_ok() {
use structopt::StructOpt;
Expand Down
56 changes: 46 additions & 10 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,24 @@ impl ReadSet {
/// )
/// read_set.summary(0, 3);
/// ```
pub fn summary(&mut self, verbosity: &u64, top: usize) -> Result<(), UtilityError> {
pub fn summary(
&mut self,
verbosity: &u64,
top: usize,
header: bool,
) -> Result<(), UtilityError> {
let length_range = self.range_length();

match verbosity {
&0 => {
let head = match header {
true => "reads bases n50 longest shortest mean_length median_length mean_quality median_quality\n",
false => ""
};

eprintdoc! {
"{reads} {bases} {n50} {longest} {shortest} {mean} {median} {meanq:.1} {medianq:.1}\n",
"{head}{reads} {bases} {n50} {longest} {shortest} {mean} {median} {meanq:.1} {medianq:.1}\n",
head = head,
reads = self.reads(),
bases = self.bases(),
n50 = self.n50(),
Expand Down Expand Up @@ -661,12 +672,12 @@ mod tests {
read_set_odd.print_ranking(3);
read_set_odd.print_ranking(5);

read_set_odd.summary(&0, 5).unwrap();
read_set_odd.summary(&1, 5).unwrap();
read_set_odd.summary(&2, 5).unwrap();
read_set_odd.summary(&3, 5).unwrap();
read_set_odd.summary(&0, 5, false).unwrap();
read_set_odd.summary(&1, 5, false).unwrap();
read_set_odd.summary(&2, 5, false).unwrap();
read_set_odd.summary(&3, 5, false).unwrap();

let error = read_set_odd.summary(&4, 5).unwrap_err();
let error = read_set_odd.summary(&4, 5, false).unwrap_err();
assert_eq!(error, UtilityError::InvalidVerbosity("4".to_string()));
}

Expand All @@ -679,7 +690,7 @@ mod tests {

read_set_noqual.print_thresholds();
read_set_noqual.print_ranking(3);
read_set_noqual.summary(&3, 3).unwrap();
read_set_noqual.summary(&3, 3, false).unwrap();
}

#[test]
Expand All @@ -693,7 +704,7 @@ mod tests {

read_set_none.print_thresholds();
read_set_none.print_ranking(3);
read_set_none.summary(&3, 3).unwrap();
read_set_none.summary(&3, 3, false).unwrap();
}

#[test]
Expand All @@ -709,6 +720,31 @@ mod tests {

read_set_none.print_thresholds();
read_set_none.print_ranking(3);
read_set_none.summary(&3, 3).unwrap();
read_set_none.summary(&3, 3, false).unwrap();
}
// These tests do not check the stderr output itself; capturing it
// does not seem possible with libtest at the moment:
// * https://github.com/rust-lang/rust/issues/42474
// * https://github.com/rust-lang/rust/issues/40298
/// Checks the length and quality statistics of a single-read `ReadSet`
/// (length 10, quality 8.0) and runs the printing routines at verbosity 3.
#[test]
fn summary_output_ok() {
    use float_eq::float_eq;

    let mut read_set = ReadSet::new(vec![10], vec![8.0]);
    assert_eq!(read_set.mean_length(), 10);
    assert_eq!(read_set.median_length(), 10);
    // `float_eq!` only evaluates to a `bool`; without the surrounding `assert!`
    // the comparison result is discarded and nothing is actually tested.
    assert!(float_eq!(read_set.mean_quality(), 8.0, abs <= f32::EPSILON));
    assert!(float_eq!(read_set.median_quality(), 8.0, abs <= f32::EPSILON));
    assert_eq!(read_set.range_length(), [10, 10]);

    // The printing routines are only exercised for panics/errors here;
    // their output is not inspected.
    read_set.print_thresholds();
    read_set.print_ranking(3);
    read_set.summary(&3, 3, false).unwrap();
}

/// Runs the verbosity-0 summary with the header enabled on a single-read
/// `ReadSet` and requires that it returns `Ok`; the emitted text is not inspected.
#[test]
fn summary_header_stderr_ok() {
    let verbosity: u64 = 0;
    let with_header = true;

    let mut read_set = ReadSet::new(vec![10], vec![8.0]);
    let outcome = read_set.summary(&verbosity, 3, with_header);
    outcome.unwrap();
}
}
8 changes: 8 additions & 0 deletions tests/cases/test_len.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@id1
ACTG
+
IIII
@id2
ACTGACTG
+
IIIIIIII

0 comments on commit b78c338

Please sign in to comment.