diff --git a/Cargo.lock b/Cargo.lock index e131242..682e7d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -302,7 +302,7 @@ dependencies = [ [[package]] name = "nanoq" -version = "0.8.2" +version = "0.8.5" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index 0360e97..3ae49ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nanoq" -version = "0.8.4" +version = "0.8.5" authors = ["esteinig ", "ljmcoin "] description = "Minimal but speedy quality control and summaries of nanopore reads" documentation = "https://github.com/esteinig/nanoq" diff --git a/README.md b/README.md index 7b21e08..c088a90 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,14 @@ [![build](https://github.com/esteinig/nanoq/actions/workflows/rust-ci.yaml/badge.svg?branch=master)](https://github.com/esteinig/nanoq/actions/workflows/rust-ci.yaml) [![codecov](https://codecov.io/gh/esteinig/nanoq/branch/master/graph/badge.svg?token=1X04YD8YOE)](https://codecov.io/gh/esteinig/nanoq) -![](https://img.shields.io/badge/version-0.8.4-black.svg) +![](https://img.shields.io/badge/version-0.8.5-black.svg) [![DOI](https://joss.theoj.org/papers/10.21105/joss.02991/status.svg)](https://doi.org/10.21105/joss.02991) Ultra-fast quality control and summary reports for nanopore reads ## Overview -**`v0.8.4`** +**`v0.8.5`** - [Purpose](#purpose) - [Install](#install) @@ -72,7 +72,7 @@ cargo install nanoq Explicit version (for some reason defaults to old version) ``` -conda install -c conda-forge -c bioconda nanoq=0.8.4 +conda install -c conda-forge -c bioconda nanoq=0.8.5 ``` #### `Binaries` @@ -80,7 +80,7 @@ conda install -c conda-forge -c bioconda nanoq=0.8.4 Precompiled binaries for Linux and MacOS are attached to the latest release. ``` -VERSION=0.8.4 +VERSION=0.8.5 RELEASE=nanoq-${VERSION}-x86_64-unknown-linux-musl.tar.gz wget https://github.com/esteinig/nanoq/releases/download/${VERSION}/${RELEASE} @@ -144,7 +144,7 @@ done ### Parameters ``` -nanoq 0.8.4 +nanoq 0.8.5 Read filters and summary reports for nanopore data @@ -152,18 +152,19 @@ USAGE: nanoq [FLAGS] [OPTIONS] FLAGS: - -f, --fast Fast mode, do not consider quality values + -f, --fast Ignore quality values if present -h, --help Prints help information - -s, --stats Statistics only, reads to /dev/null + -s, --stats Summary statistics report -V, --version Prints version information - -v, --verbose Pretty print output statistics + -v, --verbose Verbose output statistics [multiple up to -vvv] OPTIONS: -c, --compress-level <1-9> Compression level to use if compressing output [default: 6] -i, --input Fast{a,q}.{gz,xz,bz}, stdin if not present -m, --max-len Maximum read length filter (bp) [default: 0] + -w, --max-qual Maximum average read quality filter (Q) [default: 0] -l, --min-len Minimum read length filter (bp) [default: 0] - -m, --min-qual Minimum average read quality filter (Q) [default: 0] + -q, --min-qual Minimum average read quality filter (Q) [default: 0] -o, --output Output filepath, stdout if not present -O, --output-type u: uncompressed; b: Bzip2; g: Gzip; l: Lzma -t, --top Number of top reads in verbose summary [default: 5] diff --git a/src/cli.rs b/src/cli.rs index 8c2d784..f61063b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -26,7 +26,11 @@ pub struct Cli { #[structopt(short = "q", long, value_name = "FLOAT", default_value = "0")] pub min_qual: f32, - /// Pretty print output statistics. + /// Maximum average read quality filter (Q). + #[structopt(short = "w", long, value_name = "FLOAT", default_value = "0")] + pub max_qual: f32, + + /// Verbose output statistics [multiple up to -vvv] #[structopt( short, long, @@ -38,7 +42,7 @@ pub struct Cli { #[structopt(short, long, value_name = "INT", default_value = "5")] pub top: usize, - /// Statistics only, reads to /dev/null. + /// Summary statistics report #[structopt(short, long)] pub stats: bool, @@ -195,6 +199,17 @@ mod tests { assert_eq!(actual, expected) } + #[test] + fn invalid_max_qual() { + let passed_args = vec!["nanoq", "-w", "test"]; + let args: Result = Cli::from_iter_safe(passed_args); + + let actual = args.unwrap_err().kind; + let expected = clap::ErrorKind::ValueValidation; + + assert_eq!(actual, expected) + } + #[test] fn invalid_to_value() { let passed_args = vec!["nanoq", "-t", "test"]; diff --git a/src/main.rs b/src/main.rs index e094019..4d06585 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,7 +23,7 @@ fn main() -> Result<()> { .filter_length(cli.min_len, cli.max_len) .context("unable to process reads")?, false => needle_cast - .filter(cli.min_len, cli.max_len, cli.min_qual) + .filter(cli.min_len, cli.max_len, cli.min_qual, cli.max_qual) .context("unable to process reads")?, }; diff --git a/src/needlecast.rs b/src/needlecast.rs index 8119012..1bfbe81 100644 --- a/src/needlecast.rs +++ b/src/needlecast.rs @@ -96,6 +96,7 @@ impl NeedleCast { min_length: u32, max_length: u32, min_quality: f32, + max_quality: f32, ) -> Result<(Vec, Vec), ParseError> { let mut read_lengths: Vec = vec![]; let mut read_qualities: Vec = vec![]; @@ -105,6 +106,9 @@ impl NeedleCast { } else { max_length }; + + let max_quality = if max_quality == 0. { 93. } else { max_quality }; + while let Some(record) = self.reader.next() { let rec = record.expect("failed to parse record"); let seqlen = rec.num_bases() as u32; // NANOQ READ LENGTH LIMIT: ~ 4.2 x 10e9 @@ -113,7 +117,11 @@ impl NeedleCast { let mean_error_prob = mean_error_probability(qual); let mean_quality: f32 = -10f32 * mean_error_prob.log(10.0); // FASTQ - if seqlen >= min_length && seqlen <= max_length && mean_quality >= min_quality { + if seqlen >= min_length + && seqlen <= max_length + && mean_quality >= min_quality + && mean_quality <= max_quality + { read_lengths.push(seqlen); read_qualities.push(mean_quality); rec.write(&mut self.writer, None) @@ -241,7 +249,7 @@ mod tests { let cli = Cli::from_iter(&["nanoq", "-i", "tests/cases/test_ok.fq", "-o", "/dev/null"]); let mut caster = NeedleCast::new(&cli); - let (read_lengths, read_quals) = caster.filter(0, 0, 0.0).unwrap(); + let (read_lengths, read_quals) = caster.filter(0, 0, 0.0, 0.0).unwrap(); assert_eq!(read_lengths, vec![4]); assert_eq!(read_quals, vec![40.0]); @@ -265,7 +273,7 @@ mod tests { let cli = Cli::from_iter(&["nanoq", "-i", "tests/cases/test_ok.fa", "-o", "/dev/null"]); let mut caster = NeedleCast::new(&cli); - let (read_lengths, read_quals) = caster.filter(0, 0, 0.0).unwrap(); + let (read_lengths, read_quals) = caster.filter(0, 0, 0.0, 0.0).unwrap(); assert_eq!(read_lengths, vec![4]); assert_eq!(read_quals, vec![]); @@ -290,7 +298,7 @@ mod tests { let cli = Cli::from_iter(&["nanoq", "-i", "tests/cases/test_bad1.fa", "-o", "/dev/null"]); let mut caster = NeedleCast::new(&cli); - caster.filter(0, 0, 0.0).unwrap(); + caster.filter(0, 0, 0.0, 0.0).unwrap(); } #[test] @@ -300,7 +308,7 @@ mod tests { let cli = Cli::from_iter(&["nanoq", "-i", "tests/cases/test_bad1.fq", "-o", "/dev/null"]); let mut caster = NeedleCast::new(&cli); - caster.filter(0, 0, 0.0).unwrap(); + caster.filter(0, 0, 0.0, 0.0).unwrap(); } #[test] @@ -310,7 +318,7 @@ mod tests { let cli = Cli::from_iter(&["nanoq", "-i", "tests/cases/test_bad2.fq", "-o", "/dev/null"]); let mut caster = NeedleCast::new(&cli); - caster.filter(0, 0, 0.0).unwrap(); + caster.filter(0, 0, 0.0, 0.0).unwrap(); } #[test] diff --git a/tests/app.rs b/tests/app.rs index 7872a05..8a0fe82 100644 --- a/tests/app.rs +++ b/tests/app.rs @@ -30,7 +30,7 @@ fn output_file_in_nonexistant_dir() -> Result<(), Box> { } #[test] -fn valid_inputs_raises_no_errors() -> Result<(), Box> { +fn valid_inputs_raise_no_errors() -> Result<(), Box> { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; cmd.args(vec![ "-i",