diff --git a/.github/workflows/basic.yml b/.github/workflows/basic.yml index b7fadf39..17e5f182 100644 --- a/.github/workflows/basic.yml +++ b/.github/workflows/basic.yml @@ -1,6 +1,4 @@ -on: - push: - branches: [master,main,unit-test,fasten_normalize] +on: push name: CI @@ -17,5 +15,8 @@ jobs: - uses: actions-rs/cargo@v1 with: command: build - args: --release --all-features + #args: --release --all-features + - name: tests + run: | + for i in tests/fasten_*.sh; do echo $i; echo ===; bash $i; echo; done; diff --git a/.gitignore b/.gitignore index 50281a44..0eb24cde 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk + +tests/hyperfine +paper/ diff --git a/Cargo.toml b/Cargo.toml index 4edeba05..926006d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fasten" -version = "0.5.0" +version = "0.6.0" authors = ["Lee Katz "] #license-file = "LICENSE" license = "MIT" @@ -75,6 +75,14 @@ path = "src/bin/fasten_sample.rs" name = "fasten_validate" path = "src/bin/fasten_validate.rs" +[[bin]] +name = "fasten_inspect" +path = "src/bin/fasten_inspect.rs" + +[[bin]] +name = "fasten_repair" +path = "src/bin/fasten_repair.rs" + [[bin]] name = "fasten_progress" path = "src/bin/fasten_progress.rs" diff --git a/README.md b/README.md index c6dd9021..31eacddf 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,9 @@ This documentation was built with `cargo docs --no-deps` |[`fasten_normalize`](https://lskatz.github.io/fasten/fasten_normalize) | Normalize read depth by using kmer counting.| |[`fasten_sample`](https://lskatz.github.io/fasten/fasten_sample) | Downsamples reads.| |[`fasten_shuffle`](https://lskatz.github.io/fasten/fasten_shuffle) | Shuffles or deshuffles paired end reads.| -|[`fasten_validate`](https://lskatz.github.io/fasten/fasten_validate) | Validates your reads| +|[`fasten_validate`](https://lskatz.github.io/fasten/fasten_validate) | Validates your reads (deprecated in 
favor of `fasten_inspect` and `fasten_repair`) | +|[`fasten_inspect`](https://lskatz.github.io/fasten/fasten_inspect) | Adds information to read IDs, such as sequence length | +|[`fasten_repair`](https://lskatz.github.io/fasten/fasten_repair) | Repairs corrupted reads | |[`fasten_quality_filter`](https://lskatz.github.io/fasten/fasten_quality_filter) | Transforms nucleotides to "N" if the quality is low | | |[`fasten_trim`](https://lskatz.github.io/fasten/fasten_trim) | Blunt-end trims reads | | |[`fasten_replace`](https://lskatz.github.io/fasten/fasten_replace) | Find and replace using regex | | diff --git a/docs/crates.js b/docs/crates.js index 1dbc0e67..a111ffcf 100644 --- a/docs/crates.js +++ b/docs/crates.js @@ -1 +1 @@ -window.ALL_CRATES = ["adler","aho_corasick","bam","byteorder","cfg_if","crc32fast","crossbeam","fasten","fasten_clean","fasten_combine","fasten_convert","fasten_kmer","fasten_metrics","fasten_mutate","fasten_normalize","fasten_pe","fasten_progress","fasten_quality_filter","fasten_randomize","fasten_regex","fasten_replace","fasten_sample","fasten_shuffle","fasten_sort","fasten_straighten","fasten_trim","fasten_validate","fastq","flate2","futures","getopts","lazy_static","libc","libz_sys","lz4","lz4_sys","maybe_uninit","memchr","miniz_oxide","multiqueue","num","num_bigint","num_complex","num_cpus","num_integer","num_iter","num_rational","num_traits","owning_ref","parking_lot","parking_lot_core","rand","regex","regex_syntax","rustc_serialize","smallvec","statistical","thread_id","thread_local","threadpool","time","ucd_util","unicode_width","utf8_ranges"]; \ No newline at end of file +window.ALL_CRATES = 
["adler","aho_corasick","bam","byteorder","cfg_if","crc32fast","crossbeam","fasten","fasten_clean","fasten_combine","fasten_convert","fasten_inspect","fasten_kmer","fasten_metrics","fasten_mutate","fasten_normalize","fasten_pe","fasten_progress","fasten_quality_filter","fasten_randomize","fasten_regex","fasten_repair","fasten_replace","fasten_sample","fasten_shuffle","fasten_sort","fasten_straighten","fasten_trim","fasten_validate","fastq","flate2","futures","getopts","lazy_static","libc","libz_sys","lz4","lz4_sys","maybe_uninit","memchr","miniz_oxide","multiqueue","num","num_bigint","num_complex","num_cpus","num_integer","num_iter","num_rational","num_traits","owning_ref","parking_lot","parking_lot_core","rand","regex","regex_syntax","rustc_serialize","smallvec","statistical","thread_id","thread_local","threadpool","time","ucd_util","unicode_width","utf8_ranges"]; \ No newline at end of file diff --git a/docs/fasten/all.html b/docs/fasten/all.html index 0e40ca67..46e3ed45 100644 --- a/docs/fasten/all.html +++ b/docs/fasten/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten

Version 0.6.0

Back to index

\ No newline at end of file diff --git a/docs/fasten/index.html b/docs/fasten/index.html index fa0ac91f..ac54a17f 100644 --- a/docs/fasten/index.html +++ b/docs/fasten/index.html @@ -1,6 +1,6 @@ fasten - Rust

Crate fasten[][src]

Expand description

Perform random operations on fastq files, using unix streaming. +

Crate fasten

Version 0.6.0

Crate fasten[][src]

Expand description

Perform random operations on fastq files, using unix streaming. Secure your analysis with Fasten!

Synopsis

read metrics

 
 $ cat testdata/R1.fastq testdata/R2.fastq | \
diff --git a/docs/fasten_clean/all.html b/docs/fasten_clean/all.html
index 3c31eb7a..975aa9e2 100644
--- a/docs/fasten_clean/all.html
+++ b/docs/fasten_clean/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_clean

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_clean/index.html b/docs/fasten_clean/index.html index 2c7b30d1..bc9a20bc 100644 --- a/docs/fasten_clean/index.html +++ b/docs/fasten_clean/index.html @@ -1,6 +1,6 @@ fasten_clean - Rust

Crate fasten_clean[][src]

Expand description

Trim and filter reads

+

Crate fasten_clean

Version 0.6.0

Crate fasten_clean[][src]

Expand description

Trim and filter reads

Examples

cat testdata/four_lines.fastq | \
   fasten_clean > out.fastq

more options

cat testdata | \
   fasten_clean --min-avg-quality 25 --min-trim-quality 25 \
diff --git a/docs/fasten_combine/all.html b/docs/fasten_combine/all.html
index 7661f082..2803d178 100644
--- a/docs/fasten_combine/all.html
+++ b/docs/fasten_combine/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_combine

Version 0.6.0

Back to index

List of all items[]

Functions

Constants

\ No newline at end of file diff --git a/docs/fasten_combine/index.html b/docs/fasten_combine/index.html index f2fd6413..ba296613 100644 --- a/docs/fasten_combine/index.html +++ b/docs/fasten_combine/index.html @@ -1,6 +1,6 @@ fasten_combine - Rust

Crate fasten_combine[][src]

Expand description

Collapse identical reads into single reads, recalculating quality values. +

Crate fasten_combine

Version 0.6.0

Crate fasten_combine[][src]

Expand description

Collapse identical reads into single reads, recalculating quality values. If paired end, then each set of reads must be identical to be collapsed. Warning: due to multiple reads collapsing into one, read identifiers will be reconstituted.

Examples

cat testdata/four_reads | fasten_combine > combined.fastq

Usage

Usage: fasten_combine [-h] [-n INT] [-p] [-v] [--max-qual-char CHAR] [--min-qual-char CHAR]
diff --git a/docs/fasten_convert/all.html b/docs/fasten_convert/all.html
index a981e858..c7445fe0 100644
--- a/docs/fasten_convert/all.html
+++ b/docs/fasten_convert/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_convert

Version 0.6.0

Back to index

\ No newline at end of file diff --git a/docs/fasten_convert/index.html b/docs/fasten_convert/index.html index e1823bb3..b43cc9d2 100644 --- a/docs/fasten_convert/index.html +++ b/docs/fasten_convert/index.html @@ -1,6 +1,6 @@ fasten_convert - Rust

Crate fasten_convert[][src]

Expand description

Convert between different sequence formats

+

Crate fasten_convert

Version 0.6.0

Crate fasten_convert[][src]

Expand description

Convert between different sequence formats

Examples

Simple conversion

cat file.fastq | fasten_convert -i fastq -o fasta > out.fasta

Convert to sam and then to bam

cat file.fastq | fasten_convert -i fastq -o sam   | samtools view -bS > file.bam

Convert to fastq and then clean

cat file.fasta | fasten_convert -i fasta -o fastq | fasten_clean > cleaned.fastq

Usage

Usage: fasten_convert [-h] [-n INT] [-p] [-v] [-i FORMAT] [-o FORMAT]
 
 Options:
diff --git a/docs/fasten_inspect/all.html b/docs/fasten_inspect/all.html
new file mode 100644
index 00000000..1f97a9ea
--- /dev/null
+++ b/docs/fasten_inspect/all.html
@@ -0,0 +1,6 @@
+List of all items in this crate
+    

List of all items[] + +

Functions

+ \ No newline at end of file diff --git a/docs/fasten_inspect/fn.main.html b/docs/fasten_inspect/fn.main.html new file mode 100644 index 00000000..a18c21c0 --- /dev/null +++ b/docs/fasten_inspect/fn.main.html @@ -0,0 +1,4 @@ +main in fasten_inspect - Rust +

Function fasten_inspect::main[][src]

pub(crate) fn main()
+ \ No newline at end of file diff --git a/docs/fasten_inspect/fn.validate_reads.html b/docs/fasten_inspect/fn.validate_reads.html new file mode 100644 index 00000000..72cae11d --- /dev/null +++ b/docs/fasten_inspect/fn.validate_reads.html @@ -0,0 +1,5 @@ +validate_reads in fasten_inspect - Rust +

Function fasten_inspect::validate_reads[][src]

pub(crate) fn validate_reads(
    lines_per_read: u8,
    seq_regex: Regex,
    qual_regex: Regex
)
Expand description

marks up reads from stdin

+
+ \ No newline at end of file diff --git a/docs/fasten_inspect/index.html b/docs/fasten_inspect/index.html new file mode 100644 index 00000000..3997bf38 --- /dev/null +++ b/docs/fasten_inspect/index.html @@ -0,0 +1,31 @@ +fasten_inspect - Rust +

Crate fasten_inspect[][src]

Expand description

Marks up your reads with useful information like read length

+

Examples

Quick validation with stderr message

cat file.fastq | fasten_inspect > markedup.fastq
+cat file.fastq | fasten_inspect --paired-end > markedup-paired.fastq
+

The resulting marked-up fastq file will have deflines like

+
@read0/1 id-at:1 seq-length:100 seq-invalid-chars: id-plus:1 qual-invalid-chars: avg-qual:20.93 qual-length:100 read-pair:1

Usage

fasten_inspect: Marks up your reads with useful information like read length
+
+Usage: fasten_inspect [-h] [-n INT] [-p] [--verbose] [--version]
+
+Options:
+   -h, --help          Print this help menu.
+   -n, --numcpus INT   Number of CPUs (default: 1)
+   -p, --paired-end    The input reads are interleaved paired-end
+       --verbose       Print more status messages
+       --version       Print the version of Fasten and exit
+
+

The fields will be found on the defline of the sequence and include:

+
+ + + + + + + +
key | type | example | note
id-at | boolean (1 or 0) | id-at:1 | Whether or not the @ was first character, first line
seq-invalid-chars | string | seq-invalid-chars:$$% |
qual-invalid-chars | string | qual-invalid-chars:[< |
seq-length | int | seq-length:100 |
id-plus | boolean | id-plus:1 | Whether or not the + was first character, 3rd line
avg-qual | float | avg-qual:17.52 |
qual-length | int | qual-length:100 | Length of the quality score line
+

Functions

+

marks up reads from stdin

+
+ \ No newline at end of file diff --git a/docs/fasten_inspect/sidebar-items.js b/docs/fasten_inspect/sidebar-items.js new file mode 100644 index 00000000..9c7f6b86 --- /dev/null +++ b/docs/fasten_inspect/sidebar-items.js @@ -0,0 +1 @@ +initSidebarItems({"fn":[["main",""],["validate_reads","marks up reads from stdin"]]}); \ No newline at end of file diff --git a/docs/fasten_kmer/all.html b/docs/fasten_kmer/all.html index 0a8b596d..7432cc93 100644 --- a/docs/fasten_kmer/all.html +++ b/docs/fasten_kmer/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_kmer

Version 0.6.0

Back to index

List of all items[]

Functions

Constants

\ No newline at end of file diff --git a/docs/fasten_kmer/index.html b/docs/fasten_kmer/index.html index aec992e1..23af54bd 100644 --- a/docs/fasten_kmer/index.html +++ b/docs/fasten_kmer/index.html @@ -1,6 +1,6 @@ fasten_kmer - Rust

Crate fasten_kmer[][src]

Expand description

Counts kmers. +

Crate fasten_kmer

Version 0.6.0

Crate fasten_kmer[][src]

Expand description

Counts kmers. Each line is a kmer with two columns separated by tab: kmer, count. Optional columns starting with column 3 are the reads that start with that kmer, with a delimiter of ~

diff --git a/docs/fasten_metrics/all.html b/docs/fasten_metrics/all.html index b72e9fae..1ac15a60 100644 --- a/docs/fasten_metrics/all.html +++ b/docs/fasten_metrics/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_metrics

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_metrics/index.html b/docs/fasten_metrics/index.html index 26946c50..0eb7052d 100644 --- a/docs/fasten_metrics/index.html +++ b/docs/fasten_metrics/index.html @@ -1,6 +1,6 @@ fasten_metrics - Rust

Crate fasten_metrics[][src]

Expand description

Gives read metrics on a read set. +

Crate fasten_metrics

Version 0.6.0

Crate fasten_metrics[][src]

Expand description

Gives read metrics on a read set. Values are given in a column delimited stdout.

Examples

cat testdata/four_reads.fastq | fasten_metrics | column -t

Usage

Usage: fasten_metrics [-h] [-n INT] [-p] [-v] [--each-read] [--distribution STRING]
 Options:
diff --git a/docs/fasten_mutate/all.html b/docs/fasten_mutate/all.html
index f0d4aea7..3717645e 100644
--- a/docs/fasten_mutate/all.html
+++ b/docs/fasten_mutate/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_mutate

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_mutate/index.html b/docs/fasten_mutate/index.html index da9498fa..d2337f89 100644 --- a/docs/fasten_mutate/index.html +++ b/docs/fasten_mutate/index.html @@ -1,6 +1,6 @@ fasten_mutate - Rust

Crate fasten_mutate[][src]

Expand description

Mutates reads. There is no mutation model; only randomness.

+

Crate fasten_mutate

Version 0.6.0

Crate fasten_mutate[][src]

Expand description

Mutates reads. There is no mutation model; only randomness.

Examples

cat testdata/four_reads.fastq | fasten_mutate > out.fastq

Usage

 
 Usage: fasten_mutate [-h] [-n INT] [-p] [-v] [-s INT] [-m]
  
diff --git a/docs/fasten_normalize/all.html b/docs/fasten_normalize/all.html
index 6d571999..30acea35 100644
--- a/docs/fasten_normalize/all.html
+++ b/docs/fasten_normalize/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_normalize

Version 0.6.0

Back to index

List of all items[]

Functions

Constants

\ No newline at end of file diff --git a/docs/fasten_normalize/index.html b/docs/fasten_normalize/index.html index 6473ef68..8d0d4050 100644 --- a/docs/fasten_normalize/index.html +++ b/docs/fasten_normalize/index.html @@ -1,6 +1,6 @@ fasten_normalize - Rust

Crate fasten_normalize[][src]

Expand description

Normalizes kmer depth by removing some reads from high kmer depths +

Crate fasten_normalize

Version 0.6.0

Crate fasten_normalize[][src]

Expand description

Normalizes kmer depth by removing some reads from high kmer depths. The input has to be from fasten_kmer --remember-reads where there are at least three columns: kmer, count, read1, [read2,…]

This was inspired by BBNorm and is probably not the exact same algorithm. diff --git a/docs/fasten_pe/all.html b/docs/fasten_pe/all.html index 6cfb0744..b778afc9 100644 --- a/docs/fasten_pe/all.html +++ b/docs/fasten_pe/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_pe

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_pe/index.html b/docs/fasten_pe/index.html index a6ac166e..9bb23b7f 100644 --- a/docs/fasten_pe/index.html +++ b/docs/fasten_pe/index.html @@ -1,6 +1,6 @@ fasten_pe - Rust

Crate fasten_pe[][src]

Expand description

Determine paired-end-ness in an interleaved file. +

Crate fasten_pe

Version 0.6.0

Crate fasten_pe[][src]

Expand description

Determine paired-end-ness in an interleaved file. Exit code of 0 indicates PE. Exit code > 0 indicates single end.

Examples

Test the file and then print a message with the exit code

cat file.fastq | fasten_pe; echo "Reads were paired-end? $?";

Test the file and if it is paired end (exit code 0), then print a message

cat file.fastq | fasten_pe && echo "Reads were paired end.";

Usage

Usage: fasten_pe [-h] [-n INT] [-p] [-v] [--print-reads]
  
diff --git a/docs/fasten_progress/all.html b/docs/fasten_progress/all.html
index 37e428ab..7e43a9b4 100644
--- a/docs/fasten_progress/all.html
+++ b/docs/fasten_progress/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_progress

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_progress/index.html b/docs/fasten_progress/index.html index d78f28cf..f82cc267 100644 --- a/docs/fasten_progress/index.html +++ b/docs/fasten_progress/index.html @@ -1,6 +1,6 @@ fasten_progress - Rust

Crate fasten_progress[][src]

Expand description

Prints a progress meter for number of fastq entries to stderr.

+

Crate fasten_progress

Version 0.6.0

Crate fasten_progress[][src]

Expand description

Prints a progress meter for number of fastq entries to stderr.

Examples

fasten_metrics progress

While getting read metrics for a large fastq file, print the progress to make the wait a little easier

diff --git a/docs/fasten_quality_filter/all.html b/docs/fasten_quality_filter/all.html index 06c1a237..85bc9970 100644 --- a/docs/fasten_quality_filter/all.html +++ b/docs/fasten_quality_filter/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_quality_filter

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_quality_filter/index.html b/docs/fasten_quality_filter/index.html index 4972dfb0..7d85802b 100644 --- a/docs/fasten_quality_filter/index.html +++ b/docs/fasten_quality_filter/index.html @@ -1,6 +1,6 @@ fasten_quality_filter - Rust

Crate fasten_quality_filter[][src]

Expand description

Transforms any low-quality base to ‘N’

+

Crate fasten_quality_filter

Version 0.6.0

Crate fasten_quality_filter[][src]

Expand description

Transforms any low-quality base to ‘N’

Examples

cat file.fastq | fasten_quality_filter > file_with_Ns.fastq

Usage

 
 Usage: fasten_quality_filter [-h] [-n INT] [-p] [-v] [-m INT]
  
diff --git a/docs/fasten_randomize/all.html b/docs/fasten_randomize/all.html
index c1d5cfe0..4a6b25b6 100644
--- a/docs/fasten_randomize/all.html
+++ b/docs/fasten_randomize/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_randomize

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_randomize/index.html b/docs/fasten_randomize/index.html index 7e2b25c2..c92de7b2 100644 --- a/docs/fasten_randomize/index.html +++ b/docs/fasten_randomize/index.html @@ -1,6 +1,6 @@ fasten_randomize - Rust

Crate fasten_randomize[][src]

Expand description

Create random reads from stdin.

+

Crate fasten_randomize

Version 0.6.0

Crate fasten_randomize[][src]

Expand description

Create random reads from stdin.

Examples

print "hello world\n";

General usage

General usage to randomize the order of the reads

cat file.fastq | fasten_randomize > random.fastq

One read

diff --git a/docs/fasten_regex/all.html b/docs/fasten_regex/all.html index 2b0c7121..0292939a 100644 --- a/docs/fasten_regex/all.html +++ b/docs/fasten_regex/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_regex

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_regex/index.html b/docs/fasten_regex/index.html index c42d0142..e5ca2d5f 100644 --- a/docs/fasten_regex/index.html +++ b/docs/fasten_regex/index.html @@ -1,6 +1,6 @@ fasten_regex - Rust

Crate fasten_regex[][src]

Expand description

Filter reads based on a regular expression.

+

Crate fasten_regex

Version 0.6.0

Crate fasten_regex[][src]

Expand description

Filter reads based on a regular expression.

Examples

Find a specific read

cat file.fastq | fasten_regex --which ID --regex 'my-specific-read-id-1234' > my_read.fastq

Find a specific read but also keep its pair

cat file.fastq | fasten_regex --which ID --regex 'my-specific-read-id-1234' --paired-end > my_pairs.fastq

Find a specific motif

cat file.fastq | fasten_regex --which SEQ --regex ATAT > atat-motif.fastq

Usage

Usage: fasten_regex [-h] [-n INT] [-p] [-v] [-r STRING] [-w String]
  
 Options:
diff --git a/docs/fasten_repair/all.html b/docs/fasten_repair/all.html
new file mode 100644
index 00000000..6ecdee96
--- /dev/null
+++ b/docs/fasten_repair/all.html
@@ -0,0 +1,6 @@
+List of all items in this crate
+    

List of all items[] + +

Functions

+ \ No newline at end of file diff --git a/docs/fasten_repair/fn.main.html b/docs/fasten_repair/fn.main.html new file mode 100644 index 00000000..c376eb3b --- /dev/null +++ b/docs/fasten_repair/fn.main.html @@ -0,0 +1,4 @@ +main in fasten_repair - Rust +

Function fasten_repair::main[][src]

pub(crate) fn main()
+ \ No newline at end of file diff --git a/docs/fasten_repair/fn.repair_one_read.html b/docs/fasten_repair/fn.repair_one_read.html new file mode 100644 index 00000000..da9ef3c6 --- /dev/null +++ b/docs/fasten_repair/fn.repair_one_read.html @@ -0,0 +1,5 @@ +repair_one_read in fasten_repair - Rust +

Function fasten_repair::repair_one_read[][src]

pub(crate) fn repair_one_read(
    id: String,
    seq: String,
    plus: String,
    qual: String,
    should_repair: bool,
    min_length: usize,
    min_qual: f32,
    remove_info: bool
) -> (String, bool, String)
Expand description

Repair exactly one read

+
+ \ No newline at end of file diff --git a/docs/fasten_repair/fn.repair_reads.html b/docs/fasten_repair/fn.repair_reads.html new file mode 100644 index 00000000..801df5e1 --- /dev/null +++ b/docs/fasten_repair/fn.repair_reads.html @@ -0,0 +1,5 @@ +repair_reads in fasten_repair - Rust +

Function fasten_repair::repair_reads[][src]

pub(crate) fn repair_reads(
    paired_end: bool,
    min_length: usize,
    min_qual: f32,
    remove_info: bool,
    mode: &str
)
Expand description

Repairs reads depending on the deflines by calling repair_one_read

+
+ \ No newline at end of file diff --git a/docs/fasten_repair/index.html b/docs/fasten_repair/index.html new file mode 100644 index 00000000..48fac48c --- /dev/null +++ b/docs/fasten_repair/index.html @@ -0,0 +1,42 @@ +fasten_repair - Rust +

Crate fasten_repair[][src]

Expand description

Repairs reads from fasten_inspect output

+

Examples

./target/debug/fasten_inspect  < testdata/four_reads.fastq | \
+  ./target/debug/fasten_repair --remove-info > repaired.fastq
+
+

If --remove-info is given, then extra header information from fasten_inspect will be removed.

+

Usage

Usage: fasten_repair [-h] [-n INT] [-p] [--verbose] [--version] [--min-length INT] [--min-quality FLOAT] [--remove-info] [-m STRING]
+Options:
+   -h, --help          Print this help menu.
+   -n, --numcpus INT   Number of CPUs (default: 1)
+   -p, --paired-end    The input reads are interleaved paired-end
+       --verbose       Print more status messages
+       --version       Print the version of Fasten and exit
+       --min-length INT
+                       Minimum read length allowed
+       --min-quality FLOAT
+                       Minimum quality allowed
+       --remove-info   Remove fasten_inspect headers
+   -m, --mode STRING   Either repair or panic. If panic, then the binary will
+                       panic when the first issue comes up. Default:repair

Methods of repair

+

If you choose --mode repair, then this is the expected behavior:

+
    +
  • Mismatched seq and qual lengths: seq or qual length will be truncated
  • +
  • R1 or R2 not passing: then the other (R1 or R2) will also not pass and they will not be +printed.
  • +
+

Panic

+

If the sequences are not repaired but there is still an issue, the program might still panic:

+
    +
  • seq length < min length (TODO when implementing PE reads)
  • +
  • avg qual < min qual (TODO when implementing PE reads)
  • +
  • invalid characters in seq (TODO when implementing PE reads)
  • +
  • invalid characters in qual (TODO when implementing PE reads)
  • +
  • @ not present in first character of the entry (TODO when implementing PE reads)
  • +
  • + not present in the first character of the third line (TODO when implementing PE reads)
  • +
+

Functions

+

Repair exactly one read

+

Repairs reads depending on the deflines by calling repair_one_read

+
+ \ No newline at end of file diff --git a/docs/fasten_repair/sidebar-items.js b/docs/fasten_repair/sidebar-items.js new file mode 100644 index 00000000..369f5130 --- /dev/null +++ b/docs/fasten_repair/sidebar-items.js @@ -0,0 +1 @@ +initSidebarItems({"fn":[["main",""],["repair_one_read","Repair exactly one read"],["repair_reads","Repairs reads depending on the deflines by calling repair_one_read"]]}); \ No newline at end of file diff --git a/docs/fasten_replace/all.html b/docs/fasten_replace/all.html index 1d2abb5a..398337f8 100644 --- a/docs/fasten_replace/all.html +++ b/docs/fasten_replace/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_replace

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_replace/index.html b/docs/fasten_replace/index.html index 3bcd9390..f0ddc512 100644 --- a/docs/fasten_replace/index.html +++ b/docs/fasten_replace/index.html @@ -1,6 +1,6 @@ fasten_replace - Rust

Crate fasten_replace[][src]

Expand description

Streaming editor for fastq data using a find/replace.

+

Crate fasten_replace

Version 0.6.0

Crate fasten_replace[][src]

Expand description

Streaming editor for fastq data using a find/replace.

Examples

Force a motif to be lowercase

cat file.fastq | fasten_replace --which SEQ --find ATAT --replace atat > file.fastq

Mutate the middle base of a kmer

cat file.fastq | fasten_replace --which SEQ --find AAAAA --replace AATAA > file.fastq
 
 Usage: fasten_replace [-h] [-n INT] [-p] [-v] [-f STRING] [-r STRING] [-w STRING]
  
diff --git a/docs/fasten_sample/all.html b/docs/fasten_sample/all.html
index bd5025e9..04d8e6fc 100644
--- a/docs/fasten_sample/all.html
+++ b/docs/fasten_sample/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_sample

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_sample/index.html b/docs/fasten_sample/index.html index d28e1b4d..c7cc9c09 100644 --- a/docs/fasten_sample/index.html +++ b/docs/fasten_sample/index.html @@ -1,6 +1,6 @@ fasten_sample - Rust

Crate fasten_sample[][src]

Expand description

downsample your reads

+

Crate fasten_sample

Version 0.6.0

Crate fasten_sample[][src]

Expand description

downsample your reads

Examples

Get 10% of the reads

cat file.fastq | fasten_sample --frequency 0.1 > out.fastq

Usage

    Usage: fasten_sample [-h] [-n INT] [-p] [-v] [-f FLOAT]
      
     Options:
diff --git a/docs/fasten_shuffle/all.html b/docs/fasten_shuffle/all.html
index 95c4880d..5b6ea431 100644
--- a/docs/fasten_shuffle/all.html
+++ b/docs/fasten_shuffle/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_shuffle

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_shuffle/index.html b/docs/fasten_shuffle/index.html index 0a63cbfb..a74b1fca 100644 --- a/docs/fasten_shuffle/index.html +++ b/docs/fasten_shuffle/index.html @@ -1,6 +1,6 @@ fasten_shuffle - Rust

Crate fasten_shuffle[][src]

Expand description

Interleaves reads from either stdin or file parameters.

+

Crate fasten_shuffle

Version 0.6.0

Crate fasten_shuffle[][src]

Expand description

Interleaves reads from either stdin or file parameters.

Many fasten executables are aware of paired end reads but they need to be in interleaved format. This script transforms R1 and R2 reads into interleaved format.

diff --git a/docs/fasten_sort/all.html b/docs/fasten_sort/all.html index a77004a4..c2fec96a 100644 --- a/docs/fasten_sort/all.html +++ b/docs/fasten_sort/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_sort

Version 0.6.0

Back to index

List of all items[]

Structs

Functions

\ No newline at end of file diff --git a/docs/fasten_sort/index.html b/docs/fasten_sort/index.html index a5b0b197..2e559592 100644 --- a/docs/fasten_sort/index.html +++ b/docs/fasten_sort/index.html @@ -1,6 +1,6 @@ fasten_sort - Rust

Crate fasten_sort[][src]

Expand description

Sort a fastq file. +

Crate fasten_sort

Version 0.6.0

Crate fasten_sort[][src]

Expand description

Sort a fastq file. If the reads are paired end, then the sorted field concatenates R1 and R2 before comparisons in the sort. R1 and R2 reads will stay together if paired end.

diff --git a/docs/fasten_straighten/all.html b/docs/fasten_straighten/all.html index 2ad8a8bb..0764bede 100644 --- a/docs/fasten_straighten/all.html +++ b/docs/fasten_straighten/all.html @@ -1,6 +1,6 @@ List of all items in this crate

List of all items[] +

Crate fasten_straighten

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_straighten/index.html b/docs/fasten_straighten/index.html index d5248ab2..5dfd83d4 100644 --- a/docs/fasten_straighten/index.html +++ b/docs/fasten_straighten/index.html @@ -1,6 +1,6 @@ fasten_straighten - Rust

Crate fasten_straighten[][src]

Expand description

Convert a fastq file to a standard 4-lines-per-entry format

+

Crate fasten_straighten

Version 0.6.0

Crate fasten_straighten[][src]

Expand description

Convert a fastq file to a standard 4-lines-per-entry format

Examples

cat weird.fastq | fasten_straighten > four-per-entry.fastq

Usage

Usage: fasten_straighten [-h] [-n INT] [-p] [-v]
  
 Options:
diff --git a/docs/fasten_trim/all.html b/docs/fasten_trim/all.html
index 4445e0df..43b4673e 100644
--- a/docs/fasten_trim/all.html
+++ b/docs/fasten_trim/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_trim

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_trim/index.html b/docs/fasten_trim/index.html index ea90eb8e..b88d027a 100644 --- a/docs/fasten_trim/index.html +++ b/docs/fasten_trim/index.html @@ -1,6 +1,6 @@ fasten_trim - Rust

Crate fasten_trim[][src]

Expand description

Blunt-end trims using 0-based coordinates

+

Crate fasten_trim

Version 0.6.0

Crate fasten_trim[][src]

Expand description

Blunt-end trims using 0-based coordinates

Examples

Trim five bases from the right side

cat file.fastq | fasten_trim -l -5 > trimmed.fastq

Keep a maximum of 100bp

cat file.fastq | fasten_trim -l 99 > trimmed.fastq

Trim 5bp from the left side

cat file.fastq | fasten_trim -f 4  > trimmed.fastq

Usage

Usage: fasten_trim [-h] [-n INT] [-p] [-v] [-f INT] [-l INT]
  
 Options:
diff --git a/docs/fasten_validate/all.html b/docs/fasten_validate/all.html
index 44aab997..cb56f5fa 100644
--- a/docs/fasten_validate/all.html
+++ b/docs/fasten_validate/all.html
@@ -1,6 +1,6 @@
 List of all items in this crate
     

List of all items[] +

Crate fasten_validate

Version 0.6.0

Back to index

List of all items[]

Functions

\ No newline at end of file diff --git a/docs/fasten_validate/fn.main.html b/docs/fasten_validate/fn.main.html index 5dc07179..edc67228 100644 --- a/docs/fasten_validate/fn.main.html +++ b/docs/fasten_validate/fn.main.html @@ -1,4 +1,4 @@ main in fasten_validate - Rust

Function fasten_validate::main[][src]

pub(crate) fn main()
+

Function fasten_validate::main[][src]

pub(crate) fn main()
\ No newline at end of file diff --git a/docs/fasten_validate/index.html b/docs/fasten_validate/index.html index d01d1c61..94d681de 100644 --- a/docs/fasten_validate/index.html +++ b/docs/fasten_validate/index.html @@ -1,6 +1,6 @@ fasten_validate - Rust

Crate fasten_validate[][src]

Expand description

Validates your reads and makes you feel good about yourself!

+

Crate fasten_validate

Version 0.6.0

Crate fasten_validate[][src]

Expand description

Validates your reads and makes you feel good about yourself!

Examples

Quick validation with stderr message

cat file.fastq | fasten_validate --verbose

Validate that your reads are paired end

cat R1.fastq R2.fastq | fasten_shuffle | fasten_validate --paired-end

Parallelize

Large-scale validation of PE reads with 4 CPUs and xargs

diff --git a/docs/search-index.js b/docs/search-index.js index 34a8f610..2a8bdce1 100644 --- a/docs/search-index.js +++ b/docs/search-index.js @@ -10,6 +10,7 @@ var searchIndex = JSON.parse('{\ "fasten_clean":{"doc":"Trim and filter reads","t":[5,5,5,5],"n":["avg_quality","clean_entry","main","trim"],"q":["fasten_clean","","",""],"d":["Determine average quality of a qual cigar string, e.g., …","Cleans a SE or PE read","","Trim the ends of reads with low quality"],"i":[0,0,0,0],"f":[[[["string",3]],["f32",15]],[[["vec",3,[["string",3]]],["usize",15],["f32",15],["u8",15],["u8",15],["sender",3,[["string",3]]],["sender",3,[["string",3]]]]],[[]],[[["string",3],["string",3],["u8",15]]]],"p":[]},\ "fasten_combine":{"doc":"Collapse identical reads into single reads, recalculating …","t":[17,17,5,5],"n":["READ_SEPARATOR","TEN","combine_error_vectors","main"],"q":["fasten_combine","","",""],"d":["Glues together paired end reads internally and is a …","need this constant because the compiler had a problem with …","Combines vectors of error probabilities such that the rate …",""],"i":[0,0,0,0],"f":[null,null,[[["vec",3],["vec",3]],["vec",3,[["f32",15]]]],[[]]],"p":[]},\ "fasten_convert":{"doc":"Convert between different sequence formats","t":[3,11,11,11,11,11,11,11,11,11,12,12,11,5,11,12,12,5,5,5,12,12,11,11,11,11,5,5,5],"n":["FastenSeq","as_fasta","as_fastq","as_sam","borrow","borrow_mut","clone","clone_into","fmt","from","id1","id2","into","main","new","qual1","qual2","read_fasta","read_fastq","read_sam","seq1","seq2","to_owned","try_from","try_into","type_id","write_fasta","write_fastq","write_sam"],"q":["fasten_convert","","","","","","","","","","","","","","","","","","","","","","","","","","","",""],"d":["Struct that can handle paired end reads","Return a formatted string as a fasta entry","Return a formatted string as a fastq entry","Return a formatted string as a sam entry","","","","","","","","","","","a blank new object is a set of blank strings for each 
value","","","Read fasta from stdin and transmit it to a channel","Read fastq from stdin and transmit it to a channel","Read sam from stdin and transmit it to a channel","","","","","","","Read from a channel and print as fasta","Read from a channel and print as fastq","Read from a channel and print as sam"],"i":[0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,1,0,0,0],"f":[null,[[],["string",3]],[[],["string",3]],[[],["string",3]],[[]],[[]],[[],["fastenseq",3]],[[]],[[["formatter",3]],["result",6]],[[]],null,null,[[]],[[]],[[],["fastenseq",3]],null,null,[[["sender",3,[["fastenseq",3]]],["bool",15]]],[[["sender",3,[["fastenseq",3]]],["bool",15]]],[[["sender",3,[["fastenseq",3]]],["bool",15]]],null,null,[[]],[[],["result",4]],[[],["result",4]],[[],["typeid",3]],[[["receiver",3,[["fastenseq",3]]]]],[[["receiver",3,[["fastenseq",3]]]]],[[["receiver",3,[["fastenseq",3]]]]]],"p":[[3,"FastenSeq"]]},\ +"fasten_inspect":{"doc":"Marks up your reads with useful information like read …","t":[5,5],"n":["main","validate_reads"],"q":["fasten_inspect",""],"d":["","marks up reads from stdin"],"i":[0,0],"f":[[[]],[[["u8",15],["regex",3],["regex",3]]]],"p":[]},\ "fasten_kmer":{"doc":"Counts kmers. Each line is a kmer with two columns …","t":[17,5,5,5,5,5],"n":["READ_SEPARATOR","count_kmers","kmers_in_str","main","revcomp","switch_base"],"q":["fasten_kmer","","","","",""],"d":["Glues together paired end reads internally and is a …","Read fastq from stdin and count kmers","Read a str of nucleotides and count kmers. If …","","reverse-complement a dna sequence","Complementary nucleotide for ACTGUN, case insensitive"],"i":[0,0,0,0,0,0],"f":[null,[[["stdin",3],["usize",15],["bool",15],["bool",15],["bool",15]]],[[["str",15],["usize",15],["bool",15]],["hashmap",3,[["string",3],["u32",15]]]],[[]],[[["str",15]],["string",3]],[[["char",15]],["char",15]]],"p":[]},\ "fasten_metrics":{"doc":"Gives read metrics on a read set. 
Values are given in a …","t":[5,5,5],"n":["average_quality","main","standard_deviation"],"q":["fasten_metrics","",""],"d":["given a cigar line for quality, return its average","","Local implementation of standard deviation"],"i":[0,0,0],"f":[[[["str",15]],["f32",15]],[[]],[[["vec",3]],["f32",15]]],"p":[]},\ "fasten_mutate":{"doc":"Mutates reads. There is no mutation model; only randomness.","t":[5,5],"n":["main","mutate"],"q":["fasten_mutate",""],"d":["","Mutate a str of a sequence of nucleotides using the …"],"i":[0,0],"f":[[[]],[[["str",15],["vec",3],["u8",15],["bool",15]],["string",3]]],"p":[]},\ @@ -19,6 +20,7 @@ var searchIndex = JSON.parse('{\ "fasten_quality_filter":{"doc":"Transforms any low-quality base to ‘N’","t":[5],"n":["main"],"q":["fasten_quality_filter"],"d":[""],"i":[0],"f":[[[]]],"p":[]},\ "fasten_randomize":{"doc":"Create random reads from stdin.","t":[5,5],"n":["main","print_reads_from_stdin"],"q":["fasten_randomize",""],"d":["","Read fastq from stdin, add the reads to a vector, then …"],"i":[0,0],"f":[[[]],[[["u32",15]]]],"p":[]},\ "fasten_regex":{"doc":"Filter reads based on a regular expression.","t":[5],"n":["main"],"q":["fasten_regex"],"d":[""],"i":[0],"f":[[[]]],"p":[]},\ +"fasten_repair":{"doc":"Repairs reads from fasten_inspect output","t":[5,5,5],"n":["main","repair_one_read","repair_reads"],"q":["fasten_repair","",""],"d":["","Repair exactly one read","Repairs reads depending on the deflines by calling …"],"i":[0,0,0],"f":[[[]],[[["string",3],["string",3],["string",3],["string",3],["bool",15],["usize",15],["f32",15],["bool",15]]],[[["bool",15],["usize",15],["f32",15],["bool",15],["str",15]]]],"p":[]},\ "fasten_replace":{"doc":"Streaming editor for fastq data using a find/replace.","t":[5],"n":["main"],"q":["fasten_replace"],"d":[""],"i":[0],"f":[[[]]],"p":[]},\ "fasten_sample":{"doc":"downsample your reads","t":[5],"n":["main"],"q":["fasten_sample"],"d":[""],"i":[0],"f":[[[]]],"p":[]},\ "fasten_shuffle":{"doc":"Interleaves reads from 
either stdin or file parameters.","t":[5,5,5,5],"n":["deshuffle","main","read_seqs","shuffle"],"q":["fasten_shuffle","","",""],"d":["Read from stdin and deshuffle reads into files","","Read fastq entries from a filename","Read fastq from stdin and interleave"],"i":[0,0,0,0],"f":[[[["matches",3]]],[[]],[[["string",3]],["vec",3,[["seq",3]]]],[[["matches",3]]]],"p":[]},\ diff --git a/docs/settings.html b/docs/settings.html index a6263ec3..3b331eb3 100644 --- a/docs/settings.html +++ b/docs/settings.html @@ -1,5 +1,5 @@ Rustdoc settings -

Rustdoc settings

Theme preferences
Use system theme
Preferred dark theme
Preferred light theme
-
Auto-hide item contents for large items.
Auto-hide item methods' documentation
Auto-hide trait implementation documentation
Directly go to item in search if there is only one result
Show line numbers on code examples
Disable keyboard shortcuts
+

Rustdoc settings

Theme preferences
Use system theme
Preferred dark theme
Preferred light theme
+
Auto-hide item contents for large items.
Auto-hide item methods' documentation
Auto-hide trait implementation documentation
Directly go to item in search if there is only one result
Show line numbers on code examples
Disable keyboard shortcuts
\ No newline at end of file diff --git a/docs/source-files.js b/docs/source-files.js index 2e6ee011..8cd8b1c0 100644 --- a/docs/source-files.js +++ b/docs/source-files.js @@ -10,6 +10,7 @@ sourcesIndex["fasten"] = {"name":"","dirs":[{"name":"io","files":["fastq.rs","mo sourcesIndex["fasten_clean"] = {"name":"","files":["fasten_clean.rs"]}; sourcesIndex["fasten_combine"] = {"name":"","files":["fasten_combine.rs"]}; sourcesIndex["fasten_convert"] = {"name":"","files":["fasten_convert.rs"]}; +sourcesIndex["fasten_inspect"] = {"name":"","files":["fasten_inspect.rs"]}; sourcesIndex["fasten_kmer"] = {"name":"","files":["fasten_kmer.rs"]}; sourcesIndex["fasten_metrics"] = {"name":"","files":["fasten_metrics.rs"]}; sourcesIndex["fasten_mutate"] = {"name":"","files":["fasten_mutate.rs"]}; @@ -19,6 +20,7 @@ sourcesIndex["fasten_progress"] = {"name":"","files":["fasten_progress.rs"]}; sourcesIndex["fasten_quality_filter"] = {"name":"","files":["fasten_quality_filter.rs"]}; sourcesIndex["fasten_randomize"] = {"name":"","files":["fasten_randomize.rs"]}; sourcesIndex["fasten_regex"] = {"name":"","files":["fasten_regex.rs"]}; +sourcesIndex["fasten_repair"] = {"name":"","files":["fasten_repair.rs"]}; sourcesIndex["fasten_replace"] = {"name":"","files":["fasten_replace.rs"]}; sourcesIndex["fasten_sample"] = {"name":"","files":["fasten_sample.rs"]}; sourcesIndex["fasten_shuffle"] = {"name":"","files":["fasten_shuffle.rs"]}; diff --git a/docs/src/fasten_inspect/fasten_inspect.rs.html b/docs/src/fasten_inspect/fasten_inspect.rs.html new file mode 100644 index 00000000..48005552 --- /dev/null +++ b/docs/src/fasten_inspect/fasten_inspect.rs.html @@ -0,0 +1,363 @@ +fasten_inspect.rs - source +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+
//! Marks up your reads with useful information like read length
+//! 
+//! # Examples
+//! 
+//! ## Mark up single-end or interleaved paired-end reads
+//! ```bash
+//! cat file.fastq | fasten_inspect > markedup.fastq
+//! cat file.fastq | fasten_inspect --paired-end > markedup-paired.fastq
+//! ```
+//!
+//! The resulting marked-up fastq file will have deflines like
+//!
+//! ```text
+//! @read0/1 id-at:1 seq-length:100 seq-invalid-chars: id-plus:1 qual-invalid-chars: avg-qual:20.93 qual-length:100 read-pair:1
+//! ```
+//!
+//! # Usage
+//! 
+//! ```text
+//!fasten_inspect: Marks up your reads with useful information like read length
+//!
+//!Usage: fasten_inspect [-h] [-n INT] [-p] [--verbose] [--version]
+//!
+//!Options:
+//!    -h, --help          Print this help menu.
+//!    -n, --numcpus INT   Number of CPUs (default: 1)
+//!    -p, --paired-end    The input reads are interleaved paired-end
+//!        --verbose       Print more status messages
+//!        --version       Print the version of Fasten and exit
+//!
+//! ```
+//!
+//! The fields will be found on the defline of the sequence and include:
+//!
+//!| key | type  | example | note   |
+//!| --- | ----- | ------- | ------ |
+//!| id-at | boolean (1 or 0) | id-at:1 | Whether or not the `@` was first character, first line | 
+//!| seq-invalid-chars | string | seq-invalid-chars:$$% | |
+//!| qual-invalid-chars | string | qual-invalid-chars:[< | |
+//!| seq-length | int | seq-length:100 | |
+//!| id-plus | boolean | id-plus:1 | Whether or not the `+` was first character, 3rd line |
+//!| avg-qual | float | avg-qual:17.52 | |
+//!| qual-length | int | qual-length:100 | Length of the quality score line |
+//!
+//!
+
+// TODO add points that were validated into the sequence deflines: length, is-paired, seq-regex=1, and anything else
+
+extern crate getopts;
+extern crate fasten;
+extern crate regex;
+use std::fs::File;
+//use std::io::BufReader;
+use std::io::{BufRead,BufReader};
+
+use regex::Regex;
+
+use fasten::fasten_base_options;
+use fasten::fasten_base_options_matches;
+
+fn main(){
+    // No script-specific options are registered here; `--paired-end` is
+    // presumably provided by the base options (the usage text lists it).
+    let opts = fasten_base_options();
+    let matches = fasten_base_options_matches("Marks up your reads with useful information like read length", opts);
+
+    // An interleaved paired-end entry spans two 4-line records.
+    let lines_per_read :u8 = if matches.opt_present("paired-end") { 8 } else { 4 };
+
+    // Both patterns describe what is NOT wanted: any match on the sequence or
+    // quality line is later reported as an invalid character.
+    let seq_regex  = Regex::new(r"[^a-zA-Z]").expect("malformed seq regex");
+    let qual_regex = Regex::new(r"\s").expect("malformed qual regex");
+
+    validate_reads(lines_per_read, seq_regex, qual_regex);
+
+    let verbose = matches.opt_present("verbose");
+    if verbose {
+        fasten::logmsg("These reads have been validated!");
+    }
+}
+
+/// Marks up reads from stdin.
+///
+/// Consumes four lines at a time (id, seq, plus, qual), trims surrounding
+/// whitespace from each, appends `key:value` annotations to the id line and
+/// prints the annotated entry to stdout.
+///
+/// * `lines_per_read` - 4 for single-end input, 8 for interleaved paired-end
+///   input; only used to work out the `read-pair` annotation.
+/// * `seq_regex` - every match on the sequence line is reported in
+///   `seq-invalid-chars`.
+/// * `qual_regex` - every match on the quality line is reported in
+///   `qual-invalid-chars`.
+///
+/// Blank or missing id/plus lines (for example a trailing empty line, or an
+/// entry cut short by end of input) are reported as `id-at:0` / `id-plus:0`
+/// instead of causing a panic.
+fn validate_reads(lines_per_read: u8, seq_regex: regex::Regex, qual_regex: regex::Regex) {
+    let my_file = File::open("/dev/stdin").expect("Could not open file");
+    let mut my_buffer = BufReader::new(my_file);
+
+    let mut id   = String::new();
+    let mut seq  = String::new();
+    let mut plus = String::new();
+    let mut qual = String::new();
+
+    // Number of input lines consumed before the current entry.
+    let mut i :u64 = 0;
+    loop{
+
+        id.clear();
+        if my_buffer.read_line(&mut id).expect("Cannot read new line") == 0 {
+            break;
+        }
+
+        seq.clear();
+        my_buffer.read_line(&mut seq).expect("ERROR: failed to read 'seq' line");
+        plus.clear();
+        my_buffer.read_line(&mut plus).expect("ERROR: failed to read 'plus' line");
+        qual.clear();
+        my_buffer.read_line(&mut qual).expect("ERROR: failed to read 'qual' line");
+
+        let id_line   = id.trim();
+        let seq_line  = seq.trim();
+        let plus_line = plus.trim();
+        let qual_line = qual.trim();
+
+        // The annotated defline starts out as the original id line.
+        let mut defline = String::from(id_line);
+
+        // Test ID. `starts_with` is false for an empty line, where
+        // `chars().nth(0).unwrap()` would have panicked.
+        let id_at :u8 = if id_line.starts_with('@') { 1 } else { 0 };
+        defline.push_str(&format!(" id-at:{}", id_at));
+
+        // Test Seq: record the length and collect every disallowed character.
+        defline.push_str(&format!(" seq-length:{}", seq_line.len()));
+        let mut illegal_seq_chars = String::new();
+        for cap in seq_regex.captures_iter(seq_line) {
+            illegal_seq_chars.push_str(&cap[0]);
+        }
+        defline.push_str(&format!(" seq-invalid-chars:{}", illegal_seq_chars));
+
+        // Test plus, with the same empty-line safety as the id test.
+        let id_plus :u8 = if plus_line.starts_with('+') { 1 } else { 0 };
+        defline.push_str(&format!(" id-plus:{}", id_plus));
+
+        // Test qual: collect every disallowed character.
+        let mut illegal_qual_chars = String::new();
+        for cap in qual_regex.captures_iter(qual_line) {
+            illegal_qual_chars.push_str(&cap[0]);
+        }
+        defline.push_str(&format!(" qual-invalid-chars:{}", illegal_qual_chars));
+
+        // Average quality: mean character code minus the offset of 33.
+        // An empty quality line is reported as -1.
+        let avg_qual :f32 = if qual_line.is_empty() {
+            -1.0
+        } else {
+            let qual_total :usize = qual_line.chars().map(|q| q as usize).sum();
+            qual_total as f32 / qual_line.len() as f32 - 33.0
+        };
+        defline.push_str(&format!(" avg-qual:{:.2}", avg_qual));
+        defline.push_str(&format!(" qual-length:{}", qual_line.len()));
+
+        // An entry that starts on a multiple of `lines_per_read` is read 1;
+        // anything else (only possible for paired-end input) is read 2.
+        let read_pair :u8 = if i % lines_per_read as u64 == 0 { 1 } else { 2 };
+        defline.push_str(&format!(" read-pair:{}", read_pair));
+
+        i += 4;
+
+        println!("{}\n{}\n{}\n{}", defline, seq_line, plus_line, qual_line);
+    }
+}
+
+
+
+
+ \ No newline at end of file diff --git a/docs/src/fasten_regex/fasten_regex.rs.html b/docs/src/fasten_regex/fasten_regex.rs.html index d8eb81ae..6537b753 100644 --- a/docs/src/fasten_regex/fasten_regex.rs.html +++ b/docs/src/fasten_regex/fasten_regex.rs.html @@ -353,14 +353,14 @@ } }, "ID" => { - if regex.is_match(&all_seq) { + if regex.is_match(&all_id) { true } else { false } }, "QUAL" => { - if regex.is_match(&all_seq) { + if regex.is_match(&all_qual) { true } else { false diff --git a/docs/src/fasten_repair/fasten_repair.rs.html b/docs/src/fasten_repair/fasten_repair.rs.html new file mode 100644 index 00000000..4ded83c9 --- /dev/null +++ b/docs/src/fasten_repair/fasten_repair.rs.html @@ -0,0 +1,633 @@ +fasten_repair.rs - source +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+
//! Repairs reads from fasten_inspect output
+//! 
+//! # Examples
+//! 
+//! ```bash
+//! ./target/debug/fasten_inspect  < testdata/four_reads.fastq | \
+//!   ./target/debug/fasten_repair --remove-info > repaired.fastq
+//!
+//! ```
+//!
+//! If remove-info is given, then extra header information from fasten_inspect will be removed.
+//!
+//! # Usage
+//!
+//! ```text
+//! Usage: fasten_repair [-h] [-n INT] [-p] [--verbose] [--version] [--min-length INT] [--min-quality FLOAT] [--remove-info] [-m STRING]
+//! Options:
+//!    -h, --help          Print this help menu.
+//!    -n, --numcpus INT   Number of CPUs (default: 1)
+//!    -p, --paired-end    The input reads are interleaved paired-end
+//!        --verbose       Print more status messages
+//!        --version       Print the version of Fasten and exit
+//!        --min-length INT
+//!                        Minimum read length allowed
+//!        --min-quality FLOAT
+//!                        Minimum quality allowed
+//!        --remove-info   Remove fasten_inspect headers
+//!    -m, --mode STRING   Either repair or panic. If panic, then the binary will
+//!                        panic when the first issue comes up. Default:repair
+//! ```
+//!
+//! # Methods of repair
+//!
+//! If you choose `--mode repair`, then this is the expected behavior
+//!
+//! * Mismatched seq and qual lengths: seq or qual length will be truncated
+//! * R1 or R2 not passing: then the other (R1 or R2) will also not pass and they will not be
+//! printed.
+//!
+//! # Panic
+//!
+//! If the sequences are not repaired but there is still an issue, the program might still panic:
+//!
+//! * seq length < min length (TODO when implementing PE reads)
+//! * avg qual < min qual (TODO when implementing PE reads)
+//! * invalid characters in seq (TODO when implementing PE reads)
+//! * invalid characters in qual (TODO when implementing PE reads)
+//! * `@` not present in first character of the entry (TODO when implementing PE reads)
+//! * `+` not present in the first character of the third line (TODO when implementing PE reads)
+//! 
+
+extern crate getopts;
+extern crate fasten;
+use std::fs::File;
+use std::io::BufReader;
+use std::io::BufRead;
+use std::collections::HashMap;
+
+use fasten::fasten_base_options;
+use fasten::fasten_base_options_matches;
+use fasten::logmsg;
+
+fn main(){
+    let mut opts = fasten_base_options();
+    // Options specific to this script
+    opts.optopt("","min-length","Minimum read length allowed","INT");
+    opts.optopt("","min-quality","Minimum quality allowed","FLOAT");
+    opts.optflag("", "remove-info", "Remove fasten_inspect headers");
+    opts.optopt("m", "mode", " Either repair or panic. If panic, then the binary will panic when the first issue comes up. Default:repair", "STRING");
+
+    let matches = fasten_base_options_matches("Repairs reads", opts);
+
+    let paired_end  :bool = matches.opt_present("paired-end");
+    let remove_info :bool = matches.opt_present("remove-info");
+
+    // Minimum read length; 0 (no filtering) when the option is absent.
+    let min_length :usize = if matches.opt_present("min-length") {
+        let raw = matches.opt_str("min-length").expect("ERROR parsing min-length");
+        raw.parse().expect("ERROR parsing min-length as INT")
+    } else {
+        0
+    };
+
+    // Minimum average quality; 0.0 (no filtering) when the option is absent.
+    let min_qual :f32 = if matches.opt_present("min-quality") {
+        let raw = matches.opt_str("min-quality").expect("ERROR parsing min-quality");
+        raw.parse().expect("ERROR parsing min-quality as FLOAT")
+    } else {
+        0.0
+    };
+
+    // Repair mode; falls back to "repair" when not given.
+    let mode :String = if matches.opt_present("mode") {
+        matches.opt_str("mode").expect("ERROR parsing mode")
+    } else {
+        String::from("repair")
+    };
+
+    repair_reads(paired_end, min_length, min_qual, remove_info, &mode);
+}
+
+/// Reads fasten_inspect-annotated entries from stdin and hands each one to
+/// `repair_one_read`.
+///
+/// An entry (or, with `paired_end`, a pair of entries) is printed to stdout
+/// only when every read in it passed; messages returned alongside a passing
+/// read are logged as warnings, and messages for a skipped entry are logged
+/// with a `SKIP` prefix. Any `mode` other than `"repair"` disables repairing.
+fn repair_reads(paired_end:bool, min_length: usize, min_qual: f32, remove_info: bool, mode: &str) {
+    /// Pulls the next four lines off the reader. Returns `None` when the input
+    /// is exhausted before the id line; otherwise the untrimmed id line
+    /// followed by the trimmed id, seq, plus and qual lines.
+    fn next_entry(reader: &mut BufReader<File>) -> Option<(String, String, String, String, String)> {
+        let mut raw_id = String::new();
+        if reader.read_line(&mut raw_id).expect("Cannot read new line") == 0 {
+            return None;
+        }
+        let mut seq = String::new();
+        reader.read_line(&mut seq).expect("ERROR: failed to read 'seq' line");
+        let mut plus = String::new();
+        reader.read_line(&mut plus).expect("ERROR: failed to read 'plus' line");
+        let mut qual = String::new();
+        reader.read_line(&mut qual).expect("ERROR: failed to read 'qual' line");
+
+        let id = raw_id.trim().to_string();
+        Some((raw_id, id, seq.trim().to_string(), plus.trim().to_string(), qual.trim().to_string()))
+    }
+
+    //behavior
+    let should_repair :bool = mode == "repair";
+
+    let stdin_file = File::open("/dev/stdin").expect("Could not open file");
+    let mut reader = BufReader::new(stdin_file);
+
+    while let Some((r1_id, id, seq, plus, qual)) = next_entry(&mut reader) {
+        let (r1, is_r1_good, err1):(String, bool, String) = repair_one_read(id, seq, plus, qual, should_repair, min_length, min_qual, remove_info);
+
+        // For single-end input the mate is an empty, always-passing placeholder.
+        let (r2_id, r2, is_r2_good, err2):(String, String, bool, String) = if paired_end {
+            let (raw_id, id, seq, plus, qual) = match next_entry(&mut reader) {
+                Some(mate) => mate,
+                None => panic!("ERROR: paired end expected but not found after R1 {}", r1_id),
+            };
+            let (entry, good, err) = repair_one_read(id, seq, plus, qual, should_repair, min_length, min_qual, remove_info);
+            (raw_id, entry, good, err)
+        } else {
+            (String::new(), String::new(), true, String::new())
+        };
+
+        if !(is_r1_good && is_r2_good) {
+            // Print the errors.
+            if !err1.is_empty() {
+                logmsg(format!("SKIP R1 {}\n=> {}\n", r1_id.trim(), err1));
+            }
+            if !err2.is_empty() {
+                logmsg(format!("SKIP R2 {}\n=> {}\n", r2_id.trim(), err2));
+            }
+            continue;
+        }
+
+        // R1 always goes out; R2 follows on its own lines for paired-end input.
+        if paired_end {
+            println!("{}\n{}", r1, r2);
+        } else {
+            println!("{}", r1);
+        }
+
+        // If R1 and R2 are good, then any "errors" are warnings.
+        if !err1.is_empty() {
+            logmsg(format!("WARNING(s) on R1: {}", err1));
+        }
+        if !err2.is_empty() {
+            logmsg(format!("WARNING(s) on R2: {}", err2));
+        }
+    }
+}
+
+/// Repair exactly one read.
+///
+/// `id` is a defline as written by fasten_inspect: the original identifier
+/// followed by whitespace-separated `key:value` annotations. Only the keys
+/// fasten_inspect writes are treated as annotations; every other field, even
+/// one containing `:` (e.g. an Illumina-style read name), stays part of the
+/// identifier.
+///
+/// * `should_repair` - when seq and qual lengths disagree, truncate both to
+///   the shorter length if true; panic if false.
+/// * `min_length` / `min_qual` - reads below either threshold fail.
+/// * `remove_info` - drop the annotations from the returned defline.
+///
+/// Returns `(entry, is_good, messages)`: the four-line entry joined with
+/// newlines, whether the read passed every check, and newline-terminated
+/// messages (warnings when `is_good`, errors otherwise).
+///
+/// Panics if a numeric annotation cannot be parsed, or on a length mismatch
+/// when `should_repair` is false.
+fn repair_one_read(mut id:String, mut seq:String, plus:String, mut qual:String, should_repair:bool, min_length: usize, min_qual: f32, remove_info: bool) -> (String, bool, String) {
+    /// Keys written by fasten_inspect.
+    const INSPECT_KEYS :[&str; 8] = [
+        "id-at", "seq-length", "seq-invalid-chars", "id-plus",
+        "qual-invalid-chars", "avg-qual", "qual-length", "read-pair",
+    ];
+
+    /// Shortens `s` to at most `max_len` bytes without splitting a character.
+    fn clip(s: &mut String, max_len: usize) {
+        let mut end = max_len.min(s.len());
+        while !s.is_char_boundary(end) {
+            end -= 1;
+        }
+        s.truncate(end);
+    }
+
+    // Eventual error message if any
+    let mut error = String::new();
+    let mut num_errors = 0;
+
+    // Fields of the defline that are not fasten_inspect annotations
+    let mut identifier_fields :Vec<&str> = Vec::new();
+    // Information about the read from the defline
+    let mut f:HashMap<&str, &str> = HashMap::new();
+    for field in id.split_whitespace() {
+        // Split on the FIRST colon only so that a value which itself
+        // contains `:` (e.g. `seq-invalid-chars::`) is kept intact.
+        match field.split_once(':') {
+            Some((key, value)) if INSPECT_KEYS.contains(&key) => {
+                f.insert(key, value);
+            },
+            _ => identifier_fields.push(field),
+        }
+    }
+    // The sequence identifier without fasten_inspect info
+    let identifier :String = identifier_fields.join(" ");
+
+    // get some variables out of the hash, with defaults for absent keys
+    let seq_length  :usize = f.get("seq-length").copied().unwrap_or("0")
+        .parse::<usize>().expect("ERROR: seq-length is not an integer");
+    let qual_length :usize = f.get("qual-length").copied().unwrap_or("0")
+        .parse::<usize>().expect("ERROR: qual-length is not an integer");
+    let avg_qual    :f32   = f.get("avg-qual").copied().unwrap_or("0")
+        .parse::<f32>().expect("ERROR: avg-qual is not a number");
+    let seq_invalid_chars  :&str = f.get("seq-invalid-chars").copied().unwrap_or("");
+    let qual_invalid_chars :&str = f.get("qual-invalid-chars").copied().unwrap_or("");
+    // either 1 or 2; parsed only to validate the annotation
+    let _read_pair :u8 = f.get("read-pair").copied().unwrap_or("1")
+        .parse::<u8>().expect("ERROR: read-pair is not an integer");
+    // these are either 1 (true) or 0 (false)
+    let id_at   :u8 = f.get("id-at").copied().unwrap_or("0")
+        .parse::<u8>().expect("ERROR: id-at is not an integer");
+    let id_plus :u8 = f.get("id-plus").copied().unwrap_or("0")
+        .parse::<u8>().expect("ERROR: id-plus is not an integer");
+
+    // Check seq length and qual length
+    if seq_length != qual_length {
+        if should_repair {
+            // Never trust the annotation beyond the real string lengths:
+            // slicing past the end would panic.
+            let new_length :usize = seq_length
+                .min(qual_length)
+                .min(seq.len())
+                .min(qual.len());
+            clip(&mut seq, new_length);
+            clip(&mut qual, new_length);
+            error.push_str("Repaired sequence and qual length\n");
+            // Don't count this as an actual error and so don't increment.
+        } else {
+            panic!("ERROR: seq length({}) did not match qual length({}) on seqid {}\n", &seq_length, &qual_length, &id);
+        }
+    }
+    if seq_length < min_length {
+        error.push_str(
+            &format!("seq length({}) is less than min length specified ({})\n", &seq_length, &min_length)
+        );
+        num_errors += 1;
+    }
+    // Check quality score
+    if avg_qual < min_qual {
+        error.push_str(
+            &format!("average quality ({}) is less than min quality ({})\n", &avg_qual, &min_qual)
+        );
+        num_errors += 1;
+    }
+
+    // check key seq-invalid-chars
+    if !seq_invalid_chars.is_empty() {
+        error.push_str(
+            &format!("invalid seq characters found in {}\n", &id)
+        );
+        num_errors += 1;
+    }
+    // check key qual-invalid-chars
+    if !qual_invalid_chars.is_empty() {
+        error.push_str(
+            &format!("invalid qual characters found in {}\n", &id)
+        );
+        num_errors += 1;
+    }
+
+    // check key id-at
+    if id_at < 1 {
+        error.push_str(
+            &format!("no @ found at position 1 on line 1 for {}\n", &id)
+        );
+        num_errors += 1;
+    }
+    // check key id-plus
+    if id_plus < 1 {
+        error.push_str(
+            &format!("no + found at position 1 on line 3 for {}\n", &id)
+        );
+        num_errors += 1;
+    }
+
+    // if the user requests, we can remove all fasten_inspect information
+    if remove_info {
+        id = identifier;
+    }
+
+    let entry :String = format!("{}\n{}\n{}\n{}", &id, &seq, &plus, &qual);
+
+    (entry, num_errors == 0, error)
+}
+
+
+
+ \ No newline at end of file diff --git a/docs/src/fasten_validate/fasten_validate.rs.html b/docs/src/fasten_validate/fasten_validate.rs.html index 39104b93..0bae060c 100644 --- a/docs/src/fasten_validate/fasten_validate.rs.html +++ b/docs/src/fasten_validate/fasten_validate.rs.html @@ -175,6 +175,8 @@ 173 174 175 +176 +177

//! Validates your reads and makes you feel good about yourself!
 //! 
 //! # Examples
@@ -231,8 +233,10 @@
 
 use fasten::fasten_base_options;
 use fasten::fasten_base_options_matches;
+use fasten::logmsg;
 
 fn main(){
+    logmsg("NOTE: fasten_validate is deprecated starting in v0.6 in favor of fasten_inspect and fasten_repair");
     let mut opts = fasten_base_options();
     // Options specific to this script
     opts.optopt("","min-length","Minimum read length allowed","INT");
diff --git a/src/bin/fasten_inspect.rs b/src/bin/fasten_inspect.rs
new file mode 100644
index 00000000..5b42470e
--- /dev/null
+++ b/src/bin/fasten_inspect.rs
@@ -0,0 +1,179 @@
+//! Marks up your reads with useful information like read length
+//! 
+//! # Examples
+//! 
+//! ## Mark up single-end or interleaved paired-end reads
+//! ```bash
+//! cat file.fastq | fasten_inspect > markedup.fastq
+//! cat file.fastq | fasten_inspect --paired-end > markedup-paired.fastq
+//! ```
+//!
+//! The resulting marked-up fastq file will have deflines like
+//!
+//! ```text
+//! @read0/1 id-at:1 seq-length:100 seq-invalid-chars: id-plus:1 qual-invalid-chars: avg-qual:20.93 qual-length:100 read-pair:1
+//! ```
+//!
+//! # Usage
+//! 
+//! ```text
+//!fasten_inspect: Marks up your reads with useful information like read length
+//!
+//!Usage: fasten_inspect [-h] [-n INT] [-p] [--verbose] [--version]
+//!
+//!Options:
+//!    -h, --help          Print this help menu.
+//!    -n, --numcpus INT   Number of CPUs (default: 1)
+//!    -p, --paired-end    The input reads are interleaved paired-end
+//!        --verbose       Print more status messages
+//!        --version       Print the version of Fasten and exit
+//!
+//! ```
+//!
+//! The fields will be found on the defline of the sequence and include:
+//!
+//!| key | type  | example | note   |
+//!| --- | ----- | ------- | ------ |
+//!| id-at | boolean (1 or 0) | id-at:1 | Whether or not the `@` was first character, first line | 
+//!| seq-invalid-chars | string | seq-invalid-chars:$$% | |
+//!| qual-invalid-chars | string | qual-invalid-chars:[< | |
+//!| seq-length | int | seq-length:100 | |
+//!| id-plus | boolean | id-plus:1 | Whether or not the `+` was first character, 3rd line |
+//!| avg-qual | float | avg-qual:17.52 | |
+//!| qual-length | int | qual-length:100 | Length of the quality score line |
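+//!| read-pair | int (1 or 2) | read-pair:1 | 1 for R1 and for every single-end read, 2 for R2 of an interleaved pair |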
+//!
+
+// TODO add points that were validated into the sequence deflines: length, is-paired, seq-regex=1, and anything else
+
+extern crate getopts;
+extern crate fasten;
+extern crate regex;
+use std::fs::File;
+//use std::io::BufReader;
+use std::io::{BufRead,BufReader};
+
+use regex::Regex;
+
+use fasten::fasten_base_options;
+use fasten::fasten_base_options_matches;
+
+fn main(){
+    // No script-specific options are registered here;
+    // `--paired-end` and `--verbose` are read from the resulting matches below.
+    let base_opts = fasten_base_options();
+    let usage_summary = "Marks up your reads with useful information like read length";
+    let matches = fasten_base_options_matches(usage_summary, base_opts);
+
+    // An interleaved pair spans eight lines; a single read spans four.
+    let lines_per_read :u8 = match matches.opt_present("paired-end") {
+        true  => 8,
+        false => 4,
+    };
+
+    // Both patterns describe characters that do NOT belong on their line,
+    // so every match is an invalid character to report.
+    // seq:  anything other than an ASCII letter
+    // qual: any whitespace character
+    let invalid_seq_pattern  = Regex::new(r"[^a-zA-Z]").expect("malformed seq regex");
+    let invalid_qual_pattern = Regex::new(r"\s").expect("malformed qual regex");
+
+    validate_reads(lines_per_read, invalid_seq_pattern, invalid_qual_pattern);
+
+    // Announce completion only when the user asked for status messages.
+    let be_verbose = matches.opt_present("verbose");
+    if be_verbose {
+        fasten::logmsg("These reads have been validated!");
+    }
+}
+
+/// Marks up reads from stdin and prints them to stdout.
+///
+/// Each four-line fastq entry is read from `/dev/stdin`, trimmed, and printed
+/// again with these fields appended to its first line, in this order:
+/// `id-at`, `seq-length`, `seq-invalid-chars`, `id-plus`, `qual-invalid-chars`,
+/// `avg-qual`, `qual-length`, and `read-pair`.
+///
+/// * `lines_per_read` - 8 for interleaved paired-end input, 4 otherwise;
+///   used only to decide whether an entry is labeled `read-pair:1` or `read-pair:2`
+/// * `seq_regex`  - matches characters that are invalid in the sequence line
+/// * `qual_regex` - matches characters that are invalid in the quality line
+///
+/// Empty or missing lines (blank lines, truncated last entry) are reported as
+/// `id-at:0` / `id-plus:0` instead of aborting, so that a downstream tool such
+/// as fasten_repair can decide what to do with the entry.
+///
+/// # Panics
+///
+/// Panics if stdin cannot be opened or a line cannot be read.
+fn validate_reads(lines_per_read: u8, seq_regex: regex::Regex, qual_regex: regex::Regex) {
+    let my_file = File::open("/dev/stdin").expect("Could not open file");
+    let mut my_buffer = BufReader::new(my_file);
+
+    let mut id   = String::new();
+    let mut seq  = String::new();
+    let mut plus = String::new();
+    let mut qual = String::new();
+
+    // Number of input lines consumed before the current entry
+    let mut i :u64 = 0;
+    loop{
+        // Zero bytes read for the ID line means end of input
+        id.clear();
+        if my_buffer.read_line(&mut id).expect("Cannot read new line") == 0 {
+            break;
+        }
+        // If the input ends mid-entry, the remaining lines simply stay empty
+        seq.clear();
+        my_buffer.read_line(&mut seq).expect("ERROR: failed to read 'seq' line");
+        plus.clear();
+        my_buffer.read_line(&mut plus).expect("ERROR: failed to read 'plus' line");
+        qual.clear();
+        my_buffer.read_line(&mut qual).expect("ERROR: failed to read 'qual' line");
+        id   = id.trim().to_string();
+        seq  = seq.trim().to_string();
+        plus = plus.trim().to_string();
+        qual = qual.trim().to_string();
+
+        // Test ID. starts_with is simply false on an empty line, whereas
+        // taking the first character and unwrapping it would panic.
+        let id_at :u8 = if id.starts_with('@') { 1 } else { 0 };
+        id.push_str(&format!(" id-at:{}", id_at));
+
+        // Test Seq: record its length and every invalid character found
+        let illegal_seq_chars :String = seq_regex.find_iter(&seq)
+            .map(|m| m.as_str())
+            .collect();
+        id.push_str(&format!(" seq-length:{}", seq.len()));
+        id.push_str(&format!(" seq-invalid-chars:{}", illegal_seq_chars));
+
+        // Test plus, again without panicking on an empty line
+        let id_plus :u8 = if plus.starts_with('+') { 1 } else { 0 };
+        id.push_str(&format!(" id-plus:{}", id_plus));
+
+        // Test qual: invalid characters first
+        let illegal_qual_chars :String = qual_regex.find_iter(&qual)
+            .map(|m| m.as_str())
+            .collect();
+        id.push_str(&format!(" qual-invalid-chars:{}", illegal_qual_chars));
+
+        // Average quality: mean character code minus 33; -1 for an empty quality line
+        let qual_total :usize = qual.chars().map(|q| q as usize).sum();
+        let avg_qual :f32 = if qual.is_empty() {
+            -1.0
+        } else {
+            qual_total as f32 / qual.len() as f32 - 33.0
+        };
+        id.push_str(&format!(" avg-qual:{:.2}", avg_qual));
+        id.push_str(&format!(" qual-length:{}", qual.len()));
+
+        // The entry that starts a group of `lines_per_read` lines is R1, any other is R2
+        let read_pair :u8 = if i % lines_per_read as u64 == 0 { 1 } else { 2 };
+        id.push_str(&format!(" read-pair:{}", read_pair));
+        i += 4;
+
+        println!("{}\n{}\n{}\n{}", id, seq, plus, qual);
+    }
+}
+
+
diff --git a/src/bin/fasten_regex.rs b/src/bin/fasten_regex.rs
index 3170323b..8ba461f5 100644
--- a/src/bin/fasten_regex.rs
+++ b/src/bin/fasten_regex.rs
@@ -153,14 +153,14 @@ fn main(){
               }
             }, 
             "ID" => {
-              if regex.is_match(&all_seq) {
+              if regex.is_match(&all_id) {
                 true
               } else {
                 false
               }
             },
             "QUAL" => {
-              if regex.is_match(&all_seq) {
+              if regex.is_match(&all_qual) {
                 true
               } else {
                 false
diff --git a/src/bin/fasten_repair.rs b/src/bin/fasten_repair.rs
new file mode 100644
index 00000000..ce30c319
--- /dev/null
+++ b/src/bin/fasten_repair.rs
@@ -0,0 +1,314 @@
+//! Repairs reads from fasten_inspect output
+//! 
+//! # Examples
+//! 
+//! ```bash
+//! ./target/debug/fasten_inspect  < testdata/four_reads.fastq | \
+//!   ./target/debug/fasten_repair --remove-info > repaired.fastq
+//!
+//! ```
+//!
+//! If remove-info is given, then extra header information from fasten_inspect will be removed.
+//!
+//! # Usage
+//!
+//! ```text
+//! Usage: fasten_repair [-h] [-n INT] [-p] [--verbose] [--version] [--min-length INT] [--min-quality FLOAT] [--remove-info] [-m STRING]
+//! Options:
+//!    -h, --help          Print this help menu.
+//!    -n, --numcpus INT   Number of CPUs (default: 1)
+//!    -p, --paired-end    The input reads are interleaved paired-end
+//!        --verbose       Print more status messages
+//!        --version       Print the version of Fasten and exit
+//!        --min-length INT
+//!                        Minimum read length allowed
+//!        --min-quality FLOAT
+//!                        Minimum quality allowed
+//!        --remove-info   Remove fasten_inspect headers
+//!    -m, --mode STRING   Either repair or panic. If panic, then the binary will
+//!                        panic when the first issue comes up. Default:repair
+//! ```
+//!
+//! # Methods of repair
+//!
+//! ## Repair mode
+//!
+//! If you choose `--mode repair` (the default), then this is the expected behavior
+//!
+//! * Mismatched seq and qual lengths: seq or qual length will be truncated
+//! * R1 or R2 not passing: then the other (R1 or R2) will also not pass and they will not be
+//!   printed.
+//!
+//! ## Panic mode
+//!
+//! * seq length < min length 
+//! * avg qual < min qual 
+//! * invalid characters in seq 
+//! * invalid characters in qual 
+//! * `@` not present in first character of the entry 
+//! * `+` not present in the first character of the third line 
+//! 
+
+extern crate getopts;
+extern crate fasten;
+use std::fs::File;
+use std::io::BufReader;
+use std::io::BufRead;
+use std::collections::HashMap;
+
+use fasten::fasten_base_options;
+use fasten::fasten_base_options_matches;
+use fasten::logmsg;
+
+fn main(){
+    // Extend the shared fasten options with the ones only this binary understands
+    let mut opts = fasten_base_options();
+    opts.optopt("","min-length","Minimum read length allowed","INT");
+    opts.optopt("","min-quality","Minimum quality allowed","FLOAT");
+    opts.optflag("", "remove-info", "Remove fasten_inspect headers");
+    opts.optopt("m", "mode", " Either repair or panic. If panic, then the binary will panic when the first issue comes up. Default:repair", "STRING");
+
+    let matches = fasten_base_options_matches("Repairs reads", opts);
+
+    // Both thresholds default to zero when their option is absent
+    let min_length :usize = match matches.opt_present("min-length") {
+        true => matches.opt_str("min-length")
+            .expect("ERROR parsing min-length")
+            .parse()
+            .expect("ERROR parsing min-length as INT"),
+        false => 0,
+    };
+    let min_qual :f32 = match matches.opt_present("min-quality") {
+        true => matches.opt_str("min-quality")
+            .expect("ERROR parsing min-quality")
+            .parse()
+            .expect("ERROR parsing min-quality as FLOAT"),
+        false => 0.0,
+    };
+
+    // Without --mode, behave as if `--mode repair` had been given
+    let mode :String = match matches.opt_present("mode") {
+        true => matches.opt_str("mode")
+            .expect("ERROR parsing mode"),
+        false => "repair".to_string(),
+    };
+
+    // Plain on/off switches
+    let is_paired  :bool = matches.opt_present("paired-end");
+    let strip_info :bool = matches.opt_present("remove-info");
+
+    repair_reads(
+        is_paired,
+        min_length,
+        min_qual,
+        strip_info,
+        &mode,
+    );
+}
+
+/// Repairs reads depending on the deflines by calling repair_one_read
+///
+/// Entries are read from `/dev/stdin` four lines at a time. With `paired_end`, the
+/// entry following each R1 is taken as its R2. An entry (or pair) is printed to
+/// stdout only if every read in it is good, and its messages are then logged as
+/// warnings; otherwise nothing is printed and the messages are logged as skips.
+///
+/// * `paired_end` - the input is interleaved R1/R2
+/// * `min_length`, `min_qual`, `remove_info` - handed to `repair_one_read` unchanged
+/// * `mode` - repairing is enabled only when this is exactly "repair"
+///
+/// # Panics
+///
+/// Panics if stdin cannot be opened or read, or if `paired_end` is set and the
+/// input ends right after an R1.
+fn repair_reads(paired_end:bool, min_length: usize, min_qual: f32, remove_info: bool, mode: &str) {
+    //behavior
+    let should_repair :bool = mode == "repair";
+
+    let stdin_handle = File::open("/dev/stdin").expect("Could not open file");
+    let mut reader = BufReader::new(stdin_handle);
+
+    // Fetches one entry: the untouched first line (kept for messages) followed by
+    // the four trimmed lines. None means the first line came back with zero bytes.
+    let mut next_entry = || -> Option<(String, String, String, String, String)> {
+        let mut raw_id = String::new();
+        if reader.read_line(&mut raw_id).expect("Cannot read new line") == 0 {
+            return None;
+        }
+        let mut seq_line  = String::new();
+        let mut plus_line = String::new();
+        let mut qual_line = String::new();
+        reader.read_line(&mut seq_line).expect("ERROR: failed to read 'seq' line");
+        reader.read_line(&mut plus_line).expect("ERROR: failed to read 'plus' line");
+        reader.read_line(&mut qual_line).expect("ERROR: failed to read 'qual' line");
+
+        let id   = raw_id.trim().to_string();
+        let seq  = seq_line.trim().to_string();
+        let plus = plus_line.trim().to_string();
+        let qual = qual_line.trim().to_string();
+        Some((raw_id, id, seq, plus, qual))
+    };
+
+    // Each iteration handles one entry, or one R1/R2 pair
+    while let Some((r1_id, id, seq, plus, qual)) = next_entry() {
+        // R1
+        let (r1, is_r1_good, err1):(String, bool, String) =
+            repair_one_read(id, seq, plus, qual, should_repair, min_length, min_qual, remove_info);
+
+        // Without pairing there is no R2: it counts as good, empty and silent
+        let (r2_id, r2, is_r2_good, err2):(String, String, bool, String) = if paired_end {
+            match next_entry() {
+                None => panic!("ERROR: paired end expected but not found after R1 {}", r1_id),
+                Some((raw_id, id, seq, plus, qual)) => {
+                    // R2 goes through the same treatment as R1
+                    let (entry, is_good, err) =
+                        repair_one_read(id, seq, plus, qual, should_repair, min_length, min_qual, remove_info);
+                    (raw_id, entry, is_good, err)
+                },
+            }
+        } else {
+            (String::new(), String::new(), true, String::new())
+        };
+
+        // One bad mate rejects the whole pair
+        let pair_is_good = is_r1_good && is_r2_good;
+        if !pair_is_good {
+            // Print the errors.
+            if !err1.is_empty() {
+                logmsg(format!("SKIP R1 {}\n=> {}\n", r1_id.trim(), err1));
+            }
+            if !err2.is_empty() {
+                logmsg(format!("SKIP R2 {}\n=> {}\n", r2_id.trim(), err2));
+            }
+            continue;
+        }
+
+        // Start with R1 for printing; an R2 goes on the lines right below it.
+        let mut to_print = r1;
+        if paired_end {
+            to_print.push_str("\n");
+            to_print.push_str(&r2);
+        }
+        println!("{}", to_print);
+
+        // If R1 and R2 are good, then any "errors" are warnings.
+        // Print the warnings.
+        if !err1.is_empty() {
+            logmsg(format!("WARNING(s) on R1: {}", err1));
+        }
+        if !err2.is_empty() {
+            logmsg(format!("WARNING(s) on R2: {}", err2));
+        }
+    }
+}
+
+/// Repair exactly one read
+///
+/// The `key:value` fields that fasten_inspect appended to the defline are parsed
+/// and used to judge the read; the sequence itself is not re-inspected.
+///
+/// * `id`, `seq`, `plus`, `qual` - the four lines of the fastq entry
+/// * `should_repair` - on a seq/qual length mismatch, truncate both lines to the
+///   shorter length (true) or panic (false)
+/// * `min_length`, `min_qual` - a `seq-length` or `avg-qual` below these is an error
+/// * `remove_info` - strip the fasten_inspect fields from the defline
+///
+/// Returns `(entry, is_good, messages)`: the four-line entry without a trailing
+/// newline, whether zero errors were counted, and the newline-terminated messages.
+///
+/// Panics on a length mismatch when `should_repair` is false, or on an unparsable number.
+fn repair_one_read(mut id:String, mut seq:String, plus:String, mut qual:String, should_repair:bool, min_length: usize, min_qual: f32, remove_info: bool) -> (String, bool, String) {
+    // Keys written by fasten_inspect. Only these count as information fields, so
+    // read names containing colons (e.g. Illumina-style IDs) are left intact.
+    const INSPECT_KEYS :[&str; 8] = ["id-at", "seq-length", "seq-invalid-chars", "id-plus",
+        "qual-invalid-chars", "avg-qual", "qual-length", "read-pair"];
+    // Eventual error message if any
+    let mut error = String::new();
+    let mut num_errors = 0;
+
+    // The defline without fasten_inspect info; fields are re-joined with one space
+    let mut identifier = String::new();
+    // Information about the read from the defline
+    let mut f:HashMap<&str, &str> = HashMap::new();
+    for field in id.split_whitespace() {
+        // Split on the first colon only, so that a value may itself hold colons
+        match field.split_once(':') {
+            Some((key, value)) if INSPECT_KEYS.contains(&key) => {
+                f.insert(key, value);
+            },
+            _ => {
+                if !identifier.is_empty() {
+                    identifier.push(' ');
+                }
+                identifier.push_str(field);
+            },
+        };
+    }
+
+    // get some variables out of the hash; absent keys use the defaults given here
+    let seq_length  :usize = f.get("seq-length").copied().unwrap_or("0").parse::<usize>().expect("ERROR parsing seq-length as INT");
+    let qual_length :usize = f.get("qual-length").copied().unwrap_or("0").parse::<usize>().expect("ERROR parsing qual-length as INT");
+    let avg_qual    :f32   = f.get("avg-qual").copied().unwrap_or("0").parse::<f32>().expect("ERROR parsing avg-qual as FLOAT");
+    let seq_invalid_chars  :&str = f.get("seq-invalid-chars").copied().unwrap_or("");
+    let qual_invalid_chars :&str = f.get("qual-invalid-chars").copied().unwrap_or("");
+    let _read_pair :u8 = f.get("read-pair").copied().unwrap_or("1").parse::<u8>().expect("ERROR parsing read-pair as INT"); // either 1 or 2
+    // these are either 1 (true) or 0 (false)
+    let id_at   :u8 = f.get("id-at").copied().unwrap_or("0").parse::<u8>().expect("ERROR parsing id-at as INT");
+    let id_plus :u8 = f.get("id-plus").copied().unwrap_or("0").parse::<u8>().expect("ERROR parsing id-plus as INT");
+
+    // Check seq length and qual length
+    if seq_length != qual_length {
+        if should_repair {
+            // The defline is only a claim: never cut past the real end of either
+            // line, nor inside a multi-byte character, so truncating cannot panic.
+            let mut new_length :usize = seq_length.min(qual_length).min(seq.len()).min(qual.len());
+            while !(seq.is_char_boundary(new_length) && qual.is_char_boundary(new_length)) {
+                new_length -= 1;
+            }
+            seq.truncate(new_length);
+            qual.truncate(new_length);
+            // Don't count this as an actual error and so don't increment.
+            error.push_str("Repaired sequence and qual length\n");
+        } else {
+            panic!("ERROR: seq length({}) did not match qual length({}) on seqid {}\n", &seq_length, &qual_length, &id);
+        }
+    }
+    if seq_length < min_length {
+        error.push_str(&format!("seq length({}) is less than min length specified ({})\n", &seq_length, &min_length));
+        num_errors += 1;
+    }
+    // Check quality score
+    if avg_qual < min_qual {
+        error.push_str(&format!("average quality ({}) is less than min quality ({})\n", &avg_qual, &min_qual));
+        num_errors += 1;
+    }
+    // check key seq-invalid-chars
+    if !seq_invalid_chars.is_empty() {
+        error.push_str(&format!("invalid seq characters found in {}\n", &id));
+        num_errors += 1;
+    }
+    // check key qual-invalid-chars
+    if !qual_invalid_chars.is_empty() {
+        error.push_str(&format!("invalid qual characters found in {}\n", &id));
+        num_errors += 1;
+    }
+    // check key id-at
+    if id_at < 1 {
+        error.push_str(&format!("no @ found at position 1 on line 1 for {}\n", &id));
+        num_errors += 1;
+    }
+    // check key id-plus
+    if id_plus < 1 {
+        error.push_str(&format!("no + found at position 1 on line 3 for {}\n", &id));
+        num_errors += 1;
+    }
+    // if the user requests, we can remove all fasten_inspect information
+    if remove_info {
+        id = identifier;
+    }
+    let entry :String = format!("{}\n{}\n{}\n{}", &id, &seq, &plus, &qual);
+    return (entry, num_errors == 0, error);
+}
+
diff --git a/src/bin/fasten_validate.rs b/src/bin/fasten_validate.rs
index ebc012e3..041bfc7c 100644
--- a/src/bin/fasten_validate.rs
+++ b/src/bin/fasten_validate.rs
@@ -54,8 +54,10 @@ use regex::Regex;
 
 use fasten::fasten_base_options;
 use fasten::fasten_base_options_matches;
+use fasten::logmsg;
 
 fn main(){
+    logmsg("NOTE: fasten_validate is deprecated starting in v0.6 in favor of fasten_inspect and fasten_repair");
     let mut opts = fasten_base_options();
     // Options specific to this script
     opts.optopt("","min-length","Minimum read length allowed","INT");
diff --git a/tests/fasten_kmer.sh b/tests/fasten_kmer.sh
index 416cc090..0613f4ee 100644
--- a/tests/fasten_kmer.sh
+++ b/tests/fasten_kmer.sh
@@ -2,11 +2,9 @@
 
 set -e
 
-cargo build
-
 INPUT=testdata/four_reads.pe.fastq
 
-AAA=$(./target/debug/fasten_kmer --kmer-length 3 < $INPUT | grep -m 1 AAA)
+AAA=$(./target/debug/fasten_kmer --kmer-length 3 --revcomp < $INPUT | grep -m 1 AAA)
 
 if [ "$AAA" != $'AAA\t31' ]; then
   echo "Fasten_kmer did not count kmers correctly"
diff --git a/tests/fasten_repair.sh b/tests/fasten_repair.sh
new file mode 100644
index 00000000..154afcea
--- /dev/null
+++ b/tests/fasten_repair.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Exercises fasten_inspect and fasten_repair on complete and truncated input.
+# Commands under test sit inside `if` so that, with `set -e` active, a failure
+# reaches its message instead of ending the script silently.
+set -e
+
+INPUT=testdata/four_reads.pe.fastq
+INSPECT=./target/debug/fasten_inspect
+REPAIR=./target/debug/fasten_repair
+
+# Single end: a good set of reads must be inspected without error
+if ! $INSPECT < "$INPUT" > /dev/null; then
+  echo "Test failed for validating a good set of reads"
+  exit 1
+fi
+
+# Single end: the last read is cut short; the default mode must still exit zero
+if ! head -n 15 "$INPUT" | $INSPECT | $REPAIR > /dev/null 2>&1; then
+  echo "Test failed for repairing a truncated set of reads"
+  exit 1
+fi
+
+# Paired end
+if ! $INSPECT --paired-end < "$INPUT" > /dev/null; then
+  echo "Test failed for validating a good set of paired end reads"
+  exit 1
+fi
+
+# Panic mode is expected to exit non-zero on the truncated pair
+if head -n 15 "$INPUT" | $INSPECT --paired-end | \
+  $REPAIR --paired-end --mode panic --remove-info > /dev/null 2>/dev/null; then
+  echo "Test failed for detecting a truncated set of reads"
+  exit 1
+fi
+
+# Repair mode is expected to exit zero on the same input
+if ! head -n 15 "$INPUT" | $INSPECT --paired-end | \
+  $REPAIR --paired-end --mode repair > /dev/null 2>&1; then
+  echo "Test failed for repairing a truncated set of reads"
+  exit 1
+fi
+
+# Test to make sure this gives 2 reads; an empty or non-numeric count also fails
+num_reads=$(
+    head -n 15 "$INPUT" | $INSPECT --paired-end | \
+    $REPAIR --paired-end --mode repair 2>/dev/null | \
+    ./target/debug/fasten_metrics | cut -f 2 | tail -n 1
+    )
+if ! [ "$num_reads" -eq 2 ]; then
+  echo "Test failed for repairing a truncated set of reads and returning exactly 2"
+  exit 1
+fi
+
+echo "$0 passed!" >&2
\ No newline at end of file
diff --git a/tests/fasten_straighten.sh b/tests/fasten_straighten.sh
index 6d533445..f45770ca 100644
--- a/tests/fasten_straighten.sh
+++ b/tests/fasten_straighten.sh
@@ -8,8 +8,8 @@ CORRECT=testdata/four_reads.fastq
 corrected=$(./target/debug/fasten_straighten < testdata/four_reads.gt_16_lines.fastq)
 
 if [ "$corrected" != "$(cat $CORRECT)" ]; then
-  echo "carol did not correct the reads"
+  echo "fasten_straighten did not correct the reads"
   exit 1
 fi
 
-echo "Carol passed!"
+echo "Fasten_straighten passed!"