Skip to content

Commit

Permalink
Add draft of fastq reader
Browse files Browse the repository at this point in the history
  • Loading branch information
peri4n committed Oct 25, 2024
1 parent 1d0b0aa commit 0af8358
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 8 deletions.
File renamed without changes.
65 changes: 65 additions & 0 deletions src/io/fastq.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use std::io::{BufRead, BufReader, Read};

/// Streaming reader for FASTQ-formatted input.
///
/// The wrapped source is buffered with a [`BufReader`] so that it can be read
/// one line at a time.
///
/// No trait bound is placed on the type itself; the bounds live on the `impl`
/// blocks that actually need them. `Debug` is still derived and is available
/// whenever `R` itself implements `Debug`.
#[derive(Debug)]
pub struct FastqReader<R> {
    /// Buffered handle over the underlying byte source.
    reader: BufReader<R>,
}

impl<R: Read> FastqReader<R> {
    /// Creates a FASTQ reader over `reader`, wrapping it in a [`BufReader`].
    ///
    /// Only [`Read`] is required of the source, so readers that do not
    /// implement `Debug` are accepted as well.
    pub fn new(reader: R) -> Self {
        Self {
            reader: BufReader::new(reader),
        }
    }
}

/// Reads the next line of `reader` into `buf`, replacing its previous contents.
///
/// Returns `None` when the input is exhausted or the read fails, so callers
/// can bail out with `?`.
fn read_fastq_line<B: BufRead>(reader: &mut B, buf: &mut String) -> Option<()> {
    buf.clear();
    match reader.read_line(buf) {
        // `read_line` reports end of input as `Ok(0)`.
        Ok(read) if read > 0 => Some(()),
        _ => None,
    }
}

/// Yields one [`FastqRecord`] per four-line FASTQ entry.
///
/// Lines are interpreted by their *position* inside the record:
///
/// 1. header, which must start with `@`; the rest of the line is the id,
/// 2. sequence,
/// 3. separator, which must start with `+`,
/// 4. quality string.
///
/// Deciding by position rather than by first character matters because `@`
/// and `+` are themselves valid quality symbols, so a quality line may
/// legitimately start with either of them. It also keeps empty sequence
/// lines from being confused with the quality line.
///
/// Blank lines in front of a header are skipped. Iteration ends (`None`) at
/// end of input, on an I/O error, on a truncated record, or when the header
/// or separator line does not have the expected leading character. The
/// `Item` type cannot carry an error, so these cases are indistinguishable
/// to the caller.
impl<R: Read> Iterator for FastqReader<R> {
    type Item = FastqRecord;

    fn next(&mut self) -> Option<Self::Item> {
        // One buffer is reused for all four lines of the record.
        let mut line = String::new();

        // Header: skip blank lines (e.g. trailing newlines at end of file).
        loop {
            read_fastq_line(&mut self.reader, &mut line)?;
            if !line.trim().is_empty() {
                break;
            }
        }
        let id = line.strip_prefix('@')?.trim().to_string();

        // Sequence: may be empty for zero-length reads.
        read_fastq_line(&mut self.reader, &mut line)?;
        let sequence = line.trim().to_string();

        // Separator: only its leading `+` is checked, the rest is ignored.
        read_fastq_line(&mut self.reader, &mut line)?;
        if !line.starts_with('+') {
            return None;
        }

        // Quality: kept as the raw bytes of the line, without the line ending.
        read_fastq_line(&mut self.reader, &mut line)?;
        let qualities = line.trim().as_bytes().to_vec();

        Some(FastqRecord::new(
            id,
            Dna::from_ascii(&sequence),
            qualities,
        ))
    }
}

use crate::dna::Dna;

/// A single entry of a FASTQ file.
#[derive(Debug, PartialEq)]
pub struct FastqRecord {
    /// Identifier of the record.
    pub id: String,
    /// Nucleotide sequence of the record.
    pub sequence: Dna,
    /// Quality values belonging to the sequence, one byte per entry.
    pub qualities: Vec<u8>,
}

impl FastqRecord {
    /// Builds a record from its three parts; the values are stored as given.
    pub fn new(id: String, sequence: Dna, qualities: Vec<u8>) -> Self {
        FastqRecord { id, sequence, qualities }
    }
}
2 changes: 2 additions & 0 deletions src/io/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod fasta;
pub mod fastq;
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![feature(portable_simd)]
#![feature(bufread_skip_until)]
//! `nuc` is a library for working with nucleotide sequences.
//!
//! Its goal is to provide the fastest and easiest way to work with DNA and RNA sequences.
Expand All @@ -7,7 +8,7 @@
pub mod dna;

/// Handles IO with FastA and FastQ files.
pub mod fasta;
pub mod io;

/// Handles hashing of DNA sequences.
pub mod hash;
2 changes: 0 additions & 2 deletions tests/dna_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ fn can_be_sorted() {
];
sequences.sort();

println!("{:?}", sequences);

assert_eq!(
sequences,
vec![
Expand Down
6 changes: 1 addition & 5 deletions tests/fasta_test.rs → tests/io/fasta_test.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
mod hash_test;

use std::fs::File;

use nuc::{
dna::Dna,
fasta::{FastaDna, FastaReader},
io::fasta::{FastaDna, FastaReader},
};

#[test]
Expand Down
30 changes: 30 additions & 0 deletions tests/io/fastq_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use nuc::{
dna::Dna,
io::fastq::{FastqReader, FastqRecord},
};

/// Parses one four-line record from an in-memory buffer and compares every
/// field of the result.
// NOTE(review): the name says "fasta", but the input and the reader are FASTQ.
#[test]
fn can_read_an_example_fasta_file() {
    let bases = "GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT";
    let quality_line = "!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65";
    // The input ends without a trailing newline after the quality line.
    let input = format!("@SEQ_ID\n{}\n+\n{}", bases, quality_line);

    let parsed: Vec<FastqRecord> = FastqReader::new(input.as_bytes()).collect();

    let expected = vec![FastqRecord {
        id: String::from("SEQ_ID"),
        sequence: Dna::from_ascii(bases),
        // Expected qualities are the ASCII byte values of the quality line.
        qualities: quality_line.as_bytes().to_vec(),
    }];
    assert_eq!(parsed, expected);
}
2 changes: 2 additions & 0 deletions tests/io/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod fasta_test;
pub mod fastq_test;
3 changes: 3 additions & 0 deletions tests/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod hash_test;
pub mod dna_test;
pub mod io;

0 comments on commit 0af8358

Please sign in to comment.