Skip to content

Commit

Permalink
Fix bug in fasta reader
Browse files Browse the repository at this point in the history
- files with one entry were parsed incorrectly
  • Loading branch information
peri4n committed Sep 30, 2024
1 parent bd7bc48 commit 4081357
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 21 deletions.
36 changes: 27 additions & 9 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
[toolchain]
channel = "nightly"
components = ["rust-src", "rustfmt"]
profile = "default"
2 changes: 1 addition & 1 deletion src/dna.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ impl Dna {
///
/// It assumes that each nucleotide is stored in 2 bits.
pub fn bytes_to_store(length: usize) -> usize {
((length as f32) / NUCS_PER_BLOCK as f32).ceil() as usize
(length / NUCS_PER_BLOCK) + if length % NUCS_PER_BLOCK == 0 { 0 } else { 1 }
}
}

Expand Down
17 changes: 7 additions & 10 deletions src/fasta.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,37 @@
use std::io::{BufReader, BufRead, Read};

/// Reader that yields FASTA records from any `Read` source.
///
/// Records are produced through the `Iterator` implementation for this type.
#[derive(Debug)]
pub struct FastaReader<R: Read> {
/// Buffered wrapper around the underlying input; lines are pulled from it
/// with `read_line`.
reader: BufReader<R>,
/// Identifier taken from a `>` header line (text after the `>`, trimmed)
/// that has already been read; it is consumed as the record id at the
/// start of the following `next()` call. `None` when no header is pending.
next_id: Option<String>,

}

// NOTE(review): two alternative `impl` headers follow (with and without the
// `std::fmt::Debug` bound); only one of them belongs to the current source —
// confirm against the repository.
impl<R: Read> FastaReader<R> {
impl<R: Read + std::fmt::Debug> FastaReader<R> {
/// Creates a reader over `reader`.
///
/// The input is wrapped in a `BufReader`, and no record id is pending
/// initially (`next_id` starts as `None`).
pub fn new(reader: R) -> Self {
Self { reader: BufReader::new(reader), next_id: None }
}
}

impl<R: Read> Iterator for FastaReader<R> {
impl<R: Read + std::fmt::Debug> Iterator for FastaReader<R> {
type Item = FastaDna;

fn next(&mut self) -> Option<Self::Item> {
println!("next:");
let mut id = self.next_id.take().unwrap_or_default();
let mut sequence = String::with_capacity(1000);
//if let Ok(buffer) = self.reader.fill_buf() {
//
//} else {
//
//}

let mut line = String::new();
loop {
let mut line = String::new();
match self.reader.read_line(&mut line) {
// buffer is completely read
Ok(0) => {
if id.is_empty() {
return None;
} else {
self.next_id = None;
return Some(FastaDna { id, sequence: Dna::from_ascii(&sequence) });
}
}
// buffer is not empty yet
Ok(_) => {
if line.starts_with('>') {
self.next_id = Some(line[1..].trim().to_string());
Expand All @@ -52,6 +48,7 @@ impl<R: Read> Iterator for FastaReader<R> {
return None;
}
}
line.clear();
}
}
}
Expand Down
11 changes: 10 additions & 1 deletion tests/fasta_test.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::fs::File;

use nuc::{
dna::Dna,
fasta::{self, FastaDna, FastaReader},
fasta::{FastaDna, FastaReader},
};

#[test]
Expand Down Expand Up @@ -40,3 +42,10 @@ fn can_read_an_empty_fasta_file() {
let records = FastaReader::new("".as_bytes()).into_iter().collect::<Vec<_>>();
assert_eq!(records, vec![]);
}

/// Opens a FASTA file from disk and checks that exactly one record is read.
#[test]
fn read_chromosome1() {
    // NOTE(review): absolute path into a developer's home directory — this
    // test panics on `unwrap` wherever the file is absent; confirm whether a
    // repository-local fixture should be used instead.
    let file = File::open("/home/fbull/test.fa").unwrap();
    let record_count = FastaReader::new(file).count();

    assert_eq!(record_count, 1);
}

0 comments on commit 4081357

Please sign in to comment.