Skip to content

Commit

Permalink
Improve documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
peri4n committed Sep 10, 2024
1 parent c48ef30 commit 730b128
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 5 deletions.
131 changes: 131 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ edition = "2021"

[dependencies]
nom = "7.1.3"
rand = "0.8.5"

[lib]
name = "nuc"
Expand Down
45 changes: 40 additions & 5 deletions src/dna.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,41 @@
use std::fmt;
use std::mem::size_of;

const BITS_PER_NUCLEOTIDE: usize = 2;
const NR_OF_NUCLEOTIDES: usize = 4;

const NUCS_PER_BLOCK: usize = size_of::<u8>() * 4;
const BITS_PER_NUCLEOTIDE: usize = NR_OF_NUCLEOTIDES.ilog2() as usize;

const MASK: u8 = 3;
const NUCS_PER_BLOCK: usize = size_of::<u8>() * NR_OF_NUCLEOTIDES;

const MASK: u8 = NR_OF_NUCLEOTIDES as u8 - 1;

/// Represents a _case-insensitive_ DNA sequence.
///
/// The DNA sequence is stored in a compact way, using 2 bits per nucleotide.
/// This allows for a low memory footprint and fast bitwise operations.
#[derive(Debug, Eq)]
pub struct Dna {
/// Number of nucleotides in the sequence; this is the value reported by `len()`.
pub(crate) length: usize,
/// Backing storage blocks. `address` translates a nucleotide index into a
/// block index plus a bit offset inside that block.
pub(crate) nucleotides: Vec<u8>,
}

impl Dna {
/// Creates a new DNA sequence with the given length.
///
/// Every nucleotide is initialized to `A`.
pub fn new(length: usize) -> Dna {
Dna {
length,
nucleotides: vec![0; length],
}
}

/// Access the length of the DNA sequence.
///
/// ```
/// let dna = nuc::dna::Dna::from_ascii("ACGT");
/// assert_eq!(dna.len(), 4);
/// ```
pub fn len(&self) -> usize {
self.length
}
Expand All @@ -32,6 +47,12 @@ impl Dna {
}
}

/// Creates a DNA sequence from an ASCII string.
///
/// ```
/// let dna = nuc::dna::Dna::from_ascii("ACGT");
/// assert_eq!(dna.to_string(), "ACGT");
/// ```
pub fn from_ascii(ascii: &str) -> Dna {
let mut dna = Dna {
length: ascii.len(),
Expand All @@ -50,16 +71,30 @@ impl Dna {
dna
}

/// Generates a DNA sequence of `length` nucleotides chosen at random.
///
/// Each position is filled independently: a random byte is drawn and masked
/// with `MASK` so that only a valid nucleotide code remains, which is then
/// written into a sequence freshly created by `Dna::new`.
///
/// # Examples
///
/// ```
/// let dna = nuc::dna::Dna::random(8);
/// assert_eq!(dna.len(), 8);
/// ```
pub fn random(length: usize) -> Dna {
    let mut sequence = Dna::new(length);
    (0..length).for_each(|position| {
        // Keep only the low bits that encode a nucleotide.
        let nucleotide = rand::random::<u8>() & MASK;
        sequence.init_with(position, nucleotide);
    });
    sequence
}

#[inline(always)]
pub fn address(&self, index: usize) -> (usize, u8) {
/// Maps a nucleotide index to its storage location.
///
/// Returns `(block, bit)`: `block` is `index / NUCS_PER_BLOCK`, and `bit` is
/// the shift of that nucleotide inside the block. Slots are counted from the
/// most significant end, so the first nucleotide of a block gets the largest
/// shift and the last one gets shift `0`.
fn address(&self, index: usize) -> (usize, u8) {
    let slot = index % NUCS_PER_BLOCK;
    let shift = (NUCS_PER_BLOCK - 1 - slot) * BITS_PER_NUCLEOTIDE;
    (index / NUCS_PER_BLOCK, shift as u8)
}

/// Returns the internal bit sequence of the DNA sequence.
pub fn bit_string(&self) -> String {
let mut bit_string = String::new();
let mut bit_string = String::with_capacity(self.len() * 2);
for i in 0..self.nucleotides.len() {
bit_string.push_str(&format!("{:08b} ", self.nucleotides[i]));
}
Expand Down
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
//! `nuc` is a library for working with nucleotide sequences.
//!
//! Its goal is to provide the fastest and easiest way to work with DNA and RNA sequences.
/// Core functionality for working with nucleotide sequences.
pub mod dna;

/// Handles IO with FastA files.
pub mod fasta;

0 comments on commit 730b128

Please sign in to comment.