Improve documentation

peri4n · Sep 10, 2024 · 730b128 · 730b128
1 parent c48ef30
commit 730b128
Show file tree

Hide file tree

Showing 4 changed files with 179 additions and 5 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 
 [dependencies]
 nom = "7.1.3"
+rand = "0.8.5"
 
 [lib]
 name = "nuc"

diff --git a/src/dna.rs b/src/dna.rs
@@ -1,26 +1,41 @@
 use std::fmt;
 use std::mem::size_of;
 
-const BITS_PER_NUCLEOTIDE: usize = 2;
+const NR_OF_NUCLEOTIDES: usize = 4;
 
-const NUCS_PER_BLOCK: usize = size_of::<u8>() * 4;
+const BITS_PER_NUCLEOTIDE: usize = NR_OF_NUCLEOTIDES.ilog2() as usize;
 
-const MASK: u8 = 3;
+const NUCS_PER_BLOCK: usize = size_of::<u8>() * NR_OF_NUCLEOTIDES;
 
+const MASK: u8 = NR_OF_NUCLEOTIDES as u8 - 1;
+
+/// Represents a _case-insensitive_ DNA sequence.
+///
+/// The DNA sequence is stored in a compact way, using 2 bits per nucleotide.
+/// This allows for a low memory footprint and fast bitwise operations.
 #[derive(Debug, Eq)]
 pub struct Dna {
     pub(crate) length: usize,
     pub(crate) nucleotides: Vec<u8>,
 }
 
 impl Dna {
+    /// Creates a new DNA sequence with the given length.
+    ///
+    /// Every nucleotide is initialized to `A`.
     pub fn new(length: usize) -> Dna {
         Dna {
             length,
             nucleotides: vec![0; length],
         }
     }
 
+    /// Returns the length of the DNA sequence.
+    ///
+    /// ```
+    /// let dna = nuc::dna::Dna::from_ascii("ACGT");
+    /// assert_eq!(dna.len(), 4);
+    /// ```
     pub fn len(&self) -> usize {
         self.length
     }
@@ -32,6 +47,12 @@ impl Dna {
         }
     }
 
+    /// Creates a DNA sequence from an ASCII string.
+    ///
+    /// ```
+    /// let dna = nuc::dna::Dna::from_ascii("ACGT");
+    /// assert_eq!(dna.to_string(), "ACGT");
+    /// ```
     pub fn from_ascii(ascii: &str) -> Dna {
         let mut dna = Dna {
             length: ascii.len(),
@@ -50,16 +71,30 @@ impl Dna {
         dna
     }
 
+    /// Draws a random DNA sequence with the given length.
+    ///
+    /// ```
+    /// let dna = nuc::dna::Dna::random(8);
+    /// assert_eq!(dna.len(), 8);
+    /// ```
+    pub fn random(length: usize) -> Dna {
+        let mut dna = Dna::new(length);
+        for i in 0..length {
+            dna.init_with(i, rand::random::<u8>() & MASK);
+        }
+        dna
+    }
+
     #[inline(always)]
-    pub fn address(&self, index: usize) -> (usize, u8) {
+    fn address(&self, index: usize) -> (usize, u8) {
         let block = index / NUCS_PER_BLOCK;
         let bit = ((NUCS_PER_BLOCK - 1 - (index % NUCS_PER_BLOCK)) * BITS_PER_NUCLEOTIDE) as u8;
         (block, bit)
     }
 
     /// Returns the internal bit sequence of the DNA sequence.
     pub fn bit_string(&self) -> String {
-        let mut bit_string = String::new();
+        let mut bit_string = String::with_capacity(self.len() * 2);
         for i in 0..self.nucleotides.len() {
             bit_string.push_str(&format!("{:08b} ", self.nucleotides[i]));
         }

diff --git a/src/lib.rs b/src/lib.rs
@@ -1,2 +1,9 @@
+//! `nuc` is a library for working with nucleotide sequences.
+//!
+//! Its goal is to provide the fastest and easiest way to work with DNA and RNA sequences.
+
+/// Core functionality for working with nucleotide sequences.
 pub mod dna;
+
+/// Handles IO with FastA files.
 pub mod fasta;