Skip to content

Commit

Permalink
Add hash benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
peri4n committed Oct 3, 2024
1 parent 303efdc commit 1d0b0aa
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 7 deletions.
3 changes: 2 additions & 1 deletion benches/all_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use criterion::criterion_main;
mod benchmarks;

criterion_main! {
benchmarks::fasta_benchmark::fasta_benches,
benchmarks::dna_benchmark::dna_benches,
benchmarks::fasta_benchmark::fasta_benches,
benchmarks::hash_benchmark::hash_benches,
}
18 changes: 18 additions & 0 deletions benches/benchmarks/hash_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use criterion::{criterion_group, criterion_main, Criterion};
use nuc::dna::Dna;

/// Criterion benchmark for `nuc::hash::hash_chars_be`.
///
/// Builds a `Dna::random(1_000_000)` sequence once, outside the timed
/// closure, then on every iteration walks its bytes in chunks of four and
/// hashes each chunk.
pub fn hash_nucleotides_benchmark(c: &mut Criterion) {
    let dna = Dna::random(1_000_000);
    let bytes = dna.as_bytes();
    c.bench_function("Hashing random nucleotides", |b| {
        b.iter(|| {
            // `black_box` on the input and on every result keeps the optimizer
            // from proving the hashes are unused and deleting the very work
            // this benchmark is supposed to time.
            for chunk in std::hint::black_box(&bytes).chunks(4) {
                std::hint::black_box(nuc::hash::hash_chars_be(chunk));
            }
        })
    });
}

// Declares the `hash_benches` group; `benches/all_benchmark.rs` lists it as
// `benchmarks::hash_benchmark::hash_benches` in its own `criterion_main!`.
criterion_group!(hash_benches, hash_nucleotides_benchmark);
// NOTE(review): this module is already driven by the `criterion_main!` in
// `benches/all_benchmark.rs`, so this second invocation looks redundant —
// confirm whether this file is also meant to be built as a standalone bench.
criterion_main!(hash_benches);
1 change: 1 addition & 0 deletions benches/benchmarks/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod fasta_benchmark;
pub mod dna_benchmark;
pub mod hash_benchmark;
19 changes: 13 additions & 6 deletions src/hash.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use std::sync::LazyLock;

pub static CHAR_TO_TWO_BIT: LazyLock<[u8; 256]> = LazyLock::new(|| {
pub const CHAR_TO_TWO_BIT: [u8; 256] = {
let mut cache = [0; 256];
cache[99] = 1; // lowercase c
cache[67] = 1; // uppercase C
Expand All @@ -9,15 +7,24 @@ pub static CHAR_TO_TWO_BIT: LazyLock<[u8; 256]> = LazyLock::new(|| {
cache[116] = 3; // lowercase t
cache[84] = 3; // uppercase T
cache
});
};

/// Hashes a byte slice into a 8-bit integer.
pub fn hash_chars_be(bytes: &[u8]) -> u8 {
match bytes.len() {
1 => CHAR_TO_TWO_BIT[bytes[0] as usize] << 6,
2 => CHAR_TO_TWO_BIT[bytes[0] as usize] << 6 | (CHAR_TO_TWO_BIT[bytes[1] as usize] << 4),
3 => CHAR_TO_TWO_BIT[bytes[0] as usize] << 6 | (CHAR_TO_TWO_BIT[bytes[1] as usize] << 4) | (CHAR_TO_TWO_BIT[bytes[2] as usize] << 2),
_ => CHAR_TO_TWO_BIT[bytes[0] as usize] << 6 | (CHAR_TO_TWO_BIT[bytes[1] as usize] << 4) | (CHAR_TO_TWO_BIT[bytes[2] as usize] << 2) | (CHAR_TO_TWO_BIT[bytes[3] as usize]),
3 => {
CHAR_TO_TWO_BIT[bytes[0] as usize] << 6
| (CHAR_TO_TWO_BIT[bytes[1] as usize] << 4)
| (CHAR_TO_TWO_BIT[bytes[2] as usize] << 2)
}
_ => {
CHAR_TO_TWO_BIT[bytes[0] as usize] << 6
| (CHAR_TO_TWO_BIT[bytes[1] as usize] << 4)
| (CHAR_TO_TWO_BIT[bytes[2] as usize] << 2)
| (CHAR_TO_TWO_BIT[bytes[3] as usize])
}
}
}

Expand Down

0 comments on commit 1d0b0aa

Please sign in to comment.