Skip to content

Commit

Permalink
Merge pull request #93 from jguhlin/Aligner-clone-logistics
Browse files Browse the repository at this point in the history
Aligner clone logistics
  • Loading branch information
jguhlin authored Nov 25, 2024
2 parents 58d5fe1 + 2366de0 commit 094fd09
Show file tree
Hide file tree
Showing 11 changed files with 315 additions and 75 deletions.
8 changes: 3 additions & 5 deletions .github/workflows/build-test-rust-minimap2-sys.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,15 @@ jobs:
- run: cd minimap2-sys
- name: Run minimap2-sys tests on Android aarch64
run: cross test --target aarch64-linux-android
- name: Run minimap2-sys tests on Android armv7
run: cross test --target armv7-linux-androideabi
- name: Run minimap2-sys tests on Android i686
run: cross test --target i686-linux-android
- name: Run minimap2-sys tests on Android x86_64
run: cross test --target x86_64-linux-android

test-macos-13:
runs-on: macos-13
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: 'recursive'
- run: cd minimap2-sys
- name: Run minimap2-sys tests
run: cargo test
Expand Down
14 changes: 14 additions & 0 deletions .github/workflows/build-test-rust-minimap2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,17 @@ jobs:
run: cargo test --features htslib
- name: Run tests simde
run: cargo test --features simde

test-android:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: 'recursive'
- name: Install Rust Cross
run: rustup target add aarch64-linux-android armv7-linux-androideabi i686-linux-android x86_64-linux-android
- run: cargo install cross --git https://github.com/cross-rs/cross
- name: Run minimap2 tests on Android aarch64
run: cross test --target aarch64-linux-android
- name: Run minimap2 tests on Android x86_64
run: cross test --target x86_64-linux-android
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ test_data/*.bam
test_data/*.mmi
.ipynb_checkpoints
.vscode/
.quarto
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,14 @@ rayon = "1.10"

[features]
default = ["map-file"]
sse2only = ["minimap2-sys/sse2only"]
map-file = ["needletail"] #, "simdutf8"]
htslib = ['rust-htslib']
simde = ["minimap2-sys/simde"]
map-file = ["needletail"] #, "simdutf8"]
zlib-ng = ["minimap2-sys/zlib-ng"]
curl = ["rust-htslib/curl"]
static = ["minimap2-sys/static", "rust-htslib/static"]
sse2only = ["minimap2-sys/sse2only"]


[package.metadata.docs.rs]
features = ["map-file", "htslib"]
72 changes: 48 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,21 @@ A rust FFI library for [minimap2](https://github.com/lh3/minimap2/). In developm
[![codecov](https://codecov.io/gh/jguhlin/minimap2-rs/branch/main/graph/badge.svg?token=huw27ZC6Qy)](https://codecov.io/gh/jguhlin/minimap2-rs)

# Structure
minimap2-sys is the library of the raw FFI bindings to minimap2. minimap2 is the more rusty version.
minimap2-sys is the raw FFI bindings to minimap2. minimap2 is the more opinionated, rusty version.

# How to use
## Requirements
```toml
minimap2 = "0.1.20+minimap2.2.28"
minimap2 = "0.1.21+minimap2.2.28"
```
Also see [Features](#features)

Tested with rustc 1.64.0 and nightly. So probably a good idea to upgrade before running. But let me know if you run into pain points with older versions and will try to fix!
Tested with rustc 1.82.0 and nightly. So probably a good idea to upgrade before running. But let me know if you run into pain points with older versions and will try to fix.

## Minimap2 Version Table
| minimap2-rs | minimap2 |
|-------------|----------|
| 0.1.21 | 2.28 |
| 0.1.20 | 2.28 |
| 0.1.19 | 2.28 |
| 0.1.18 | 2.28 |
Expand All @@ -42,7 +43,7 @@ Align a sequence:
```rust
let seq: Vec<u8> = b"ACTGACTCACATCGACTACGACTACTAGACACTAGACTATCGACTACTGACATCGA";
let alignment = aligner
.map(&seq, false, false, None, None)
.map(&seq, false, false, None, None, Some(b"My Sequence Name"))
.expect("Unable to align");
```

Expand All @@ -53,6 +54,9 @@ let aligner = map_ont();
let aligner = asm20();
```

**Note** Each preset overwrites different arguments. Using multiple at a time is not technically supported, but will work. Results unknown. So be careful!
It's equivalent to running minimap2 -x map_ont -x short ...

### Customization
[MapOpts](https://docs.rs/minimap2-sys/0.1.5/minimap2_sys/struct.mm_mapopt_t.html) and [IdxOpts](https://docs.rs/minimap2-sys/0.1.5/minimap2_sys/struct.mm_idxopt_t.html) can be customized with Rust's struct pattern, as well as applying mapping settings. Inspired by [bevy](https://bevyengine.org/).
```rust
Expand Down Expand Up @@ -83,7 +87,7 @@ let mut fasta = Fasta::from_buffer(&mut reader)
for seq in reader {
let seq = seq.unwrap();
let alignment: Vec<Mapping> = aligner
.map(&seq.sequence.unwrap(), false, false, None, None)
.map(&seq.sequence.unwrap(), false, false, None, None, None)
.expect("Unable to align");
println!("{:?}", alignment);
}
Expand Down Expand Up @@ -111,25 +115,27 @@ This _appears_ to work.
use rayon::prelude::*;

let results = sequences.par_iter().map(|seq| {
aligner.map(seq.as_bytes(), false, false, None, None).unwrap()
aligner.map(seq.as_bytes(), false, false, None, None, None).unwrap()
}).collect::<Vec<_>>();
```

## Features
The following crate features are available:
* `mm2-fast` - Replace minimap2 with [mm2-fast](https://github.com/bwa-mem2/mm2-fast). This is likely not portable.
* `htslib` - Support output of bam/sam files using htslib.
* `simde` - Compile minimap2 / mm2-fast with [simd-everywhere](https://github.com/simd-everywhere/simde) support.
* `map-file` - *Default* - Convenience function for mapping an entire file. Caution, this is single-threaded.
* `sse2only` - Compiles for SSE2 support only (Default is to try to compile for SSE4.1, SSE2 only is default on aarch64)
* map-file - Enables the ability to map a file directly to a reference. Enabled by deafult
* htslib - Provides an interface to minimap2 that returns rust_htslib::Records
* simde - Enables SIMD Everywhere library in minimap2
* zlib-ng - Enables the use of zlib-ng for faster compression
* curl - Enables curl for htslib
* static - Builds minimap2 as a static library
* sse2only - Builds minimap2 with only SSE2 support

Map-file is a *default* feature and enabled unless otherwise specified.

## Missing Features
* setting mismatch penalty for base transitions [minimap 2.27 release notes](https://github.com/lh3/minimap2/releases/tag/v2.27)
* Generate ds tags to indicate uncertainty in indels

Potentially more, but I'm using this to keep track. I'd expect those would get implemented over time, but if you have urgent need open a pull request or an issue! Thanks
Potentially others. Please create an issue!

## Building for MUSL
Follow these [instructions](https://github.com/rust-cross/rust-musl-cross#prebuilt-images).
Expand All @@ -141,10 +147,9 @@ alias rust-musl-builder='docker run --rm -it -v "$(pwd)":/home/rust/src messense
rust-musl-builder cargo build --release
```

Please note minimap2 is only tested for x86_64. Other platforms may work, please open an issue if minimap2 compiles but minimap2-rs does not.
Minimap2 is tested on x86_64 and aarch64 (arm64). Other platforms may work, please open an issue if minimap2 compiles but minimap2-rs does not.

### Features tested with MUSL
* `mm2-fast` - **Fail**
* `htslib` - **Success**
* `simde` - **Success**

Expand All @@ -153,23 +158,19 @@ Please note minimap2 is only tested for x86_64. Other platforms may work, please
- [mappy-rs](https://github.com/Adoni5/mappy-rs) - Drop-in multi-threaded replacement for python's mappy
- [HiFiHLA](https://github.com/PacificBiosciences/hifihla) - HLA star-calling tool for PacBio HiFi data
- [STRdust](https://github.com/wdecoster/STRdust) - Tandem repeat genotyper for long reads

# Want feedback
* Many fields are i32 / i8 to mimic the C environment, but would it make more sense to convert to u32 / u8 / usize?
* Let me know pain points!
- [oarfish](https://github.com/COMBINE-lab/oarfish) - transcript quantification from long-read RNA-seq data

# Next things todo
* Print other tags so we can have an entire PAF format
* -sys Compile with SSE2 / SSE4.1 (auto-detect, but also make with features)
* Multi-thread guide (tokio async threads or use crossbeam queue and traditional threads?)
* Iterator interface for map_file
* MORE TESTS
* -sys Get SSE working with "sse" feature (compiles and tests work in -sys crate, but not main crate)
* -sys Possible to decouple from pthread?
* -sys Enable Lisa-hash for mm2-fast? But must handle build arguments from the command-line.

# Citation
You should cite the minimap2 papers if you use this in your work.
Please cite the appropriate minimap2 papers if you use this in your work, as well as this library.

## DOI for this library

## Minimap2 Papers

> Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences.
> *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi]
Expand All @@ -180,6 +181,29 @@ and/or:
> *Bioinformatics*, **37**:4572-4574. [doi:10.1093/bioinformatics/btab705][doi2]
# Changelog
### 0.1.21 minimap2 2.28
Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB
#### Breaking Changes
+ Map now returns Arc String's to reduce memory allocation for large and/or repetitive jobs
+ map now takes an additional argument, query_name: Option<impl AsRef<[u8]>>, possibly solves [#75](https://github.com/jguhlin/minimap2-rs/issues/75) (@rob-p @mbhall88 @jguhlin)
+ Arc the Index, to prevent double-frees, solves [#71](https://github.com/jguhlin/minimap2-rs/issues/71)
+ Map file now passes in query name, which should help with [#75](https://github.com/jguhlin/minimap2-rs/issues/75)
+ Supplementary flag now better detected (@rob-p)
+ FIX: Cigar string missing softclip operation (@Sam-Sims)

### Other Changes
+ Add ergonomic functions n_seq and get_seq.
+ Better docs on applying presets, solves [#84](https://github.com/jguhlin/minimap2-rs/issues/84)
+ Better detection of target arch c_char's and ptr's, solves [#82](https://github.com/jguhlin/minimap2-rs/issues/82)
+ Support for M1 Mac compilation and addition of github workflows to test it, solving [#81](https://github.com/jguhlin/minimap2-rs/issues/81)
+ Rayon test, so some support, closes [#5](https://github.com/jguhlin/minimap2-rs/issues/5)
+ Static str's and now static CStr's
+ FIX: memory leak due to sequences allocated by minimap2 not being freed @charlesgregory
+ Add Send + Sync to Aligner, along with unit test @PB-DB
+ Only use rust-htslib/curl when curl feature is enabled @PB-DB
+ Mark bam::Record objects as supplementary @PB-DB
+ Experimental Android support (tested on aarch64 and x86_64), solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66)

### 0.1.20 minimap2 2.28
+ Fix htslib errors. No update to -sys crate needed.

Expand Down
2 changes: 1 addition & 1 deletion minimap2-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "minimap2-sys"
version = "0.1.19+minimap2.2.28"
version = "0.1.20+minimap2.2.28"
edition = "2021"
links = "libminimap2"
authors = ["Joseph Guhlin <joseph.guhlin@gmail.com>"]
Expand Down
13 changes: 10 additions & 3 deletions minimap2-sys/README.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
# System Bindings for libminimap2
Use this if you need lower-level bindings for minimap2. Also works with mm2-fast.
Use this if you need lower-level bindings for minimap2.

# Minimap2 Version
Sync'd to minimap2 2.27
Minimap2 2.28

## Breaking Changes
### 0.0.18
mm2-fast and minimap2 have diverged. At this point mm2-fast is no longer supported. Please use a previous crate version.

## Features
* vendored - Regenerate the bindings from the vendored minimap2 source. Requires llvm installed. Useful to update the bindings to a different version of minimap2.
* mm2-fast - Use [mm2-fast](https://github.com/bwa-mem2/mm2-fast) as the backend rather than minimap2 itself.
* simde - Enable simde support (SIMD-everywhere)
* sse - Enable some sse bindings
* zlib-ng - Use zlib-ng
* static - Static compilation

## TODO
* Can we decouple from pthread? This would allow Windows and (possibly) WASM compilation.

## Changelog
### 0.1.20 minimap2.2.28
* Move Drop impl to -sys crate

### 0.1.19 minimap2.2.28
* Update to minimap2 2.28

### 0.1.18 minimap2.2.27
* Regenerated bindings
* mm2-fast and minimap2 have diverged
Expand Down
2 changes: 1 addition & 1 deletion minimap2-sys/src/bindings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4686,7 +4686,7 @@ fn bindgen_test_layout_mm_idx_seq_t() {
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
#[derive(Debug, Clone)]
pub struct mm_idx_t {
pub b: i32,
pub w: i32,
Expand Down
7 changes: 7 additions & 0 deletions minimap2-sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ unsafe impl Send for mm_idx_t {}
unsafe impl Send for mm_idx_reader_t {}
unsafe impl Send for mm_mapopt_t {}

impl Drop for mm_idx_t {
fn drop(&mut self) {
unsafe { mm_idx_destroy(self) };
}
}


use std::mem::MaybeUninit;

impl Default for mm_mapopt_t {
Expand Down
16 changes: 9 additions & 7 deletions src/htslib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ use rust_htslib::bam::{Header, HeaderView, Record};
use std::ffi::{CStr, CString};
use std::mem::MaybeUninit;
use std::ptr;
use std::sync::Arc;

/// A wrapper around mm_bseq1_t
#[derive(Debug)]
Expand Down Expand Up @@ -164,7 +165,7 @@ impl Aligner {

let mm_reg = MaybeUninit::new(unsafe {
mm_ffi::mm_map(
self.idx.unwrap() as *const mm_ffi::mm_idx_t,
&**self.idx.as_ref().unwrap().as_ref() as *const mm_ffi::mm_idx_t,
query.inner.l_seq,
query.inner.seq as *const libc::c_char,
&mut n_regs,
Expand All @@ -191,7 +192,7 @@ impl Aligner {
// TODO: use mm_write_sam3 t do the writing so that we can pass the map_opt flags
mm_ffi::mm_write_sam(
result.as_mut_ptr(),
self.idx.unwrap() as *const mm_ffi::mm_idx_t,
&**self.idx.as_ref().unwrap().as_ref() as *const mm_ffi::mm_idx_t,
&query.inner as *const mm_ffi::mm_bseq1_t,
const_ptr,
n_regs,
Expand Down Expand Up @@ -304,19 +305,20 @@ pub struct SeqMetaData {

#[derive(Debug)]
pub struct MMIndex {
pub inner: mm_ffi::mm_idx_t,
pub inner: Arc<*mut mm_ffi::mm_idx_t>,
}

impl MMIndex {
pub fn n_seq(&self) -> u32 {
self.inner.n_seq
unsafe { (**self.inner).n_seq }
}

pub fn seqs(&self) -> Vec<SeqMetaData> {
let mut seqs: Vec<SeqMetaData> = Vec::with_capacity(self.n_seq() as usize);
for i in 0..self.n_seq() {
let _seq = unsafe { *(self.inner.seq).offset(i as isize) };
let c_str = unsafe { ffi::CStr::from_ptr(_seq.name) };
let _seq = unsafe { *(**self.inner).seq.offset(i as isize) };
// let c_str = unsafe { ffi::CStr::from_ptr(_seq.name) };
let c_str = unsafe { CStr::from_ptr(_seq.name) };
let rust_str = c_str.to_str().unwrap().to_string();
seqs.push(SeqMetaData {
name: rust_str,
Expand Down Expand Up @@ -344,7 +346,7 @@ impl From<&Aligner> for MMIndex {
fn from(aligner: &Aligner) -> Self {
MMIndex {
// inner: aligner.idx.unwrap(),
inner: unsafe { **aligner.idx.as_ref().unwrap() },
inner: std::sync::Arc::clone(&aligner.idx.as_ref().unwrap())
}
}
}
Expand Down
Loading

0 comments on commit 094fd09

Please sign in to comment.