Skip to content

Commit

Permalink
Merge pull request #1 from david-bouyssie/main
Browse files Browse the repository at this point in the history
Initial commit based on preliminary work
  • Loading branch information
david-bouyssie authored Nov 8, 2023
2 parents 9273d08 + 1fc22d1 commit 98fe5a3
Show file tree
Hide file tree
Showing 20 changed files with 2,454 additions and 13 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
path: mzcore-py/dist

windows:
runs-on: windows-latest
Expand All @@ -37,7 +37,7 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
path: mzcore-py/dist

macos:
runs-on: macos-latest
Expand All @@ -47,12 +47,12 @@ jobs:
with:
command: build
working-directory: mzcore-py
args: -o dist --strip --find-interpreter
args: -o dist --strip --find-interpreter --target universal2-apple-darwin
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
name: wheels
path: dist
path: mzcore-py/dist

# release:
# name: Release
Expand Down
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[workspace]

members = [
"mzcore-rs",
"mzcore-py"
]
5 changes: 3 additions & 2 deletions mzcore-py/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
name = "mzcore-py"
version = "0.1.0"
publish = false
edition = "2021"

[lib]
name = "mzcore_py"
crate-type = ["cdylib"]

[dependencies]
anyhow = "1.0.68"
pyo3 = { version = "0.17.3", features = ["extension-module", "anyhow"] }
anyhow = "1.0.75"
pyo3 = { version = "0.20.0", features = ["extension-module", "anyhow"] }
mzcore = { path = "../mzcore-rs" }
4 changes: 2 additions & 2 deletions mzcore-py/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "Core utilities for mass spectrometry."
readme = "README.md"
keywords = ["mass spectrometry", "proteomics"]
authors = []
license = {file = "LICENSE"}
license = {file = "../LICENSE"}
dependencies = []
dynamic = ["version"]
classifiers = [
Expand Down Expand Up @@ -35,5 +35,5 @@ doc = [
]

[build-system]
requires = ["maturin>=0.14,<0.15"]
requires = ["maturin>=1.0.0,<1.3.0"]
build-backend = "maturin"
7 changes: 7 additions & 0 deletions mzcore-py/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
use pyo3::prelude::*;

/// Python bindings for mzcore.
///
/// Module initializer generated through `#[pymodule]`; it currently registers
/// nothing on the module and simply reports success.
#[pymodule]
fn mzcore_py(_py: Python<'_>, _m: &PyModule) -> PyResult<()> {
    // Neither parameter is used yet: the leading underscores silence the
    // `unused_variables` warnings until items are added to the module.
    Ok(())
}
13 changes: 8 additions & 5 deletions mzcore-rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "mzcore"
version = "0.1.0"
edition = "2023"
edition = "2021"
homepage = "https://github.com/rusteomics"
repository = "https://github.com/rusteomics/mzcore/"
license = "Apache-2.0"
Expand All @@ -11,10 +11,13 @@ categories = ["science"]
publish = false

[lib]
name = "mzcore_py"
crate-type = ["cdylib"]
name = "mzcore"
#crate-type = ["cdylib"]

[dependencies]
anyhow = "1.0.68"
fallible-iterator = "0.2.0"
anyhow = "1.0.75"
fallible-iterator = "0.3.0"
fast-float = "0.2.0"
itertools = "0.11.0"
lazy_static = "1.4.0"
serde = { version = "*", features = ["derive"] }
141 changes: 141 additions & 0 deletions mzcore-rs/src/chemistry/composition.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
///
/// Some parts of this file originate from [Sage](https://github.com/lazear/sage/blob/master/crates/sage/src/mass.rs)
/// Copyright (c) 2022 Michael Lazear
/// SPDX-License-Identifier: MIT
///
///
use anyhow::*;
use std::collections::HashMap;

/// Counts how many times each character occurs in `sequence`.
///
/// Whitespace characters are skipped; every other character is tallied as-is
/// (no validation against an amino-acid alphabet is performed). The counts are
/// returned as `f32` values keyed by character. As written, this function
/// never produces an `Err`.
pub fn parse_aa_composition(sequence: &str) -> Result<HashMap<char, f32>> {
    // Tally with integer counters first, then convert each total to `f32`.
    let mut occurrences: HashMap<char, i32> = HashMap::new();
    for residue in sequence.chars().filter(|c| !c.is_whitespace()) {
        *occurrences.entry(residue).or_insert(0) += 1;
    }

    let abundance_map: HashMap<char, f32> = occurrences
        .into_iter()
        .map(|(residue, count)| (residue, count as f32))
        .collect();

    Ok(abundance_map)
}

/*pub fn parse_atom_composition(formula: &str, atom_table: AtomTable) -> Result<HashMap<char, f32>> {
let atom_by_symbol = atom_table.atom_by_symbol;*/

/// Parses a whitespace-separated chemical formula into a map of atom symbol to abundance.
///
/// Each element is either a bare symbol (`"O"`, abundance 1) or a symbol
/// followed by a parenthesised signed integer count (`"H(2)"`, `"O(-1)"`).
/// Abundances of a symbol that appears several times are summed. Leading,
/// trailing and repeated whitespace is ignored, so an empty or blank formula
/// yields an empty map.
///
/// # Errors
///
/// Returns an error when an element has an empty symbol (e.g. `"(2)"`), lacks
/// the closing parenthesis after its count, or carries a count that does not
/// parse as an `i32`.
pub fn parse_atom_composition(formula: &str) -> Result<HashMap<String, f32>> {
    let mut abundance_map: HashMap<String, f32> = HashMap::new();

    // `split_whitespace` never yields empty items, unlike `split(" ")` which
    // produced a bogus "" symbol for empty input or doubled/trailing spaces.
    for elem in formula.split_whitespace() {
        let (atom_symbol, abundance) = match elem.split_once('(') {
            // No count given: the element is the symbol itself, present once.
            None => (elem, 1),
            Some((symbol, rest)) => {
                let count_str = rest.strip_suffix(')').ok_or_else(|| {
                    anyhow!("missing closing parenthesis in formula element '{}'", elem)
                })?;
                let count: i32 = count_str.parse().map_err(|err| {
                    anyhow!("invalid abundance in formula element '{}': {}", elem, err)
                })?;
                (symbol, count)
            }
        };

        if atom_symbol.is_empty() {
            return Err(anyhow!("no element symbol in formula element '{}'", elem));
        }

        // Accumulate instead of overwriting so repeated symbols are not lost.
        *abundance_map.entry(atom_symbol.to_string()).or_insert(0.0) += abundance as f32;
    }

    Ok(abundance_map)
}

/// Merges two atom compositions by adding the abundances of matching symbols.
///
/// Symbols present in only one of the inputs are carried over unchanged.
/// Both maps are consumed; the result reuses the storage of `atom_comp1`.
pub fn sum_atom_compositions(atom_comp1: HashMap<String, f32>, atom_comp2: HashMap<String, f32>) -> HashMap<String, f32> {
    // `atom_comp1` is already owned, so it serves as the accumulator directly
    // (the previous `.clone()` duplicated the whole map for nothing).
    let mut summed = atom_comp1;
    for (atom, ab) in atom_comp2 {
        *summed.entry(atom).or_insert(0.0) += ab;
    }

    summed
}


/*
// --- Sage definitions --- //
use std::iter::Sum;
pub const fn composition(aa: u8) -> Composition {
match aa {
b'A' => Composition::new(3, 2, 0),
b'R' => Composition::new(6, 2, 0),
b'N' => Composition::new(4, 3, 0),
b'D' => Composition::new(4, 4, 0),
b'C' => Composition::new(3, 2, 1),
b'E' => Composition::new(5, 4, 0),
b'Q' => Composition::new(5, 3, 0),
b'G' => Composition::new(2, 2, 0),
b'H' => Composition::new(6, 2, 0),
b'I' => Composition::new(6, 2, 0),
b'L' => Composition::new(6, 2, 0),
b'K' => Composition::new(6, 2, 0),
b'M' => Composition::new(5, 2, 1),
b'F' => Composition::new(9, 2, 0),
b'P' => Composition::new(5, 2, 0),
b'S' => Composition::new(3, 3, 0),
b'T' => Composition::new(4, 3, 0),
b'W' => Composition::new(11, 2, 0),
b'Y' => Composition::new(9, 3, 0),
b'V' => Composition::new(5, 2, 0),
b'U' => Composition::new(3, 2, 0),
b'O' => Composition::new(12, 3, 0),
_ => Composition::new(0, 0, 0),
}
}
pub struct Composition {
pub carbon: u16,
pub sulfur: u16,
}
impl Composition {
pub const fn new(carbon: u16, _oxygen: u8, sulfur: u16) -> Self {
Self { carbon, sulfur }
}
}
impl Sum for Composition {
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
let mut comp = Composition::new(0, 0, 0);
for i in iter {
comp.carbon += i.carbon;
// comp.oxygen += i.oxygen;
comp.sulfur += i.sulfur;
}
comp
}
}*/
90 changes: 90 additions & 0 deletions mzcore-rs/src/chemistry/constants.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// NOTE(review): as an outer attribute this only applies to the item that
// follows it; if a module-wide allowance was intended, `#![allow(dead_code)]`
// at the top of the module is needed — confirm.
#[allow(dead_code)]

// Source: http://pdg.lbl.gov/2012/reviews/rpp2012-rev-phys-constants.pdf
pub const AVERAGE_AA_MASS: f64 = 111.1254; // TODO: marco => why difference with 111.10523866044295 by computation
// TODO: refine this value and put a source reference here (publication ?)
pub const AVERAGE_PEPTIDE_ISOTOPE_MASS_DIFF: f64 = 1.0027;

pub const ELECTRON_MASS: f64 = 0.00054857990946; // Source: NIST 2010 CODATA
pub const PROTON_MASS: f64 = 1.007276466812; // Source: NIST 2010 CODATA

pub const CO_MONO_MASS: f64 = 27.99491461956;
// CO plus one more oxygen, using the same 16O mass (15.99491461956) that
// `CO_MONO_MASS` implies with 12C = 12 exactly. Replaces the former 0.0 placeholder.
pub const CO2_MONO_MASS: f64 = 43.98982923912;
pub const H2O_MONO_MASS: f64 = 18.010565;
pub const NH3_MONO_MASS: f64 = 17.02654910101;

// Alias of `H2O_MONO_MASS`, defined from it so the two names cannot drift apart.
pub const WATER_MONO_MASS: f64 = H2O_MONO_MASS;
pub const WATER_AVERAGE_MASS: f64 = 18.01525697318;

/// One-letter amino-acid codes as `char` constants, one per uppercase ASCII
/// letter from `A` to `Z`.
pub mod aa {
    pub const A: char = 'A';
    pub const B: char = 'B';
    pub const C: char = 'C';
    pub const D: char = 'D';
    pub const E: char = 'E';
    pub const F: char = 'F';
    pub const G: char = 'G';
    pub const H: char = 'H';
    pub const I: char = 'I';
    pub const J: char = 'J';
    pub const K: char = 'K';
    pub const L: char = 'L';
    pub const M: char = 'M';
    pub const N: char = 'N';
    pub const O: char = 'O';
    pub const P: char = 'P';
    pub const Q: char = 'Q';
    pub const R: char = 'R';
    pub const S: char = 'S';
    pub const T: char = 'T';
    pub const U: char = 'U';
    pub const V: char = 'V';
    pub const W: char = 'W';
    pub const X: char = 'X';
    pub const Y: char = 'Y';
    pub const Z: char = 'Z';
}

// --- Sage definition --- //
/// ASCII byte codes of the residue letters listed as valid amino acids:
/// the twenty letters `ACDEFGHIKLMNPQRSTVWY` followed by `U` and `O`.
pub const VALID_AA: [u8; 22] = [
b'A', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'K', b'L', b'M', b'N', b'P', b'Q', b'R', b'S',
b'T', b'V', b'W', b'Y', b'U', b'O',
];

/// Atom symbols as string constants (`C`, `H`, `O`, `N`, `P`, `S`).
pub mod atom {
    // `'static` is implied for references in `const` items, so it is left out.
    pub const C: &str = "C";
    pub const H: &str = "H";
    pub const O: &str = "O";
    pub const N: &str = "N";
    pub const P: &str = "P";
    pub const S: &str = "S";
}

// --- Sage definition --- //
/// Monoisotopic masses indexed by the offset of an uppercase ASCII letter
/// from `b'A'`. Letters without a mass in this table hold `0.0`.
pub const MONOISOTOPIC_MASSES: [f32; 26] = [
    71.03711,  // A
    0.0,       // B
    103.00919, // C
    115.02694, // D
    129.04259, // E
    147.0684,  // F
    57.02146,  // G
    137.05891, // H
    113.08406, // I
    0.0,       // J
    128.09496, // K
    113.08406, // L
    131.0405,  // M
    114.04293, // N
    237.14774, // O
    97.05276,  // P
    128.05858, // Q
    156.1011,  // R
    87.03203,  // S
    101.04768, // T
    150.95363, // U
    99.06841,  // V
    186.07932, // W
    0.0,       // X
    163.06332, // Y
    0.0,       // Z
];

/// Looks up the monoisotopic mass stored for the residue letter `aa`.
///
/// Returns the `MONOISOTOPIC_MASSES` entry for an uppercase ASCII letter and
/// `0.0` for any other byte.
pub const fn monoisotopic(aa: u8) -> f32 {
    match aa {
        // The subtraction cannot underflow and the index is always in 0..26.
        b'A'..=b'Z' => MONOISOTOPIC_MASSES[(aa - b'A') as usize],
        _ => 0.0,
    }
}

#[cfg(test)]
mod test {
    use super::{monoisotopic, VALID_AA};

    /// Every residue listed in `VALID_AA` must have a strictly positive mass.
    #[test]
    fn valid_aa() {
        VALID_AA
            .iter()
            .for_each(|&ch| assert!(monoisotopic(ch) > 0.0));
    }
}
4 changes: 4 additions & 0 deletions mzcore-rs/src/chemistry/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod composition;
pub mod constants;
pub mod model;
pub mod table;
Loading

0 comments on commit 98fe5a3

Please sign in to comment.