-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added general open identified peptides file function
- Loading branch information
1 parent
ee05d0e
commit 3e8c393
Showing
13 changed files
with
153 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,6 +116,7 @@ | |
"Psicose", | ||
"Psimod", | ||
"psms", | ||
"psmtsv", | ||
"pyclass", | ||
"pymethods", | ||
"pymodule", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Manifest for the `de-novo-align` package. | ||
[package] | ||
name = "de-novo-align" | ||
version = "0.1.0" | ||
# Never publish this package to a registry. | ||
publish = false | ||
# The Rust edition is inherited from the workspace manifest. | ||
edition.workspace = true | ||
|
||
[dependencies] | ||
# `rustyms` is taken from the local checkout by relative path; the remaining | ||
# dependencies inherit their definition from the workspace manifest. | ||
rustyms = { path = "../../rustyms" } | ||
clap = { workspace = true, features = ["derive", "cargo"] } | ||
itertools = { workspace = true } | ||
rayon = { workspace = true } | ||
serde_json = { workspace = true } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
use std::{ | ||
fs::File, | ||
io::{BufReader, BufWriter}, | ||
}; | ||
|
||
use clap::Parser; | ||
use fragment::FragmentType; | ||
use identification::{open_identified_peptides_file, FastaData}; | ||
use itertools::Itertools; | ||
use rayon::prelude::*; | ||
use rustyms::{ | ||
spectrum::{Score, Scores}, | ||
system::{e, usize::Charge}, | ||
*, | ||
}; | ||
use spectrum::PeakSpectrum; | ||
use std::collections::HashMap; | ||
|
||
// Command line arguments, parsed by clap through `#[derive(Parser)]`. | ||
// Plain `//` comments are used for these notes on purpose: clap's derive turns | ||
// `///` doc comments into help text, so the existing `///` lines below are part | ||
// of the program's output and are left untouched. | ||
#[derive(Parser)] | ||
struct Cli { | ||
/// The input identified peptides file | ||
// Handed to `open_identified_peptides_file` in `main`. | ||
#[arg(short, long)] | ||
peptides: String, | ||
/// The fasta database of known proteins | ||
// Handed to `FastaData::parse_file` in `main`. | ||
#[arg(short, long)] | ||
database: String, | ||
/// Where to store the results | ||
// Only a long flag is declared for this one (no `short`). | ||
// NOTE(review): not read anywhere in the visible `main` yet. | ||
#[arg(long)] | ||
out_path: String, | ||
} | ||
|
||
/// Entry point: parses the command line and loads both input files. | ||
/// | ||
/// # Panics | ||
/// Panics (through `unwrap`) if `open_identified_peptides_file` or | ||
/// `FastaData::parse_file` returns an error. | ||
fn main() { | ||
let args = Cli::parse(); | ||
// No custom database is supplied for the identified peptides file (`None`). | ||
let peptides = open_identified_peptides_file(args.peptides, None).unwrap(); | ||
let database = FastaData::parse_file(args.database).unwrap(); | ||
// NOTE(review): `peptides`, `database` and `args.out_path` are not used after | ||
// this point in the visible code; this looks like a work-in-progress stub. | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
use std::path::Path; | ||
|
||
use super::{ | ||
error::{Context, CustomError}, | ||
ontologies::CustomDatabase, | ||
FastaData, IdentifiedPeptide, IdentifiedPeptideIter, IdentifiedPeptideSource, MSFraggerData, | ||
MaxQuantData, NovorData, OpairData, PeaksData, SageData, | ||
}; | ||
|
||
/// Open the selected path and automatically determine the file type. | ||
/// | ||
/// The file type is chosen from the (lowercased) file extension. If the | ||
/// extension is `gz` the extension of the file stem is used instead, so | ||
/// `file.csv.gz` is handled as `csv`. The extensions map to parsers as follows: | ||
/// * `csv`: tried as Peaks first, then as Novor | ||
/// * `tsv`: tried as MSFragger first, then as Sage | ||
/// * `psmtsv`: Opair | ||
/// * `fasta`: Fasta, every entry is converted into an [`IdentifiedPeptide`] | ||
/// * `txt`: MaxQuant | ||
/// | ||
/// The `custom_database` is passed on unchanged to every parser except the | ||
/// Fasta parser, which does not take one. | ||
/// | ||
/// # Errors | ||
/// It errors if the file type could not be determined or if opening the file errors. | ||
/// For `csv` and `tsv` the errors of the individual parsers are discarded and | ||
/// replaced by a single "Unknown file" error when both candidates fail. | ||
pub fn open_identified_peptides_file<'a>( | ||
path: impl AsRef<Path>, | ||
custom_database: Option<&'a CustomDatabase>, | ||
) -> Result<Box<dyn Iterator<Item = Result<IdentifiedPeptide, CustomError>> + 'a>, CustomError> { | ||
let path = path.as_ref(); | ||
// Determine the extension that identifies the format, looking through a | ||
// trailing `gz`. If the stem has no extension of its own (eg `file.gz`) | ||
// this falls back to `gz`, which ends up in the unknown extension branch. | ||
// NOTE(review): the `gz` comparison happens before lowercasing, so it is | ||
// case sensitive (`file.csv.GZ` is not looked through) while the final | ||
// extension match is case insensitive; confirm if this is intended. | ||
let actual_extension = path | ||
.extension() | ||
.map(|ex| { | ||
(ex == "gz") | ||
.then_some(path) | ||
.and_then(|p| p.file_stem()) | ||
.and_then(|p| Path::new(p).extension()) | ||
.unwrap_or(ex) | ||
}) | ||
.map(|ex| ex.to_string_lossy().to_lowercase()); | ||
match actual_extension.as_deref() { | ||
// Ambiguous extension: try Peaks, fall back to Novor. | ||
Some("csv") => PeaksData::parse_file(path, custom_database) | ||
.map(IdentifiedPeptideIter::into_box) | ||
.or_else(|_| { | ||
NovorData::parse_file(path, custom_database).map(IdentifiedPeptideIter::into_box) | ||
}) | ||
.map_err(|_| { | ||
CustomError::error( | ||
"Unknown file", | ||
"Could not be recognised as either a Peaks or Novor file", | ||
Context::show(path.to_string_lossy()), | ||
) | ||
}), | ||
// Ambiguous extension: try MSFragger, fall back to Sage. | ||
Some("tsv") => MSFraggerData::parse_file(path, custom_database) | ||
.map(IdentifiedPeptideIter::into_box) | ||
.or_else(|_| { | ||
SageData::parse_file(path, custom_database).map(IdentifiedPeptideIter::into_box) | ||
}) | ||
.map_err(|_| { | ||
CustomError::error( | ||
"Unknown file", | ||
"Could not be recognised as either a MSFragger or Sage file", | ||
Context::show(path.to_string_lossy()), | ||
) | ||
}), | ||
Some("psmtsv") => { | ||
OpairData::parse_file(path, custom_database).map(IdentifiedPeptideIter::into_box) | ||
} | ||
// The Fasta parser returns a collection instead of a fallible iterator, | ||
// so every entry is wrapped in `Ok` to fit the common return type. | ||
Some("fasta") => FastaData::parse_file(path).map(|peptides| { | ||
Box::new(peptides.into_iter().map(|p| Ok(p.into()))) | ||
as Box<dyn Iterator<Item = Result<IdentifiedPeptide, CustomError>> + 'a> | ||
}), | ||
Some("txt") => { | ||
MaxQuantData::parse_file(path, custom_database).map(IdentifiedPeptideIter::into_box) | ||
} | ||
// Any other extension, or a path without an extension. | ||
_ => Err(CustomError::error( | ||
"Unknown extension", | ||
"Use CSV, TSV, TXT, PSMTSV, or Fasta, or any of these as a gzipped file (eg csv.gz).", | ||
Context::show(path.to_string_lossy()), | ||
)), | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters