Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multi-omics and better CLI messages #9

Merged
merged 4 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ interest.txt
latest.gmt
ref.txt
target_symbols.txt
/*.gmt
/*.rnk
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![Rust](https://github.com/bzhanglab/webgestalt_rust/actions/workflows/rust.yml/badge.svg?branch=master)](https://github.com/bzhanglab/webgestalt_rust/actions/workflows/rust.yml)

Rust implementation of [WebGestaltR](https://github.com/bzhanglab/webgestaltr).
Rust implementation of [WebGestaltR](https://github.com/bzhanglab/webgestaltr).

## Install

Expand Down
119 changes: 89 additions & 30 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ use owo_colors::{OwoColorize, Stream::Stdout, Style};
use std::io::{BufReader, Write};
use std::{fs::File, time::Instant};
use webgestalt_lib::methods::gsea::GSEAConfig;
use webgestalt_lib::methods::multiomics::{combine_gmts, MultiOmicsMethod, NormalizationMethod};
use webgestalt_lib::methods::ora::ORAConfig;
use webgestalt_lib::readers::read_rank_file;
use webgestalt_lib::readers::utils::Item;
use webgestalt_lib::readers::{read_gmt_file, read_rank_file};
use webgestalt_lib::{MalformedError, WebGestaltError};

/// WebGestalt CLI.
/// ORA and GSEA enrichment tool.
Expand Down Expand Up @@ -85,6 +88,7 @@ struct CombineGmtArgs {
/// Paths to the files to combine
files: Vec<String>,
}

#[derive(ValueEnum, Clone)]
enum NormMethods {
MedianRank,
Expand All @@ -93,8 +97,15 @@ enum NormMethods {
None,
}

// CLI-facing choice of how several rank lists are combined into one.
// Each variant is translated to the `MultiOmicsMethod` variant of the same name
// before `combine_lists` is called.
// NOTE(review): plain `//` comments are used here so the generated CLI help text
// stays unchanged — switch to `///` if help text for these values is wanted.
#[derive(ValueEnum, Clone)]
enum CombinationMethods {
// Translated to `MultiOmicsMethod::Max`.
Max,
// Translated to `MultiOmicsMethod::Mean`.
Mean,
}

#[derive(Args)]
struct CombineListArgs {
combination: Option<CombinationMethods>,
normalization: Option<NormMethods>,
out: Option<String>,
files: Vec<String>,
Expand Down Expand Up @@ -211,44 +222,78 @@ fn main() {
res.len()
);
}
Some(Commands::Test) => {
let list1 = read_rank_file("gene.rnk".to_string()).unwrap();
let list2 = read_rank_file("protein.rnk".to_string()).unwrap();
let list3 = read_rank_file("metabolite.rnk".to_string()).unwrap();
let lists = vec![list1, list2, list3];
// let gmt1 = webgestalt_lib::readers::read_gmt_file("gene.gmt".to_string()).unwrap();
// let gmt2 =
// webgestalt_lib::readers::read_gmt_file("metabolite.gmt".to_string()).unwrap();
// let combined_gmt = webgestalt_lib::methods::multiomics::combine_gmts(&vec![gmt1, gmt2]);
// let mut file = File::create("combined.gmt").unwrap();
// for row in combined_gmt {
// writeln!(file, "{}\t{}\t{}", row.id, row.url, row.parts.join("\t")).unwrap();
// }
let mut combined_list = webgestalt_lib::methods::multiomics::combine_lists(
lists,
webgestalt_lib::methods::multiomics::MultiOmicsMethod::Mean,
webgestalt_lib::methods::multiomics::NormalizationMethod::MeanValue,
);
combined_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap());
let mut file = File::create("combined.rnk").unwrap();
for row in combined_list {
writeln!(file, "{}\t{}", row.analyte, row.rank).unwrap();
}
}
Some(Commands::Test) => will_err(1).unwrap_or_else(|x| println!("{}", x)),
Some(Commands::Combine(args)) => match &args.combine_type {
Some(CombineType::Gmt(files)) => {}
Some(CombineType::List(files)) => {
Some(CombineType::Gmt(gmt_args)) => {
let style = Style::new().blue().bold();
println!(
"{}: READING GMTS",
"INFO".if_supports_color(Stdout, |text| text.style(style))
);
let mut gmts: Vec<Vec<Item>> = Vec::new();
let mut tot_length: usize = 0;
for path in gmt_args.files.clone() {
let gmt = read_gmt_file(path).unwrap();
tot_length += gmt.len();
gmts.push(gmt);
}
let combined_gmt = combine_gmts(&gmts);
println!(
"Found {} overlapping sets out of {}",
tot_length - combined_gmt.len(),
combined_gmt.len()
);
println!(
"{}: CREATING COMBINED GMT AT {}",
"INFO".if_supports_color(Stdout, |text| text.style(style)),
gmt_args.out.clone().unwrap()
);
let mut file = File::create(gmt_args.out.clone().unwrap()).unwrap();
for row in combined_gmt {
writeln!(file, "{}\t{}\t{}", row.id, row.url, row.parts.join("\t")).unwrap();
}
}
Some(CombineType::List(ora_args)) => {
let style = Style::new().blue().bold();
println!(
"{}: READING LISTS",
"INFO".if_supports_color(Stdout, |text| text.style(style))
);
let mut lists = Vec::new();
for file in files.files.iter() {
for file in ora_args.files.iter() {
lists.push(read_rank_file(file.clone()).unwrap());
}
let norm_method: NormalizationMethod = match ora_args.normalization {
Some(NormMethods::None) => NormalizationMethod::None,
Some(NormMethods::MeanValue) => NormalizationMethod::MeanValue,
Some(NormMethods::MedianRank) => NormalizationMethod::MedianRank,
Some(NormMethods::MedianValue) => NormalizationMethod::MedianValue,
None => panic!("No normalization method chosen."),
};
let method: MultiOmicsMethod = match ora_args.combination {
Some(CombinationMethods::Mean) => MultiOmicsMethod::Mean(norm_method),
Some(CombinationMethods::Max) => MultiOmicsMethod::Max(norm_method),
None => panic!("No combination method chosen."),
};
let mut combined_list =
webgestalt_lib::methods::multiomics::combine_lists(lists, method);
combined_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap());
let mut file = File::create(ora_args.out.clone().unwrap()).unwrap();
println!(
"{}: CREATING COMBINED LIST AT {}",
"INFO".if_supports_color(Stdout, |text| text.style(style)),
ora_args.out.clone().unwrap()
);
for row in combined_list {
writeln!(file, "{}\t{}", row.analyte, row.rank).unwrap();
}
}
_ => {
panic!("Please select a valid combine type");
println!("Please select a valid combine type");
}
},
_ => {
todo!("Please select a valid command. Run --help for options.")
println!("Please select a valid command. Run --help for options.")
}
}
}
Expand Down Expand Up @@ -288,3 +333,17 @@ fn benchmark() {
let mut ftsv = File::create("format_benchmarks.tsv").unwrap();
writeln!(ftsv, "{}", whole_file.join("\n")).unwrap();
}

/// Produces a sample error for exercising the error-message formatting.
///
/// Returns `Ok(())` when `x` is `0`; for any other value returns a
/// `WebGestaltError::MalformedFile` describing a wrong-format file.
fn will_err(x: i32) -> Result<(), WebGestaltError> {
    if x != 0 {
        let kind = webgestalt_lib::MalformedErrorType::WrongFormat {
            found: "GMT".to_owned(),
            expected: "rank".to_owned(),
        };
        return Err(WebGestaltError::MalformedFile(MalformedError {
            path: "ExamplePath.txt".to_owned(),
            kind,
        }));
    }
    Ok(())
}
68 changes: 60 additions & 8 deletions webgestalt_lib/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,75 @@
use std::{error::Error, fmt};

pub mod methods;
pub mod readers;
pub mod stat;
pub enum Error {

/// Crate-private helper trait for error payloads that can render themselves
/// as a human-readable message.
trait CustomError {
/// Builds the message text describing this error.
fn msg(&self) -> String;
}

/// Error type grouping the failure categories reported by this crate.
///
/// Its `Display` output is the message of the wrapped error.
#[derive(Debug)]
pub enum WebGestaltError {
/// A file did not have the expected structure; carries the file path and the kind of problem.
MalformedFile(MalformedError),
/// A problem with a value met during a statistical computation.
StatisticsError(StatisticsError),
/// A wrapped standard-library I/O error.
IOError(std::io::Error),
}

pub enum MalformedError {
NoColumnsFound,
WrongFormat,
impl Error for WebGestaltError {}

impl fmt::Display for WebGestaltError {
    /// Writes the message of whichever error this value wraps.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            WebGestaltError::MalformedFile(inner) => write!(f, "{}", inner.msg()),
            WebGestaltError::StatisticsError(inner) => write!(f, "{}", inner.msg()),
            WebGestaltError::IOError(inner) => write!(f, "{}", inner),
        }
    }
}

/// Describes a file whose contents did not have the expected structure.
#[derive(Debug)]
pub struct MalformedError {
/// Path of the offending file, as shown in the error message.
pub path: String,
/// What exactly was wrong with the file.
pub kind: MalformedErrorType,
}

/// The specific way in which a file was malformed.
#[derive(Debug)]
pub enum MalformedErrorType {
/// No column was found using the given delimiter (a tab is shown as `\t` in the message).
/// The field keeps its existing spelling, `delimeter`.
NoColumnsFound { delimeter: String },
/// The file was in a different format (`found`) than the one required (`expected`).
WrongFormat { found: String, expected: String },
/// The problem could not be classified.
Unknown,
}

impl CustomError for MalformedError {
fn msg(&self) -> String {
let error_msg = match &self.kind {
MalformedErrorType::WrongFormat { found, expected } => format!(
"Wrong Format Found. Found: {}; Expected: {}",
found, expected
),
MalformedErrorType::Unknown => String::from("Unknown error type."),
MalformedErrorType::NoColumnsFound { delimeter } => format!(
"No column found with delimeter {}",
if delimeter == "\t" { "\\t" } else { delimeter }
),
};
format!("Error in {}: {}.", self.path, error_msg)
}
}

#[derive(Debug)]
pub enum StatisticsError {
FoundNANValue,
InvalidValue,
InvalidValue { value: f64 },
}

#[cfg(test)]
mod tests {
use super::*;
impl CustomError for StatisticsError {
    /// Renders the error as `Statistical Error: <detail>.`
    fn msg(&self) -> String {
        let detail = match self {
            StatisticsError::FoundNANValue => String::from("Found a NAN value"),
            StatisticsError::InvalidValue { value } => format!("Found invalid value: {}", value),
        };
        // Prefix was previously misspelled as "Statstical Error".
        format!("Statistical Error: {}.", detail)
    }
}
35 changes: 33 additions & 2 deletions webgestalt_lib/src/methods/gsea.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use rayon::prelude::*;
use std::sync::{Arc, Mutex};

/// Parameters for GSEA
#[derive(Clone)]
pub struct GSEAConfig {
/// Power to raise each rank during the enrichment scoring
pub p: f64,
Expand Down Expand Up @@ -62,12 +63,19 @@ impl GSEAResult {

/// GSEA result for a single set, including its running sum vector.
///
/// This is the element type of the vector returned by `gsea`.
#[derive(Clone)]
pub struct FullGSEAResult {
/// The set name
pub set: String,
/// The statistical p-value
pub p: f64,
/// The FDR value
pub fdr: f64,
/// The enrichment score
pub es: f64,
/// The normalized enrichment score
pub nes: f64,
/// Leading edge count
pub leading_edge: i32,
/// Running sum vector
pub running_sum: Vec<f64>,
}

Expand Down Expand Up @@ -276,6 +284,10 @@ fn enrichment_score(
///
/// - `analyte_list` - [`Vec<RankListItem>`] of the rank list
/// - `gmt` - [`Vec<Item>`] of gmt file
///
/// # Returns
///
/// Returns a [`Vec<FullGSEAResult>`] of the GSEA results
pub fn gsea(
mut analyte_list: Vec<RankListItem>,
gmt: Vec<Item>,
Expand All @@ -286,7 +298,7 @@ pub fn gsea(
analyte_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap()); // sort list
let (analytes, ranks) = RankListItem::to_vecs(analyte_list.clone()); // seperate into vectors
let permutations: Vec<Vec<usize>> =
provided_permutations.unwrap_or(make_permuations(config.permutations, analytes.len()));
provided_permutations.unwrap_or(make_permutations(config.permutations, analytes.len()));
let all_nes = Arc::new(Mutex::new(Vec::new()));
let set_nes = Arc::new(Mutex::new(Vec::new()));
let all_res = Arc::new(Mutex::new(Vec::new()));
Expand Down Expand Up @@ -359,7 +371,26 @@ pub fn gsea(
final_gsea
}

pub fn make_permuations(permutations: i32, max: usize) -> Vec<Vec<usize>> {
/// Create index permutations for GSEA
///
/// # Parameters
///
/// - `permutations` - Number of permutations to create
/// - `max` - Maximum index to permute
///
/// # Returns
///
/// Returns a [`Vec<Vec<usize>>`] of the permutations
///
/// # Examples
///
/// ```
/// use webgestalt_lib::methods::gsea::make_permutations;
/// let permutations = make_permutations(10, 100);
/// assert_eq!(permutations.len(), 10);
/// assert_eq!(permutations[0].len(), 100);
/// ```
pub fn make_permutations(permutations: i32, max: usize) -> Vec<Vec<usize>> {
let mut temp_permutations: Vec<Vec<usize>> = Vec::new();
let mut smallrng = rand::rngs::SmallRng::from_entropy();
(0..permutations).for_each(|_i| {
Expand Down
Loading