Skip to content

Commit

Permalink
Merge pull request #7 from bzhanglab/dev
Browse files Browse the repository at this point in the history
Make FDR optional and add combination methods
  • Loading branch information
iblacksand authored Oct 25, 2023
2 parents 7e4a088 + a066ba1 commit 0a61279
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 17 deletions.
88 changes: 84 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
use std::fmt::format;
use std::io::{BufReader, Write};
use std::str::FromStr;
use std::{fs::File, time::Instant};

use bincode::deserialize_from;

use clap::Subcommand;
use clap::{Args, Parser};
use clap::{Subcommand, ValueEnum};
use owo_colors::{OwoColorize, Stream::Stdout, Style};
use webgestalt_lib::methods::gsea::GSEAConfig;
use webgestalt_lib::methods::multiomics::NormalizationMethod;
use webgestalt_lib::methods::ora::ORAConfig;
use webgestalt_lib::readers::read_rank_file;

/// WebGestalt CLI.
/// ORA and GSEA enrichment tool.
Expand All @@ -30,6 +33,10 @@ enum Commands {
Gsea(GseaArgs),
/// Run ORA using the provided files
Ora(ORAArgs),
/// Run a test
Test,
/// Combine multiple files into a single file
Combine(CombineArgs),
}

#[derive(Debug, Args)]
Expand Down Expand Up @@ -65,6 +72,39 @@ struct ORAArgs {
reference: Option<String>,
}

#[derive(Args)]
struct CombineArgs {
#[command(subcommand)]
combine_type: Option<CombineType>,
}

#[derive(Subcommand)]
enum CombineType {
Gmt(CombineGmtArgs),
List(CombineListArgs),
}

#[derive(Args)]
struct CombineGmtArgs {
out: Option<String>,
/// Paths to the files to combine
files: Vec<String>,
}
#[derive(ValueEnum, Clone)]
enum NormMethods {
MedianRank,
MedianValue,
MeanValue,
None,
}

#[derive(Args)]
struct CombineListArgs {
normalization: Option<NormMethods>,
out: Option<String>,
files: Vec<String>,
}

fn main() {
let args = CliArgs::parse();
match &args.command {
Expand Down Expand Up @@ -129,9 +169,13 @@ fn main() {
return;
}
let gene_list = webgestalt_lib::readers::read_rank_file(gsea_args.rnk.clone().unwrap())
.expect(format!("File {} not found", gsea_args.rnk.clone().unwrap()).as_str());
.unwrap_or_else(|_| {
panic!("File {} not found", gsea_args.rnk.clone().unwrap());
});
let gmt = webgestalt_lib::readers::read_gmt_file(gsea_args.gmt.clone().unwrap())
.expect(format!("File {} not found", gsea_args.gmt.clone().unwrap()).as_str());
.unwrap_or_else(|_| {
panic!("File {} not found", gsea_args.gmt.clone().unwrap());
});
webgestalt_lib::methods::gsea::gsea(gene_list, gmt, GSEAConfig::default(), None);
println!("Done with GSEA");
}
Expand Down Expand Up @@ -172,8 +216,44 @@ fn main() {
res.len()
);
}
Some(Commands::Test) => {
let list1 = read_rank_file("gene.rnk".to_string()).unwrap();
let list2 = read_rank_file("protein.rnk".to_string()).unwrap();
let list3 = read_rank_file("metabolite.rnk".to_string()).unwrap();
let lists = vec![list1, list2, list3];
// let gmt1 = webgestalt_lib::readers::read_gmt_file("gene.gmt".to_string()).unwrap();
// let gmt2 =
// webgestalt_lib::readers::read_gmt_file("metabolite.gmt".to_string()).unwrap();
// let combined_gmt = webgestalt_lib::methods::multiomics::combine_gmts(&vec![gmt1, gmt2]);
// let mut file = File::create("combined.gmt").unwrap();
// for row in combined_gmt {
// writeln!(file, "{}\t{}\t{}", row.id, row.url, row.parts.join("\t")).unwrap();
// }
let mut combined_list = webgestalt_lib::methods::multiomics::combine_lists(
lists,
webgestalt_lib::methods::multiomics::MultiOmicsMethod::Mean,
webgestalt_lib::methods::multiomics::NormalizationMethod::MeanValue,
);
combined_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap());
let mut file = File::create("combined.rnk").unwrap();
for row in combined_list {
writeln!(file, "{}\t{}", row.analyte, row.rank).unwrap();
}
}
Some(Commands::Combine(args)) => match &args.combine_type {
Some(CombineType::Gmt(files)) => {}
Some(CombineType::List(files)) => {
let mut lists = Vec::new();
for file in files.files.iter() {
lists.push(read_rank_file(file.clone()).unwrap());
}
}
_ => {
panic!("Please select a valid combine type");
}
},
_ => {
println!("Please select a command. Run --help for options.")
todo!("Please select a valid command. Run --help for options.")
}
}
}
Expand Down
84 changes: 73 additions & 11 deletions webgestalt_lib/src/methods/multiomics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ pub struct ORAJob<'a> {

#[derive(Copy, Clone)]
pub enum NormalizationMethod {
MedianRatio,
None,
MedianRank,
MedianValue,
MeanValue,
}

/// Run a multiomics analysis, using iehter the max/mean median ratio or a typical meta analysis
Expand All @@ -61,10 +62,10 @@ pub enum NormalizationMethod {
/// - `method` - A [`MultiOmicsMethod`] enum detailing the analysis method to combine the runs
/// together (meta-analysis, mean median ration, or max median ratio).
pub fn multiomic_analysis(
jobs: Vec<AnalysisJob>,
analysis_type: AnalysisType,
_jobs: Vec<AnalysisJob>,
_analysis_type: AnalysisType,
method: MultiOmicsMethod,
) -> () {
) {
if let MultiOmicsMethod::Meta(meta_method) = method {
} else {
}
Expand Down Expand Up @@ -94,7 +95,7 @@ fn max_combine(
for list in normalized_lists {
for item in list {
if let Some(val) = batches.get_mut(&item.analyte) {
if item.rank > *val {
if item.rank.abs() > *val {
*val = item.rank;
}
} else {
Expand Down Expand Up @@ -143,21 +144,82 @@ fn mean_combine(
fn normalize(list: &mut Vec<RankListItem>, method: NormalizationMethod) -> Vec<RankListItem> {
match method {
NormalizationMethod::None => list.clone(),
NormalizationMethod::MedianRatio => {
NormalizationMethod::MedianRank => {
list.sort_by(|a, b| {
b.rank
.partial_cmp(&a.rank)
.expect("Invalid float comparison during comparison")
.expect("Invalid float comparison during normalization")
});
let median = list[list.len() / 2].rank;
let median = list.len() as f64 / 2.0;
let mut final_list: Vec<RankListItem> = Vec::new();
for item in list {
for (i, item) in list.iter().enumerate() {
final_list.push(RankListItem {
analyte: item.analyte.clone(),
rank: item.rank / median,
rank: (i as f64 - median) / median,
});
}
final_list
}
NormalizationMethod::MedianValue => {
list.sort_by(|a, b| {
b.rank
.partial_cmp(&a.rank)
.expect("Invalid float comparison during normalization")
});
let min = list.last().unwrap().rank;
let median = list[list.len() / 2].rank - min;
let mut final_list: Vec<RankListItem> = Vec::new();
for item in list.iter() {
final_list.push(RankListItem {
analyte: item.analyte.clone(),
rank: (item.rank - min) / median,
});
}
final_list
}
NormalizationMethod::MeanValue => {
list.sort_by(|a, b| {
b.rank
.partial_cmp(&a.rank)
.expect("Invalid float comparison during normalization")
});
let min = list.last().unwrap().rank;
let mean: f64 = list.iter().map(|x| x.rank).sum::<f64>() / (list.len() as f64) - min;
let mut final_list: Vec<RankListItem> = Vec::new();
for item in list.iter() {
final_list.push(RankListItem {
analyte: item.analyte.clone(),
rank: (item.rank - min) / mean,
});
}
final_list
}
}
}

pub fn combine_gmts(gmts: &Vec<Vec<Item>>) -> Vec<Item> {
let mut combined_parts: AHashMap<String, Vec<String>> = AHashMap::default();
let mut combined_urls: AHashMap<String, String> = AHashMap::default();
for gmt in gmts {
for item in gmt {
if combined_parts.contains_key(&item.id) {
combined_parts
.get_mut(&item.id)
.unwrap()
.extend(item.parts.clone());
} else {
combined_parts.insert(item.id.clone(), item.parts.clone());
combined_urls.insert(item.id.clone(), item.url.clone());
}
}
}
let mut final_gmt: Vec<Item> = Vec::new();
for (key, parts) in combined_parts {
final_gmt.push(Item {
id: key.clone(),
parts,
url: combined_urls[&key].clone(),
})
}
final_gmt
}
4 changes: 3 additions & 1 deletion webgestalt_lib/src/methods/ora.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub struct ORAConfig {
pub min_overlap: i64,
pub min_set_size: usize,
pub max_set_size: usize,
pub fdr_method: stat::AdjustmentMethod,
}

impl Default for ORAConfig {
Expand All @@ -16,6 +17,7 @@ impl Default for ORAConfig {
min_overlap: 5,
min_set_size: 5,
max_set_size: 500,
fdr_method: stat::AdjustmentMethod::BH,
}
}
}
Expand Down Expand Up @@ -90,7 +92,7 @@ pub fn get_ora(
});
let partials = res.lock().unwrap();
let p_vals: Vec<f64> = partials.iter().map(|x| x.p).collect();
let fdrs: Vec<f64> = stat::adjust(&p_vals);
let fdrs: Vec<f64> = stat::adjust(&p_vals, config.fdr_method);
let mut final_res = Vec::new();
for (i, row) in partials.clone().into_iter().enumerate() {
final_res.push(ORAResult {
Expand Down
14 changes: 13 additions & 1 deletion webgestalt_lib/src/stat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,19 @@ struct Carrier {
original_order: usize,
}

pub fn adjust(p_vals: &[f64]) -> Vec<f64> {
pub enum AdjustmentMethod {
BH,
None,
}

pub fn adjust(p_vals: &[f64], method: AdjustmentMethod) -> Vec<f64> {
match method {
AdjustmentMethod::BH => benjamini_hochberg(p_vals),
AdjustmentMethod::None => p_vals.to_vec(),
}
}

fn benjamini_hochberg(p_vals: &[f64]) -> Vec<f64> {
let mut carriers: Vec<Carrier> = p_vals
.iter()
.enumerate()
Expand Down

0 comments on commit 0a61279

Please sign in to comment.