Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make FDR optional and add combination methods #7

Merged
merged 6 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 84 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
use std::fmt::format;
use std::io::{BufReader, Write};
use std::str::FromStr;
use std::{fs::File, time::Instant};

use bincode::deserialize_from;

use clap::Subcommand;
use clap::{Args, Parser};
use clap::{Subcommand, ValueEnum};
use owo_colors::{OwoColorize, Stream::Stdout, Style};
use webgestalt_lib::methods::gsea::GSEAConfig;
use webgestalt_lib::methods::multiomics::NormalizationMethod;
use webgestalt_lib::methods::ora::ORAConfig;
use webgestalt_lib::readers::read_rank_file;

/// WebGestalt CLI.
/// ORA and GSEA enrichment tool.
Expand All @@ -30,6 +33,10 @@ enum Commands {
Gsea(GseaArgs),
/// Run ORA using the provided files
Ora(ORAArgs),
/// Run a test
Test,
/// Combine multiple files into a single file
Combine(CombineArgs),
}

#[derive(Debug, Args)]
Expand Down Expand Up @@ -65,6 +72,39 @@ struct ORAArgs {
reference: Option<String>,
}

#[derive(Args)]
struct CombineArgs {
#[command(subcommand)]
combine_type: Option<CombineType>,
}

#[derive(Subcommand)]
enum CombineType {
Gmt(CombineGmtArgs),
List(CombineListArgs),
}

#[derive(Args)]
struct CombineGmtArgs {
out: Option<String>,
/// Paths to the files to combine
files: Vec<String>,
}
#[derive(ValueEnum, Clone)]
enum NormMethods {
MedianRank,
MedianValue,
MeanValue,
None,
}

#[derive(Args)]
struct CombineListArgs {
normalization: Option<NormMethods>,
out: Option<String>,
files: Vec<String>,
}

fn main() {
let args = CliArgs::parse();
match &args.command {
Expand Down Expand Up @@ -129,9 +169,13 @@ fn main() {
return;
}
let gene_list = webgestalt_lib::readers::read_rank_file(gsea_args.rnk.clone().unwrap())
.expect(format!("File {} not found", gsea_args.rnk.clone().unwrap()).as_str());
.unwrap_or_else(|_| {
panic!("File {} not found", gsea_args.rnk.clone().unwrap());
});
let gmt = webgestalt_lib::readers::read_gmt_file(gsea_args.gmt.clone().unwrap())
.expect(format!("File {} not found", gsea_args.gmt.clone().unwrap()).as_str());
.unwrap_or_else(|_| {
panic!("File {} not found", gsea_args.gmt.clone().unwrap());
});
webgestalt_lib::methods::gsea::gsea(gene_list, gmt, GSEAConfig::default(), None);
println!("Done with GSEA");
}
Expand Down Expand Up @@ -172,8 +216,44 @@ fn main() {
res.len()
);
}
Some(Commands::Test) => {
let list1 = read_rank_file("gene.rnk".to_string()).unwrap();
let list2 = read_rank_file("protein.rnk".to_string()).unwrap();
let list3 = read_rank_file("metabolite.rnk".to_string()).unwrap();
let lists = vec![list1, list2, list3];
// let gmt1 = webgestalt_lib::readers::read_gmt_file("gene.gmt".to_string()).unwrap();
// let gmt2 =
// webgestalt_lib::readers::read_gmt_file("metabolite.gmt".to_string()).unwrap();
// let combined_gmt = webgestalt_lib::methods::multiomics::combine_gmts(&vec![gmt1, gmt2]);
// let mut file = File::create("combined.gmt").unwrap();
// for row in combined_gmt {
// writeln!(file, "{}\t{}\t{}", row.id, row.url, row.parts.join("\t")).unwrap();
// }
let mut combined_list = webgestalt_lib::methods::multiomics::combine_lists(
lists,
webgestalt_lib::methods::multiomics::MultiOmicsMethod::Mean,
webgestalt_lib::methods::multiomics::NormalizationMethod::MeanValue,
);
combined_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap());
let mut file = File::create("combined.rnk").unwrap();
for row in combined_list {
writeln!(file, "{}\t{}", row.analyte, row.rank).unwrap();
}
}
Some(Commands::Combine(args)) => match &args.combine_type {
Some(CombineType::Gmt(files)) => {}
Some(CombineType::List(files)) => {
let mut lists = Vec::new();
for file in files.files.iter() {
lists.push(read_rank_file(file.clone()).unwrap());
}
}
_ => {
panic!("Please select a valid combine type");
}
},
_ => {
println!("Please select a command. Run --help for options.")
todo!("Please select a valid command. Run --help for options.")
}
}
}
Expand Down
84 changes: 73 additions & 11 deletions webgestalt_lib/src/methods/multiomics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ pub struct ORAJob<'a> {

#[derive(Copy, Clone)]
pub enum NormalizationMethod {
MedianRatio,
None,
MedianRank,
MedianValue,
MeanValue,
}

/// Run a multiomics analysis, using iehter the max/mean median ratio or a typical meta analysis
Expand All @@ -61,10 +62,10 @@ pub enum NormalizationMethod {
/// - `method` - A [`MultiOmicsMethod`] enum detailing the analysis method to combine the runs
/// together (meta-analysis, mean median ration, or max median ratio).
pub fn multiomic_analysis(
jobs: Vec<AnalysisJob>,
analysis_type: AnalysisType,
_jobs: Vec<AnalysisJob>,
_analysis_type: AnalysisType,
method: MultiOmicsMethod,
) -> () {
) {
if let MultiOmicsMethod::Meta(meta_method) = method {
} else {
}
Expand Down Expand Up @@ -94,7 +95,7 @@ fn max_combine(
for list in normalized_lists {
for item in list {
if let Some(val) = batches.get_mut(&item.analyte) {
if item.rank > *val {
if item.rank.abs() > *val {
*val = item.rank;
}
} else {
Expand Down Expand Up @@ -143,21 +144,82 @@ fn mean_combine(
fn normalize(list: &mut Vec<RankListItem>, method: NormalizationMethod) -> Vec<RankListItem> {
match method {
NormalizationMethod::None => list.clone(),
NormalizationMethod::MedianRatio => {
NormalizationMethod::MedianRank => {
list.sort_by(|a, b| {
b.rank
.partial_cmp(&a.rank)
.expect("Invalid float comparison during comparison")
.expect("Invalid float comparison during normalization")
});
let median = list[list.len() / 2].rank;
let median = list.len() as f64 / 2.0;
let mut final_list: Vec<RankListItem> = Vec::new();
for item in list {
for (i, item) in list.iter().enumerate() {
final_list.push(RankListItem {
analyte: item.analyte.clone(),
rank: item.rank / median,
rank: (i as f64 - median) / median,
});
}
final_list
}
NormalizationMethod::MedianValue => {
list.sort_by(|a, b| {
b.rank
.partial_cmp(&a.rank)
.expect("Invalid float comparison during normalization")
});
let min = list.last().unwrap().rank;
let median = list[list.len() / 2].rank - min;
let mut final_list: Vec<RankListItem> = Vec::new();
for item in list.iter() {
final_list.push(RankListItem {
analyte: item.analyte.clone(),
rank: (item.rank - min) / median,
});
}
final_list
}
NormalizationMethod::MeanValue => {
list.sort_by(|a, b| {
b.rank
.partial_cmp(&a.rank)
.expect("Invalid float comparison during normalization")
});
let min = list.last().unwrap().rank;
let mean: f64 = list.iter().map(|x| x.rank).sum::<f64>() / (list.len() as f64) - min;
let mut final_list: Vec<RankListItem> = Vec::new();
for item in list.iter() {
final_list.push(RankListItem {
analyte: item.analyte.clone(),
rank: (item.rank - min) / mean,
});
}
final_list
}
}
}

pub fn combine_gmts(gmts: &Vec<Vec<Item>>) -> Vec<Item> {
let mut combined_parts: AHashMap<String, Vec<String>> = AHashMap::default();
let mut combined_urls: AHashMap<String, String> = AHashMap::default();
for gmt in gmts {
for item in gmt {
if combined_parts.contains_key(&item.id) {
combined_parts
.get_mut(&item.id)
.unwrap()
.extend(item.parts.clone());
} else {
combined_parts.insert(item.id.clone(), item.parts.clone());
combined_urls.insert(item.id.clone(), item.url.clone());
}
}
}
let mut final_gmt: Vec<Item> = Vec::new();
for (key, parts) in combined_parts {
final_gmt.push(Item {
id: key.clone(),
parts,
url: combined_urls[&key].clone(),
})
}
final_gmt
}
4 changes: 3 additions & 1 deletion webgestalt_lib/src/methods/ora.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub struct ORAConfig {
pub min_overlap: i64,
pub min_set_size: usize,
pub max_set_size: usize,
pub fdr_method: stat::AdjustmentMethod,
}

impl Default for ORAConfig {
Expand All @@ -16,6 +17,7 @@ impl Default for ORAConfig {
min_overlap: 5,
min_set_size: 5,
max_set_size: 500,
fdr_method: stat::AdjustmentMethod::BH,
}
}
}
Expand Down Expand Up @@ -90,7 +92,7 @@ pub fn get_ora(
});
let partials = res.lock().unwrap();
let p_vals: Vec<f64> = partials.iter().map(|x| x.p).collect();
let fdrs: Vec<f64> = stat::adjust(&p_vals);
let fdrs: Vec<f64> = stat::adjust(&p_vals, config.fdr_method);
let mut final_res = Vec::new();
for (i, row) in partials.clone().into_iter().enumerate() {
final_res.push(ORAResult {
Expand Down
14 changes: 13 additions & 1 deletion webgestalt_lib/src/stat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,19 @@ struct Carrier {
original_order: usize,
}

pub fn adjust(p_vals: &[f64]) -> Vec<f64> {
pub enum AdjustmentMethod {
BH,
None,
}

pub fn adjust(p_vals: &[f64], method: AdjustmentMethod) -> Vec<f64> {
match method {
AdjustmentMethod::BH => benjamini_hochberg(p_vals),
AdjustmentMethod::None => p_vals.to_vec(),
}
}

fn benjamini_hochberg(p_vals: &[f64]) -> Vec<f64> {
let mut carriers: Vec<Carrier> = p_vals
.iter()
.enumerate()
Expand Down
Loading