diff --git a/README.md b/README.md index 613612e..0cf5c21 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,26 @@ Rust implementation of [WebGestaltR](https://github.com/bzhanglab/webgestaltr). +## Notes + +This CLI is focused purely on computation. **It does not provide GMT files or HTML reports**. The output of this tool is JSON files containing the results. For a more feature-complete tool, see the original [WebGestaltR](https://bzhanglab.github.io/WebGestaltR/) tool. + ## Install ```shell -git clone https://github.com/bzhanglab/webgestalt_rust.git -cd webgestalt_rust -cargo build --release +cargo install webgestalt ``` -## Run +## CLI + +For help with the CLI, run ```shell -cargo run --release -- example ora +webgestalt --help ``` + +Example of running over-representation analysis using `kegg.gmt`, with an interesting list at `int.txt` and a reference of `ref.txt`. Outputs JSON file at `output.json`. + +```shell +webgestalt ora -g kegg.gmt -i int.txt -r ref.txt -o output.json +``` \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 51df1bb..18ca42f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +#![doc = include_str!("../README.md")] use clap::{Args, Parser}; use clap::{Subcommand, ValueEnum}; use owo_colors::{OwoColorize, Stream::Stdout, Style}; diff --git a/webgestalt_lib/src/lib.rs b/webgestalt_lib/src/lib.rs index 5e9b891..493e75f 100644 --- a/webgestalt_lib/src/lib.rs +++ b/webgestalt_lib/src/lib.rs @@ -1,10 +1,10 @@ +#![doc = include_str!("../README.md")] use std::{error::Error, fmt}; pub mod methods; pub mod readers; pub mod stat; pub mod writers; - trait CustomError { fn msg(&self) -> String; } diff --git a/webgestalt_lib/src/methods/gsea.rs b/webgestalt_lib/src/methods/gsea.rs index d6fb977..6e8ad46 100644 --- a/webgestalt_lib/src/methods/gsea.rs +++ b/webgestalt_lib/src/methods/gsea.rs @@ -36,7 +36,6 @@ pub struct RankListItem { } struct PartialGSEAResult { - // TODO: Look at adding enrichment and normalized enrichment score set: 
String, p: f64, es: f64, @@ -296,7 +295,7 @@ fn enrichment_score( ) } -/// Run GSEA and return a [`Vec`] for all analayte sets. /// /// # Parameters /// diff --git a/webgestalt_lib/src/methods/multilist.rs b/webgestalt_lib/src/methods/multilist.rs index a0555fa..280503d 100644 --- a/webgestalt_lib/src/methods/multilist.rs +++ b/webgestalt_lib/src/methods/multilist.rs @@ -59,7 +59,7 @@ pub enum NormalizationMethod { /// # Parameters /// /// - `jobs` - A [`Vec`] containing all of the separates 'jobs' or analysis to combine -/// - `method` - A [`MultiOmicsMethod`] enum detailing the analysis method to combine the runs together (meta-analysis, mean median ration, or max median ratio). +/// - `method` - A [`MultiListMethod`] enum detailing the analysis method to combine the runs together (meta-analysis, mean median ratio, or max median ratio). /// - `fdr_method` - [`AdjustmentMethod`] of what FDR method to use to adjust p-values /// /// # Returns diff --git a/webgestalt_lib/src/methods/nta.rs b/webgestalt_lib/src/methods/nta.rs index da68a6e..13983c7 100644 --- a/webgestalt_lib/src/methods/nta.rs +++ b/webgestalt_lib/src/methods/nta.rs @@ -13,12 +13,16 @@ pub struct NTAConfig { pub reset_probability: f64, /// A float representing the tolerance for probability calculation pub tolerance: f64, + /// The [`NTAMethod`] to use for the analysis pub method: Option, } +/// Different methods for the NTA method that decides the important nodes to return #[derive(Debug, Clone)] pub enum NTAMethod { + /// Find the N most important seeds, where N is the provided [`usize`] value Prioritize(usize), + /// Find the N most important non-seed nodes, where N is the provided [`usize`] value Expand(usize), } @@ -34,19 +38,32 @@ impl Default for NTAConfig { } } +/// Struct representing the NTA results #[derive(Debug, Serialize)] pub struct NTAResult { + /// The nodes in the neighborhood. 
Will always include every seed pub neighborhood: Vec, + /// The random walk probabilities (score) for the nodes in the neighborhood pub scores: Vec, + /// If using the Prioritize method, contains the top N seeds. For expand method, this Vec is empty. pub candidates: Vec, } +/// Performs network topology-based analysis using random walk to identify important nodes in a network +/// +/// ## Parameters +/// +/// - `config`: A [`NTAConfig`] struct containing the parameters for the analysis. +/// +/// ## Returns +/// +/// Returns a [`NTAResult`] struct containing the results from the analysis. Is [serde](https://serde.rs/) compatible. pub fn get_nta(config: NTAConfig) -> NTAResult { let mut method = config.clone().method; if method.is_none() { method = Some(NTAMethod::Expand(10)); } - let mut nta_res = nta(config.clone()); + let mut nta_res = process_nta(config.clone()); match method { Some(NTAMethod::Prioritize(size)) => { let only_seeds = nta_res @@ -95,12 +112,16 @@ pub fn get_nta(config: NTAConfig) -> NTAResult { } } -/// Uses random walk to calculate the neighborhood of a set of nodes -/// Returns [`Vec`]representing the nodes in the neighborhood +/// Uses random walk to calculate the probabilities of each node being walked through +/// Returns [`Vec`] representing the nodes in the neighborhood +/// +/// ## Parameters +/// - `config` - A [`NTAConfig`] struct containing the edge list, seeds, neighborhood size, reset probability, and tolerance /// -/// # Parameters -/// - `config` - A [`NTAOptions`] struct containing the edge list, seeds, neighborhood size, reset probability, and tolerance -pub fn nta(config: NTAConfig) -> Vec<(String, f64)> { +/// ## Returns +/// +/// Returns a [`Vec<(String, f64)>`] where the [`String`] is the original node name, and the following value is the random walk probability (higher is typically better) +pub fn process_nta(config: NTAConfig) -> Vec<(String, f64)> { println!("Building Graph"); let unique_nodes = 
ahash::AHashSet::from_iter(config.edge_list.iter().flatten().cloned()); let mut node_map: ahash::AHashMap = ahash::AHashMap::default(); @@ -135,20 +156,32 @@ pub fn nta(config: NTAConfig) -> Vec<(String, f64)> { .collect() } +/// Calculates the probability that each node will be walked when starting from one of the seeds +/// +/// ## Parameters +/// +/// - `adj_matrix` - A 2d adjacency matrix, where 1 means the nodes at the row and column indices are connected +/// - `seed_indices` - a [`Vec`] of the indices of the seeds (starting points) +/// - `r` - a [`f64`] of the reset probability (default in WebGestaltR is 0.5) +/// - `tolerance` - the tolerance/threshold value in [`f64`] (WebGestaltR default is `1e-6`) +/// +/// ## Output +/// +/// Returns a 1d array containing the probability for each node fn random_walk_probability( adj_matrix: &ndarray::Array2, - node_indices: &Vec, + seed_indices: &Vec, r: f64, tolerance: f64, ) -> ndarray::Array1 { - let num_nodes = node_indices.len() as f64; + let num_nodes = seed_indices.len() as f64; let de = adj_matrix.sum_axis(Axis(0)); // de to 2d array let de = de.insert_axis(Axis(1)); let temp = adj_matrix.t().div(de); let w = temp.t(); let mut p0 = ndarray::Array1::from_elem(w.shape()[0], 0.0); - for i in node_indices { + for i in seed_indices { p0[*i] = 1.0 / num_nodes; } let mut pt = p0.clone(); diff --git a/webgestalt_lib/src/methods/ora.rs b/webgestalt_lib/src/methods/ora.rs index 21a49e3..d598004 100644 --- a/webgestalt_lib/src/methods/ora.rs +++ b/webgestalt_lib/src/methods/ora.rs @@ -53,14 +53,6 @@ pub fn ora_p(m: i64, j: i64, n: i64, k: i64) -> f64 { /// - `interest_list` - A [`AHashSet`] of the interesting analytes /// - `reference` - A [`AHashSet`] of the reference list /// - `gmt` - A [`Vec`] of the gmt file -/// -/// # Panics -/// -/// Panics if the [`Arc`] struggles to lock during parallelization. -/// -/// # Errors -/// -/// This function will return an error if . 
pub fn get_ora( interest_list: &AHashSet, reference: &AHashSet,