diff --git a/R-python-examples.html b/R-python-examples.html index 0b699cdd27..3e849f97af 100644 --- a/R-python-examples.html +++ b/R-python-examples.html @@ -590,11 +590,24 @@ margin-right: auto; } -
library(gemma.R)
library(dplyr)
get_datasets(query = "bipolar disorder") # plain text search
@@ -602,8 +615,8 @@ R
get_datasets(filter =
"experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri = http://purl.obolibrary.org/obo/MONDO_0004985")# specific filters
import gemmapy
api = gemmapy.GemmaPy()
api.get_datasets(query = "bipolar disorder") # plain text search
@@ -700,65 +713,82 @@ Python
# differential expression p values, fold changes and statistics
-dif_exp <- get_differential_expression_values('GSE46416')[[1]]
-# metadata about contrasts
-contrasts <- get_dataset_differential_expression_analyses('GSE46416')
+
+R
+dif_exp <- get_differential_expression_values('GSE8397')
+contrasts <- get_dataset_differential_expression_analyses('GSE8397')
# identify the contrast of interest
-manic_contrast <- contrasts[contrasts$experimental.factors %>% sapply(\(x){
- x$summary == "bipolar disorder has_modifier manic phase"
+bp_contrast <- contrasts[contrasts$experimental.factors %>% sapply(\(x){
+ all(x$summary == "Parkinson disease")
}),]
+
frame <- data.frame(
- genes = dif_exp$GeneSymbol,
- ncbi_ids = dif_exp$NCBIid,
- p_vals = dif_exp[[paste0('contrast_',manic_contrast$contrast.ID,'_pvalue')]],
- fold_changes = dif_exp[[paste0('contrast_',manic_contrast$contrast.ID,'_log2fc')]]
+ genes = dif_exp[[as.character(bp_contrast$result.ID)]]$GeneSymbol,
+ ncbi_ids = dif_exp[[as.character(bp_contrast$result.ID)]]$NCBIid,
+ fdr = p.adjust(dif_exp[[as.character(bp_contrast$result.ID)]][[paste0('contrast_',bp_contrast$contrast.ID,'_pvalue')]],'fdr'),
+ fold_changes = dif_exp[[as.character(bp_contrast$result.ID)]][[paste0('contrast_',bp_contrast$contrast.ID,'_log2fc')]]
)
# mark differentially expressed genes
-frame <- frame %>% mutate(dif_exp = p_vals<0.05 & abs(fold_changes)>1)
-
-
+frame <- frame %>% mutate(`Differentially Expressed` = fdr<0.05 & abs(fold_changes)>1)
-
-Python
-# WIP
+
+Python
+import pandas as pd
+from statsmodels.stats.multitest import fdrcorrection as fdr
+
# differential expression p values, fold changes and statistics
-dif_exp = api.get_differential_expression_values('GSE46416')
-dif_exp = dif_exp[list(dif_exp.keys())[0]]
-contrasts = api.get_dataset_differential_expression_analyses('GSE46416')
+dif_exp = api.get_differential_expression_values('GSE8397')
+contrasts = api.get_dataset_differential_expression_analyses('GSE8397')
+
+bp_contrast = contrasts[[(x.summary == "Parkinson disease").all() for x in contrasts.experimental_factors]]
+
+frame = pd.DataFrame({
+ "genes": dif_exp[bp_contrast.result_ID[0]].GeneSymbol,
+ 'ncbi_ids': dif_exp[bp_contrast.result_ID[0]].NCBIid,
+ 'fdr': fdr(dif_exp[bp_contrast.result_ID[0]]['contrast_' + bp_contrast.contrast_ID[0] + "_pvalue"])[1],
+ 'fold_changes': dif_exp[bp_contrast.result_ID[0]]['contrast_' + bp_contrast.contrast_ID[0] + "_log2fc"]
+})
+
+frame["Differentially Expressed"] = (frame.fdr<.05) & (frame.fold_changes.abs() > 1)
+
-
+## [1] "https://gemma.msl.ubc.ca/rest/v2/"
+## Warning: Removed 15 rows containing missing values (geom_point).
+
+
dif_exp_genes = frame %>% filter(dif_exp) %>%
- {.$ncbi_ids} %>%
- {.[.!='' & !grepl('[|]',.)]}# remove invalid ids
+
+R
+dif_exp_genes = frame %>% filter(`Differentially Expressed`) %>%
+ {.$ncbi_ids}
# get a bioconductor object containing
-# expression and differential expression data
-expression <- get_dataset_object('GSE46416',
- genes = dif_exp_genes,type = 'se')[[1]]
+# expression data
+expression <- get_dataset_object('GSE8397',
+ genes = dif_exp_genes,type = 'se')
-
-Python
-# WIP
+
+Python
+dif_exp_genes = frame[frame["Differentially Expressed"]].ncbi_ids
+
+
+# get an AnnData object containing
+# expression data
+expression = api.get_dataset_object(["GSE8397"],genes = list(dif_exp_genes))
-
+