diff --git a/R-python-examples.html b/R-python-examples.html index 0b699cdd27..3e849f97af 100644 --- a/R-python-examples.html +++ b/R-python-examples.html @@ -590,11 +590,24 @@ margin-right: auto; } -
-

Example 1: Find datasets relevant to neurodegeneration

+
+

Vignettes

+ +
+
+

Quick examples

+
+

Find datasets relevant to neurodegeneration

-
-

R

+
+

R

library(gemma.R)
 library(dplyr)
 get_datasets(query = "bipolar disorder") # plain text search
@@ -602,8 +615,8 @@ 

R

get_datasets(filter = "experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri = http://purl.obolibrary.org/obo/MONDO_0004985")# specific filters
-
-

Python

+
+

Python

import gemmapy
 api = gemmapy.GemmaPy()
 api.get_datasets(query = "bipolar disorder") # plain text search
@@ -700,65 +713,82 @@ 

Python

-
-

Retrieve differential expression in a bipolar disorder study

+
+

Retrieve differential expression in a bipolar disorder study

-
-

R

-
# differential expression p values, fold changes and statistics
-dif_exp <- get_differential_expression_values('GSE46416')[[1]]
-# metadata about contrasts
-contrasts <- get_dataset_differential_expression_analyses('GSE46416')
+
+

R

+
dif_exp <- get_differential_expression_values('GSE8397')
+contrasts <- get_dataset_differential_expression_analyses('GSE8397')
 
 # identify the contrast of interest
-manic_contrast <- contrasts[contrasts$experimental.factors %>% sapply(\(x){
-    x$summary == "bipolar disorder has_modifier manic phase"
+bp_contrast <- contrasts[contrasts$experimental.factors %>% sapply(\(x){
+    all(x$summary == "Parkinson disease")
 }),]
 
+
 frame <- data.frame(
-    genes = dif_exp$GeneSymbol,
-    ncbi_ids = dif_exp$NCBIid,
-    p_vals = dif_exp[[paste0('contrast_',manic_contrast$contrast.ID,'_pvalue')]],
-    fold_changes = dif_exp[[paste0('contrast_',manic_contrast$contrast.ID,'_log2fc')]]
+    genes = dif_exp[[as.character(bp_contrast$result.ID)]]$GeneSymbol,
+    ncbi_ids = dif_exp[[as.character(bp_contrast$result.ID)]]$NCBIid,
+    fdr = p.adjust(dif_exp[[as.character(bp_contrast$result.ID)]][[paste0('contrast_',bp_contrast$contrast.ID,'_pvalue')]],'fdr'),
+    fold_changes = dif_exp[[as.character(bp_contrast$result.ID)]][[paste0('contrast_',bp_contrast$contrast.ID,'_log2fc')]]
 )
 
 # mark differentially expressed genes
-frame <- frame %>% mutate(dif_exp = p_vals<0.05 & abs(fold_changes)>1)
-
-
+frame <- frame %>% mutate(`Differentially Expressed` = fdr<0.05 & abs(fold_changes)>1)
-
-

Python

-
# WIP
+
+

Python

+
import pandas as pd
+from statsmodels.stats.multitest import fdrcorrection as fdr
+
 
 # differential expression p values, fold changes and statistics
-dif_exp = api.get_differential_expression_values('GSE46416')
-dif_exp = dif_exp[list(dif_exp.keys())[0]]
-contrasts = api.get_dataset_differential_expression_analyses('GSE46416')
+dif_exp = api.get_differential_expression_values('GSE8397') +contrasts = api.get_dataset_differential_expression_analyses('GSE8397') + +bp_contrast = contrasts[[(x.summary == "Parkinson disease").all() for x in contrasts.experimental_factors]] + +frame = pd.DataFrame({ + "genes": dif_exp[bp_contrast.result_ID[0]].GeneSymbol, + 'ncbi_ids': dif_exp[bp_contrast.result_ID[0]].NCBIid, + 'fdr': fdr(dif_exp[bp_contrast.result_ID[0]]['contrast_' + bp_contrast.contrast_ID[0] + "_pvalue"])[1], + 'fold_changes': dif_exp[bp_contrast.result_ID[0]]['contrast_' + bp_contrast.contrast_ID[0] + "_log2fc"] +}) + +frame["Differentially Expressed"] = (frame.fdr<.05) & (frame.fold_changes.abs() > 1) +
-

+
## [1] "https://gemma.msl.ubc.ca/rest/v2/"
+
## Warning: Removed 15 rows containing missing values (geom_point).
+

+

Retrieve gene expression of genes of interest

-
-

R

-
dif_exp_genes = frame %>% filter(dif_exp) %>% 
-    {.$ncbi_ids} %>% 
-    {.[.!='' & !grepl('[|]',.)]}# remove invalid ids
+
+

R

+
dif_exp_genes = frame %>% filter(`Differentially Expressed`) %>% 
+    {.$ncbi_ids}
     
 # get a bioconductor object containing
-# expression and differential expression data
-expression <- get_dataset_object('GSE46416',
-                                 genes = dif_exp_genes,type = 'se')[[1]]
+# expression data +expression <- get_dataset_object('GSE8397', + genes = dif_exp_genes,type = 'se')
-
-

Python

-
# WIP
+
+

Python

+
dif_exp_genes = frame[frame["Differentially Expressed"]].ncbi_ids
+
+
+# get an AnnData object containing
+# expression data
+expression = api.get_dataset_object(["GSE8397"],genes = list(dif_exp_genes))
-

+