Merge branch 'master' of https://github.com/saezlab/ConservedFootprints

saezlab · Aug 30, 2019 · 74e9f70 · 74e9f70
2 parents bb06cab + e7875db
commit 74e9f70
Show file tree

Hide file tree

Showing 22 changed files with 826 additions and 302 deletions.
diff --git a/analyses/dorothea_benchmark/dorothea_benchmark_figures.Rmd b/analyses/dorothea_benchmark/dorothea_benchmark_figures.Rmd
@@ -22,6 +22,8 @@ library(plotrix)
 library(ggsignif)
 library(VennDiagram)
 library(ggpubr)
+
+theme_set(theme_cowplot())
 ```
 
 ```{r "dorothea benchmark data distribution"}
@@ -351,9 +353,13 @@ overlap_dorothea_trrust = draw.pairwise.venn(
   length(unique(trrust$tf)), 
   length(intersect(dorothea$tf, trrust$tf)), 
   category = c("DoRothEA", "TRRUST"),
-  fill = c("#A11035","#0098A1"),
-  cex=2,
-  alpha=0.3
+  fill = c("#F6A800","#0098A1"),
+  cex=c(1,1,1),
+  fontfamily=c("sans", "sans", "sans"),
+  cat.fontfamily = c("sans", "sans"),
+  cat.cex = c(1,1),
+  cat.pos = c(-25,25)
+  # cat.just = list(c(3,0), c(-3,0))
   ) %>%
   as_ggplot()
 
@@ -387,4 +393,48 @@ regulon_similarity_plot = similiartiy_scores %>%
 
 saveRDS(regulon_similarity_plot, 
         "fig/dorothea_benchmark/regulon_similarity.rds")
-```
+```
+
+```{r "table of global performance scores"}
+# Build the table of global benchmark scores (AUROC, AUPRC and the
+# tp / (tp + tn) baseline) for the mouse benchmark, one row per resource and
+# confidence level, and store it as a table grob for later figure assembly.
+
+# Precision-recall side: one AUPRC per resource/confidence plus the baseline
+# that is reported below as "random AUPRC".
+pr_global = readRDS(
+  "output/dorothea_benchmark/dorothea_benchmark_result.rds"
+  ) %>%
+  filter(organism == "mouse") %>%
+  unnest(pr) %>%
+  distinct(name, auc, tp, tn, confidence) %>%
+  mutate(random = tp / (tp + tn)) %>%
+  distinct(name, auc, confidence, random) %>%
+  mutate(metric = "AUPRC")
+
+
+# ROC side: one AUROC per resource/confidence (no baseline column).
+roc_global = readRDS(
+  "output/dorothea_benchmark/dorothea_benchmark_result.rds"
+  ) %>%
+  filter(organism == "mouse") %>%
+  unnest(roc) %>%
+  distinct(name, auc, confidence) %>%
+  mutate(metric = "AUROC")
+
+
+# Long table of both metrics with display names for the two resources.
+p = bind_rows(pr_global, roc_global) %>%
+  mutate(name = case_when(name == "dorothea" ~ "DoRothEA",
+                          name == "trrust" ~ "TRRUST"))
+# Baseline per resource/confidence; only the AUPRC rows carry a value, so the
+# (all-NA) AUROC column is dropped.
+r = p %>% 
+  select(-auc) %>%
+  spread(metric, random) %>%
+  select(-AUROC, "random AUPRC" = "AUPRC")
+
+# Wide table of scores joined with the baseline. The join keys are spelled out
+# so the join cannot silently change if a column is added upstream.
+t = p %>% 
+  select(-random) %>%
+  spread(metric, auc) %>%
+  left_join(r, by = c("name", "confidence")) %>%
+  mutate_if(is.numeric, round,4) %>%
+  rename(Resource = name, Confidence = confidence) %>%
+  select(Resource, Confidence, AUROC, AUPRC, `random AUPRC`) %>%
+  arrange(Confidence)
+  
+
+# `rows = NULL` suppresses the row-name column; the argument is written in
+# full instead of relying on partial matching of `row`.
+table_dorothea_performance = gridExtra::tableGrob(t, rows = NULL)
+
+saveRDS(table_dorothea_performance, "fig/dorothea_benchmark/table_dorothea_performance.rds")
+```
diff --git a/analyses/dorothea_benchmark/dorothea_benchmark_pipeline.Rmd b/analyses/dorothea_benchmark/dorothea_benchmark_pipeline.Rmd
@@ -1,5 +1,5 @@
 ---
-title: Translate human regulons to mouse regulons
+title: "Benchmark DoRothEA and TRRUST on TF perturbation data"
 author: "Christian Holland"
 date: "09/04/2018"
 output: html_document
@@ -25,7 +25,7 @@ source("src/dorothea_analysis.R")
 source("src/roc_curve_analysis.R")
 ```
 ### Benchmark
-#### Build regulons
+#### Build TRRUST regulons
 ```{r "build regulons", eval=F, echo=F}
 # build human and mouse TRRUST regulons
 path_trrust_human = "data/dorothea_benchmark/regulons/trrust_rawdata.human.tsv"
@@ -55,8 +55,8 @@ write_csv(mouse_trrust_regulon,
           "data/dorothea_benchmark/regulons/trrust_regulon_mouse.csv")
 ```
 
-#### Consider confidence score in benchmarking
-```{r "consider confidence score in benchmarking"}
+#### Consider confidence score in benchmarking mouse-DoRothEA vs TRRUST
+```{r "consider confidence score in benchmarking mouse-DoRothEA vs TRRUST"}
 # load human regulons and subset regulons based on confidence score
 human_dorothea_all = read_csv(
   "data/dorothea_benchmark/regulons/dorothea_regulon_human_v1.csv"
@@ -245,6 +245,9 @@ dorothea_performance %>%
   filter(confidence == "AB")
 ```
 
+
+
+
 #### Similarity between mouse DoRothEA and mouse TRRUST
 ```{r "similarity between mouse dorothea and mouse trrust"}
 calc_jaccard_index = function(x) {

diff --git a/analyses/figure_arrangement.Rmd b/analyses/figure_arrangement.Rmd
@@ -17,24 +17,48 @@ knitr::knit(..., quiet = TRUE)
 library(tidyverse)
 library(cowplot)
 library(magick)
+
+theme_set(theme_cowplot())
 ```
 ## Arranging individual figures into a grid/collage
 ### Main figures
 #### Figure 1 - Benchmark overview
 ```{r "Figure 1"}
-fig1_1 = plot_grid(
+fig1_top_left = plot_grid(
   ggdraw() + draw_image("paper/figures/sub/Figure1a.png"),
+  labels = c("A"), label_size = 18
+)
+fig1_top_right_top = plot_grid(
   ggdraw() + draw_image("paper/figures/sub/Figure1c.png"),
-  labels=c("A", "C"), label_size = 18
+  NULL,
+  ggdraw() + draw_image("paper/figures/sub/Figure1d.png"),
+  labels=c("C", "", "D"), ncol=3, label_size = 18,
+  rel_widths = c(1,0.05,1)
 )
 
-fig1 = plot_grid(
-  fig1_1,
-  ggdraw() + draw_image("paper/figures/sub/Figure1b.png"),
-  ncol=1, labels=c("", "B"), label_size = 18
+fig1_top_right = plot_grid(
+  fig1_top_right_top,
+  ggdraw() + draw_image("paper/figures/sub/Figure1cd_legend.png"),
+  ncol=1, rel_heights = c(1,0.25), align = "h", axis="lr"
+)
+
+
+fig1_top = plot_grid(
+  fig1_top_left,NULL,fig1_top_right,
+  ncol=3,
+  rel_widths = c(1,0.05,1),
+  align = "v", axis = "tb"
   )
-ggsave("paper/figures/Figure1.png", fig1, width=10, height=10)
 
+fig1 = plot_grid(
+  fig1_top + theme(plot.margin = margin(0,0,0.5,0,unit = "cm")),
+  ggdraw() + draw_image("paper/figures/sub/Figure1b.png") + 
+    theme(plot.margin = margin(0.4, 0, 0, 0, unit = "cm")),
+  ncol=1, labels = c("", "B"),
+  label_size = 18
+)
+
+ggsave("paper/figures/Figure1.png", fig1, width=10, height=8)
 ```
 #### Figure 2 - PROGENy benchmark results
 ```{r "Figure 2"}
@@ -101,30 +125,52 @@ figS3 = pr_curve_pw
 ggsave("paper/figures/FigureS3.png", figS3, width = 10, height=10)
 ```
 
-#### Figure S4 - DoRothEA properties + benchmark downsampled
+#### Figure S4 - DoRothEA properties + comparison to TRRUST
 ```{r "Figure S4"}
 regulon_properties = readRDS("fig/dorothea_benchmark/regulon_properties.rds")
 tf_coverage = readRDS("fig/dorothea_benchmark/tf_coverage.rds")
-global_tp_tn_ratio = readRDS("fig/dorothea_benchmark/global_tp_tn_ratio.rds")
-auroc_downsampled = readRDS("fig/dorothea_benchmark/auroc_downsampled.rds")
+overlap_dorothea_trrust = readRDS("fig/dorothea_benchmark/overlap_dorothea_trrust.rds")
+regulon_similarity = readRDS("fig/dorothea_benchmark/regulon_similarity.rds")
 
-aligned_plots = align_plots(regulon_properties, global_tp_tn_ratio, 
+aligned_plots = align_plots(regulon_properties, overlap_dorothea_trrust, 
                             align="v", axis="l")
 
-figS4_1 = plot_grid(aligned_plots[[1]], tf_coverage, labels = c("A", "B"), 
+figS4_1 = plot_grid(aligned_plots[[1]], tf_coverage, labels = c("A", "D"), 
                     label_size=18, rel_widths = c(2,1), align="h", axis="bt")
-figS4_2 = plot_grid(aligned_plots[[2]], auroc_downsampled, 
-                    labels = c("C", "D"), label_size=18,
+figS4_2 = plot_grid(aligned_plots[[2]], regulon_similarity, 
+                    labels = c("B", "C"), label_size=18,
                     rel_widths = c(1,1), align="h", axis="bt")
 figS4 = plot_grid(figS4_1, figS4_2, ncol=1)
 ggsave("paper/figures/FigureS4.png", figS4, width = 10, height=10)
 ```
 
-#### Figure S5 - Comparison between DoRothEA and TRRUST
+#### Figure S5 - DoRothEA benchmark downsampled
 ```{r "Figure S5"}
-overlap_dorothea_trrust = readRDS("fig/dorothea_benchmark/overlap_dorothea_trrust.rds")
-regulon_similarity = readRDS("fig/dorothea_benchmark/regulon_similarity.rds")
-figS5 = plot_grid(overlap_dorothea_trrust, regulon_similarity, 
-                  labels = c("A", "B"), label_size = 18)
+
+global_tp_tn_ratio = readRDS("fig/dorothea_benchmark/global_tp_tn_ratio.rds")
+auroc_downsampled = readRDS("fig/dorothea_benchmark/auroc_downsampled.rds")
+
+figS5 = plot_grid(global_tp_tn_ratio, auroc_downsampled, 
+                  labels = c("A", "B"), label_size = 18, align = "h", axis = "t")
 ggsave("paper/figures/FigureS5.png", figS5, width=10, height=5)
 ```
+
+#### Supp Table S3
+```{r}
+# Render the stored PROGENy performance table grob and save it as an image.
+p = readRDS("fig/progeny_benchmark/table_progeny_performance.rds")
+
+progeny_table = plot_grid(p)
+
+# The file number follows this chunk's heading (Supp Table S3); the previous
+# "S2" file name did not match it.
+ggsave("paper/auxilliary_files/Supplementary Table S3.png",
+       progeny_table, width =6, height = 7.5)
+```
+
+#### Supp Table S4
+```{r}
+# Render the stored DoRothEA performance table grob and save it as an image.
+d = readRDS("fig/dorothea_benchmark/table_dorothea_performance.rds")
+
+dorothea_table = plot_grid(d)
+
+# The file number follows this chunk's heading (Supp Table S4); the previous
+# "S3" file name did not match it.
+ggsave("paper/auxilliary_files/Supplementary Table S4.png",
+       dorothea_table, width =5, height = 3.25)
+```
diff --git a/analyses/progeny_benchmark/progeny_benchmark_figures.Rmd b/analyses/progeny_benchmark/progeny_benchmark_figures.Rmd
@@ -279,4 +279,74 @@ pr_curve_pw = ggplot(df_pr_pw, aes(x = recall, y=precision, color=organism)) +
   lims(x=c(0,1), y=c(0,1))
   
 saveRDS(pr_curve_pw, "fig/progeny_benchmark/pw_pr_curve.rds")
-```
+```
+
+```{r "table of global performance scores"}
+# Build the table of PROGENy benchmark scores (AUROC, AUPRC and the
+# tp / (tp + tn) baseline) per organism, for the global and the pathway-wise
+# evaluation, and store it as a table grob for later figure assembly.
+
+# Global precision-recall scores plus the baseline reported as "random AUPRC".
+pr_global = readRDS(
+  "output/progeny_benchmark/progeny_benchmark_result_global.rds"
+  ) %>%
+  unnest(pr) %>%
+  distinct(organism, auc, tp, tn) %>%
+  mutate(random = tp / (tp + tn)) %>%
+  distinct(organism, auc, random) %>%
+  mutate(metric = "AUPRC") %>%
+  mutate(evaluation = "Global")
+
+# Pathway-wise precision-recall scores plus baseline.
+pr_pw = readRDS(
+  "output/progeny_benchmark/progeny_benchmark_result_pw.rds"
+  ) %>% 
+  unnest(pr) %>%
+  distinct(pathway, organism, auc, tp, tn) %>%
+  mutate(random = tp / (tp + tn)) %>%
+  distinct(organism, auc, pathway, random) %>%
+  mutate(metric = "AUPRC") %>%
+  mutate(evaluation = "Pathway-wise")
+
+# Global ROC scores (no baseline column).
+roc_global = readRDS(
+  "output/progeny_benchmark/progeny_benchmark_result_global.rds"
+  ) %>%
+  unnest(roc) %>%
+  distinct(organism, auc) %>%
+  mutate(metric = "AUROC") %>%
+  mutate(evaluation = "Global")
+
+# Pathway-wise ROC scores (no baseline column).
+roc_pw = readRDS(
+  "output/progeny_benchmark/progeny_benchmark_result_pw.rds"
+  ) %>% 
+  unnest(roc) %>%
+  distinct(organism, auc, pathway) %>%
+  mutate(metric = "AUROC") %>%
+  mutate(evaluation = "Pathway-wise")
+
+# Long table of all scores; `pathway` is NA for the global rows.
+p = bind_rows(pr_global, pr_pw, roc_global, roc_pw)
+# Baseline per organism/evaluation/pathway; only the AUPRC rows carry a value,
+# so the (all-NA) AUROC column is dropped.
+r = p %>% 
+  select(-auc) %>%
+  spread(metric, random) %>%
+  select(-AUROC, "random AUPRC" = "AUPRC")
+
+# Wide table of scores joined with the baseline. The join keys are spelled out
+# so the join cannot silently change if a column is added upstream.
+t = p %>% 
+  select(-random) %>%
+  spread(metric, auc) %>%
+  arrange(evaluation, pathway) %>%
+  left_join(r, by = c("organism", "evaluation", "pathway")) %>%
+  mutate_if(is.numeric, round,4) %>%
+  mutate(organism = str_to_title(organism)) %>%
+  rename(Organism = organism,
+         Evaluation = evaluation, 
+         Pathway = pathway) %>%
+  select(Organism, Evaluation, Pathway, AUROC, AUPRC, `random AUPRC`)
+  
+
+# `rows = NULL` suppresses the row-name column (written in full instead of
+# relying on partial matching of `row`). A dashed segment is then added along
+# the bottom edge of gtable row 3, spanning all columns.
+# NOTE(review): presumably this separates the global rows from the
+# pathway-wise rows; confirm against the rendered table.
+table_progeny_performance = gridExtra::tableGrob(t, rows = NULL) %>%
+  gtable::gtable_add_grob(.,
+        grobs = grid::segmentsGrob( # line across the bottom
+            x0 = unit(0,"npc"),
+            y0 = unit(0,"npc"),
+            x1 = unit(1,"npc"),
+            y1 = unit(0,"npc"),
+            # `col` is the gpar name for the line colour; the former
+            # `color = 2` is not a recognised graphical parameter.
+            gp = grid::gpar(lwd = 2.0, lty = "dashed", col = 2)),
+        t = 3, l = 1, r = ncol(.))
+
+saveRDS(table_progeny_performance, "fig/progeny_benchmark/table_progeny_performance.rds")
+```
+
diff --git a/analyses/progeny_benchmark/progeny_benchmark_pipeline.Rmd b/analyses/progeny_benchmark/progeny_benchmark_pipeline.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "Benchmark PROGENy on mouse and human single gene/drug perturbation data"
+title: "Benchmark PROGENy on mouse and human pathway perturbation data"
 author: "Christian Holland"
 date: "05/03/2018"
 output: html_document
@@ -49,11 +49,6 @@ M_human = read.csv("data/progeny_benchmark/models/progeny_matrix_human_v1.csv",
 M_mouse = read.csv("data/progeny_benchmark/models/progeny_matrix_mouse_v1.csv",
                    row.names=1, check.names = F)
 
-M_mouse %>% rownames_to_column("gene") %>%
-  gather(pathway, weight, -gene) %>%
-  filter(weight != 0) %>%
-  as_tibble() %>%
-  count(pathway)
 
 # load expression
 expr = readRDS("output/progeny_benchmark/progeny_input.rds") %>%

diff --git a/data/progeny_benchmark/models/build_full_matrix.R b/data/progeny_benchmark/models/build_full_matrix.R
@@ -8,8 +8,8 @@ e = read_csv("data/progeny_benchmark/models/sub/full_model_extended.csv") %>%
   filter(pathway %in% c("Androgen", "Estrogen", "WNT"))
 
 # combine both matrices into the full matrix containing all 14 pathways
-full_matrix = bind_rows(o, new_pws) %>%
+full_matrix = bind_rows(o, e) %>%
   drop_na()
 
 write_csv(full_matrix, 
-          "data/progeny_benchmark/models/progeny_matrix_human_full_v1.csv")
+          "data/progeny_benchmark/models/progeny_matrix_human_full_v1.csv")
diff --git a/fig/dorothea_benchmark/auroc_downsampled.rds b/fig/dorothea_benchmark/auroc_downsampled.rds
diff --git a/fig/dorothea_benchmark/global_tp_tn_ratio.rds b/fig/dorothea_benchmark/global_tp_tn_ratio.rds
diff --git a/fig/dorothea_benchmark/overlap_dorothea_trrust.rds b/fig/dorothea_benchmark/overlap_dorothea_trrust.rds
diff --git a/fig/dorothea_benchmark/table_dorothea_performance.rds b/fig/dorothea_benchmark/table_dorothea_performance.rds
diff --git a/fig/progeny_benchmark/table_progeny_performance.rds b/fig/progeny_benchmark/table_progeny_performance.rds
diff --git a/paper/auxilliary_files/Supplementary Table S3.png b/paper/auxilliary_files/Supplementary Table S3.png
diff --git a/paper/auxilliary_files/Supplementary Table S4.png b/paper/auxilliary_files/Supplementary Table S4.png
diff --git a/paper/figures/Figure1.png b/paper/figures/Figure1.png
diff --git a/paper/figures/FigureS4.png b/paper/figures/FigureS4.png
diff --git a/paper/figures/FigureS5.png b/paper/figures/FigureS5.png
diff --git a/paper/figures/sub/Figure1c.png b/paper/figures/sub/Figure1c.png