Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
christianholland committed Aug 30, 2019
2 parents bb06cab + e7875db commit 74e9f70
Show file tree
Hide file tree
Showing 22 changed files with 826 additions and 302 deletions.
58 changes: 54 additions & 4 deletions analyses/dorothea_benchmark/dorothea_benchmark_figures.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ library(plotrix)
library(ggsignif)
library(VennDiagram)
library(ggpubr)
theme_set(theme_cowplot())
```

```{r "dorothea benchmark data distribution"}
Expand Down Expand Up @@ -351,9 +353,13 @@ overlap_dorothea_trrust = draw.pairwise.venn(
length(unique(trrust$tf)),
length(intersect(dorothea$tf, trrust$tf)),
category = c("DoRothEA", "TRRUST"),
fill = c("#A11035","#0098A1"),
cex=2,
alpha=0.3
fill = c("#F6A800","#0098A1"),
cex=c(1,1,1),
fontfamily=c("sans", "sans", "sans"),
cat.fontfamily = c("sans", "sans"),
cat.cex = c(1,1),
cat.pos = c(-25,25)
# cat.just = list(c(3,0), c(-3,0))
) %>%
as_ggplot()
Expand Down Expand Up @@ -387,4 +393,48 @@ regulon_similarity_plot = similiartiy_scores %>%
saveRDS(regulon_similarity_plot,
"fig/dorothea_benchmark/regulon_similarity.rds")
```
```

```{r "table of global performance scores"}
# Table of global performance scores (AUROC, AUPRC and random AUPRC) of the
# benchmarked regulon resources on the mouse benchmark. The result is stored
# as a table grob so it can be arranged/exported later.

# AUPRC per resource and confidence level. `random` is computed as
# tp / (tp + tn) and is reported later as the "random AUPRC" column.
pr_global = readRDS(
  "output/dorothea_benchmark/dorothea_benchmark_result.rds"
) %>%
  filter(organism == "mouse") %>%
  unnest(pr) %>%
  distinct(name, auc, tp, tn, confidence) %>%
  mutate(random = tp / (tp + tn)) %>%
  distinct(name, auc, confidence, random) %>%
  mutate(metric = "AUPRC")

# AUROC per resource and confidence level (no `random` column here, so it is
# NA for these rows after bind_rows()).
roc_global = readRDS(
  "output/dorothea_benchmark/dorothea_benchmark_result.rds"
) %>%
  filter(organism == "mouse") %>%
  unnest(roc) %>%
  distinct(name, auc, confidence) %>%
  mutate(metric = "AUROC")

# Combine both metrics and use display names for the resources. Any `name`
# other than "dorothea"/"trrust" becomes NA because case_when() has no
# fallback branch.
p = bind_rows(pr_global, roc_global) %>%
  mutate(name = case_when(name == "dorothea" ~ "DoRothEA",
                          name == "trrust" ~ "TRRUST"))

# Wide table holding only the random baseline of the AUPRC.
r = p %>%
  select(-auc) %>%
  spread(metric, random) %>%
  select(-AUROC, "random AUPRC" = "AUPRC")

# Wide table of the AUC values joined with the random baseline.
# The join keys are spelled out (they are the only columns shared by both
# tables) so the join does not depend on implicit key detection.
t = p %>%
  select(-random) %>%
  spread(metric, auc) %>%
  left_join(r, by = c("name", "confidence")) %>%
  mutate_if(is.numeric, round, digits = 4) %>%
  rename(Resource = name, Confidence = confidence) %>%
  select(Resource, Confidence, AUROC, AUPRC, `random AUPRC`) %>%
  arrange(Confidence)

# `rows = NULL` drops the row names; the full argument name is used instead
# of relying on partial matching of `row`.
table_dorothea_performance = gridExtra::tableGrob(t, rows = NULL)

saveRDS(table_dorothea_performance,
        "fig/dorothea_benchmark/table_dorothea_performance.rds")
```
11 changes: 7 additions & 4 deletions analyses/dorothea_benchmark/dorothea_benchmark_pipeline.Rmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: Translate human regulons to mouse regulons
title: "Benchmark DoRothEA and TRRUST on TF perturbation data"
author: "Christian Holland"
date: "09/04/2018"
output: html_document
Expand All @@ -25,7 +25,7 @@ source("src/dorothea_analysis.R")
source("src/roc_curve_analysis.R")
```
### Benchmark
#### Build regulons
#### Build TRRUST regulons
```{r "build regulons", eval=F, echo=F}
# build human and mouse TRRUST regulons
path_trrust_human = "data/dorothea_benchmark/regulons/trrust_rawdata.human.tsv"
Expand Down Expand Up @@ -55,8 +55,8 @@ write_csv(mouse_trrust_regulon,
"data/dorothea_benchmark/regulons/trrust_regulon_mouse.csv")
```

#### Consider confidence score in benchmarking
```{r "consider confidence score in benchmarking"}
#### Consider confidence score in benchmarking mouse-DoRothEA vs TRRUST
```{r "consider confidence score in benchmarking mouse-DoRothEA vs TRRUST"}
# load human regulons and subset regulons based on confidence score
human_dorothea_all = read_csv(
"data/dorothea_benchmark/regulons/dorothea_regulon_human_v1.csv"
Expand Down Expand Up @@ -245,6 +245,9 @@ dorothea_performance %>%
filter(confidence == "AB")
```




#### Similarity between mouse DoRothEA and mouse TRRUST
```{r "similarity between mouse dorothea and mouse trrust"}
calc_jaccard_index = function(x) {
Expand Down
84 changes: 65 additions & 19 deletions analyses/figure_arrangement.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,48 @@ knitr::knit(..., quiet = TRUE)
library(tidyverse)
library(cowplot)
library(magick)
theme_set(theme_cowplot())
```
## Arranging individual figures into a grid/collage
### Main figures
#### Figure 1 - Benchmark overview
```{r "Figure 1"}
fig1_1 = plot_grid(
fig1_top_left = plot_grid(
ggdraw() + draw_image("paper/figures/sub/Figure1a.png"),
labels = c("A"), label_size = 18
)
fig1_top_right_top = plot_grid(
ggdraw() + draw_image("paper/figures/sub/Figure1c.png"),
labels=c("A", "C"), label_size = 18
NULL,
ggdraw() + draw_image("paper/figures/sub/Figure1d.png"),
labels=c("C", "", "D"), ncol=3, label_size = 18,
rel_widths = c(1,0.05,1)
)
fig1 = plot_grid(
fig1_1,
ggdraw() + draw_image("paper/figures/sub/Figure1b.png"),
ncol=1, labels=c("", "B"), label_size = 18
fig1_top_right = plot_grid(
fig1_top_right_top,
ggdraw() + draw_image("paper/figures/sub/Figure1cd_legend.png"),
ncol=1, rel_heights = c(1,0.25), align = "h", axis="lr"
)
fig1_top = plot_grid(
fig1_top_left,NULL,fig1_top_right,
ncol=3,
rel_widths = c(1,0.05,1),
align = "v", axis = "tb"
)
ggsave("paper/figures/Figure1.png", fig1, width=10, height=10)
fig1 = plot_grid(
fig1_top + theme(plot.margin = margin(0,0,0.5,0,unit = "cm")),
ggdraw() + draw_image("paper/figures/sub/Figure1b.png") +
theme(plot.margin = margin(0.4, 0, 0, 0, unit = "cm")),
ncol=1, labels = c("", "B"),
label_size = 18
)
ggsave("paper/figures/Figure1.png", fig1, width=10, height=8)
```
#### Figure 2 - PROGENy benchmark results
```{r "Figure 2"}
Expand Down Expand Up @@ -101,30 +125,52 @@ figS3 = pr_curve_pw
ggsave("paper/figures/FigureS3.png", figS3, width = 10, height=10)
```

#### Figure S4 - DoRothEA properties + benchmark downsampled
#### Figure S4 - DoRothEA properties + comparison to TRRUST
```{r "Figure S4"}
regulon_properties = readRDS("fig/dorothea_benchmark/regulon_properties.rds")
tf_coverage = readRDS("fig/dorothea_benchmark/tf_coverage.rds")
global_tp_tn_ratio = readRDS("fig/dorothea_benchmark/global_tp_tn_ratio.rds")
auroc_downsampled = readRDS("fig/dorothea_benchmark/auroc_downsampled.rds")
overlap_dorothea_trrust = readRDS("fig/dorothea_benchmark/overlap_dorothea_trrust.rds")
regulon_similarity = readRDS("fig/dorothea_benchmark/regulon_similarity.rds")
aligned_plots = align_plots(regulon_properties, global_tp_tn_ratio,
aligned_plots = align_plots(regulon_properties, overlap_dorothea_trrust,
align="v", axis="l")
figS4_1 = plot_grid(aligned_plots[[1]], tf_coverage, labels = c("A", "B"),
figS4_1 = plot_grid(aligned_plots[[1]], tf_coverage, labels = c("A", "D"),
label_size=18, rel_widths = c(2,1), align="h", axis="bt")
figS4_2 = plot_grid(aligned_plots[[2]], auroc_downsampled,
labels = c("C", "D"), label_size=18,
figS4_2 = plot_grid(aligned_plots[[2]], regulon_similarity,
labels = c("B", "C"), label_size=18,
rel_widths = c(1,1), align="h", axis="bt")
figS4 = plot_grid(figS4_1, figS4_2, ncol=1)
ggsave("paper/figures/FigureS4.png", figS4, width = 10, height=10)
```

#### Figure S5 - Comparison between DoRothEA and TRRUST
#### Figure S5 - DoRothEA benchmark downsampled
```{r "Figure S5"}
overlap_dorothea_trrust = readRDS("fig/dorothea_benchmark/overlap_dorothea_trrust.rds")
regulon_similarity = readRDS("fig/dorothea_benchmark/regulon_similarity.rds")
figS5 = plot_grid(overlap_dorothea_trrust, regulon_similarity,
labels = c("A", "B"), label_size = 18)
global_tp_tn_ratio = readRDS("fig/dorothea_benchmark/global_tp_tn_ratio.rds")
auroc_downsampled = readRDS("fig/dorothea_benchmark/auroc_downsampled.rds")
figS5 = plot_grid(global_tp_tn_ratio, auroc_downsampled,
labels = c("A", "B"), label_size = 18, align = "h", axis = "t")
ggsave("paper/figures/FigureS5.png", figS5, width=10, height=5)
```

#### Supp Table S3
```{r}
# Load the stored PROGENy performance table grob, wrap it with plot_grid()
# and export it as a PNG.
# NOTE(review): the heading of this section says "S3" while the output file
# name says "S2" - confirm which numbering is intended.
p = readRDS(file = "fig/progeny_benchmark/table_progeny_performance.rds")
progeny_table = plot_grid(p)
ggsave(
  filename = "paper/auxilliary_files/Supplementary Table S2.png",
  plot = progeny_table,
  width = 6,
  height = 7.5
)
```

#### Supp Table S4
```{r}
# Load the stored DoRothEA performance table grob, wrap it with plot_grid()
# and export it as a PNG.
# NOTE(review): the heading of this section says "S4" while the output file
# name says "S3" - confirm which numbering is intended.
d = readRDS(file = "fig/dorothea_benchmark/table_dorothea_performance.rds")
dorothea_table = plot_grid(d)
ggsave(
  filename = "paper/auxilliary_files/Supplementary Table S3.png",
  plot = dorothea_table,
  width = 5,
  height = 3.25
)
```
72 changes: 71 additions & 1 deletion analyses/progeny_benchmark/progeny_benchmark_figures.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -279,4 +279,74 @@ pr_curve_pw = ggplot(df_pr_pw, aes(x = recall, y=precision, color=organism)) +
lims(x=c(0,1), y=c(0,1))
saveRDS(pr_curve_pw, "fig/progeny_benchmark/pw_pr_curve.rds")
```
```

```{r "table of global performance scores"}
# Table of PROGENy performance scores (AUROC, AUPRC and random AUPRC), for
# the global and the pathway-wise evaluation. The result is stored as a
# table grob so it can be arranged/exported later.

# Global AUPRC per organism. `random` is computed as tp / (tp + tn) and is
# reported later as the "random AUPRC" column.
pr_global = readRDS(
  "output/progeny_benchmark/progeny_benchmark_result_global.rds"
) %>%
  unnest(pr) %>%
  distinct(organism, auc, tp, tn) %>%
  mutate(random = tp / (tp + tn)) %>%
  distinct(organism, auc, random) %>%
  mutate(metric = "AUPRC") %>%
  mutate(evaluation = "Global")

# Pathway-wise AUPRC per organism and pathway, with the same baseline.
pr_pw = readRDS(
  "output/progeny_benchmark/progeny_benchmark_result_pw.rds"
) %>%
  unnest(pr) %>%
  distinct(pathway, organism, auc, tp, tn) %>%
  mutate(random = tp / (tp + tn)) %>%
  distinct(organism, auc, pathway, random) %>%
  mutate(metric = "AUPRC") %>%
  mutate(evaluation = "Pathway-wise")

# Global AUROC per organism.
roc_global = readRDS(
  "output/progeny_benchmark/progeny_benchmark_result_global.rds"
) %>%
  unnest(roc) %>%
  distinct(organism, auc) %>%
  mutate(metric = "AUROC") %>%
  mutate(evaluation = "Global")

# Pathway-wise AUROC per organism and pathway.
roc_pw = readRDS(
  "output/progeny_benchmark/progeny_benchmark_result_pw.rds"
) %>%
  unnest(roc) %>%
  distinct(organism, auc, pathway) %>%
  mutate(metric = "AUROC") %>%
  mutate(evaluation = "Pathway-wise")

# Stack all four summaries; columns missing in a summary (`pathway` for the
# global rows, `random` for the AUROC rows) are filled with NA.
p = bind_rows(pr_global, pr_pw, roc_global, roc_pw)

# Wide table holding only the random baseline of the AUPRC.
r = p %>%
  select(-auc) %>%
  spread(metric, random) %>%
  select(-AUROC, "random AUPRC" = "AUPRC")

# Wide table of the AUC values joined with the random baseline.
# The join keys are spelled out (they are the only columns shared by both
# tables) so the join does not depend on implicit key detection.
t = p %>%
  select(-random) %>%
  spread(metric, auc) %>%
  arrange(evaluation, pathway) %>%
  left_join(r, by = c("organism", "evaluation", "pathway")) %>%
  mutate_if(is.numeric, round, digits = 4) %>%
  mutate(organism = str_to_title(organism)) %>%
  rename(Organism = organism,
         Evaluation = evaluation,
         Pathway = pathway) %>%
  select(Organism, Evaluation, Pathway, AUROC, AUPRC, `random AUPRC`)

# `rows = NULL` drops the row names; the full argument name is used instead
# of relying on partial matching of `row`.
# NOTE(review): gpar() names its colour parameter `col`; `color=2` is
# presumably ignored, so the separator is drawn in the default colour. It is
# left unchanged to keep the rendered table as is - switch to `col = 2` if a
# coloured line is wanted.
table_progeny_performance = gridExtra::tableGrob(t, rows = NULL) %>%
  gtable::gtable_add_grob(.,
    grobs = grid::segmentsGrob( # line across the bottom of table row 3
      x0 = unit(0,"npc"),
      y0 = unit(0,"npc"),
      x1 = unit(1,"npc"),
      y1 = unit(0,"npc"),
      gp = grid::gpar(lwd = 2.0, lty = "dashed", color=2)),
    t = 3, l = 1, r = ncol(.))

saveRDS(table_progeny_performance,
        "fig/progeny_benchmark/table_progeny_performance.rds")
```

7 changes: 1 addition & 6 deletions analyses/progeny_benchmark/progeny_benchmark_pipeline.Rmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: "Benchmark PROGENy on mouse and human single gene/drug perturbation data"
title: "Benchmark PROGENy on mouse and human pathway perturbation data"
author: "Christian Holland"
date: "05/03/2018"
output: html_document
Expand Down Expand Up @@ -49,11 +49,6 @@ M_human = read.csv("data/progeny_benchmark/models/progeny_matrix_human_v1.csv",
M_mouse = read.csv("data/progeny_benchmark/models/progeny_matrix_mouse_v1.csv",
row.names=1, check.names = F)
M_mouse %>% rownames_to_column("gene") %>%
gather(pathway, weight, -gene) %>%
filter(weight != 0) %>%
as_tibble() %>%
count(pathway)
# load expression
expr = readRDS("output/progeny_benchmark/progeny_input.rds") %>%
Expand Down
4 changes: 2 additions & 2 deletions data/progeny_benchmark/models/build_full_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ e = read_csv("data/progeny_benchmark/models/sub/full_model_extended.csv") %>%
filter(pathway %in% c("Androgen", "Estrogen", "WNT"))

# combine both matrices into the full matrix containing all 14 pathways
full_matrix = bind_rows(o, new_pws) %>%
full_matrix = bind_rows(o, e) %>%
drop_na()

write_csv(full_matrix,
"data/progeny_benchmark/models/progeny_matrix_human_full_v1.csv")
"data/progeny_benchmark/models/progeny_matrix_human_full_v1.csv")
Binary file modified fig/dorothea_benchmark/auroc_downsampled.rds
Binary file not shown.
Binary file modified fig/dorothea_benchmark/global_tp_tn_ratio.rds
Binary file not shown.
Binary file modified fig/dorothea_benchmark/overlap_dorothea_trrust.rds
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/figures/Figure1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/figures/FigureS4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/figures/FigureS5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/figures/sub/Figure1c.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 74e9f70

Please sign in to comment.