-
Notifications
You must be signed in to change notification settings - Fork 0
/
RNAseq_Utils
154 lines (149 loc) · 8.92 KB
/
RNAseq_Utils
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# add gene ID to GO enrichment table (GOseq)
# to be uses only with significantly enriched GO categories
add_genes2GO<-function(GO, gene2cat, DE_genes, sep=",", colID="id"){
require(stringi)
gene2cat_DE<-gene2cat[gene2cat[,1]%in%DE_genes[,colID],]
Genes<-unlist(lapply(GO$category, function(x) stri_paste(gene2cat_DE[gene2cat_DE[,2]==x,][,1], collapse = sep)))
#genes_list<-stri_paste(gene2cat[gene2cat[,2]==GO,][,1], collapse = sep)
data<-cbind(GO, Genes)
return(data)
}
# add GO category to DEG table
# use all mapped categories
add_GO2genes<-function(DE_genes, gene2cat, sep=",", cat.name="GO_annotation", colID="id"){
require(stringi, dplyr)
gene2cat_DE<-gene2cat[gene2cat[,1]%in%DE_genes[,colID],]
GO<-str_split_fixed(unlist(lapply(DE_genes[,colID], function(x) paste(x, stri_paste(gene2cat_DE[gene2cat_DE[,1]==x,][,2], collapse = sep), sep="/tx"))), pattern = "/tx", n=2)
#genes_list<-stri_paste(gene2cat[gene2cat[,2]==GO,][,1], collapse = sep)
#data<-cbind(DE_genes, GO)
colnames(GO)[2]<-cat.name
data<-merge(DE_genes, GO, by.x=colID, by.y=1, all.x=T)
#data<-data %>% relocate(all_of(colID), .before = all_of(cat.name))
return(data)
}
GOCircleR<-function (data, title, nsub, rad1, rad2, table.legend = T, zsc.col,
lfc.col, label.size, label.fontface, lab.wrap, trunc.width)
{
xmax <- y1 <- zscore <- y2 <- ID <- logx <- logy2 <- logy <- logFC <- NULL
if (missing(title))
title <- ""
if (missing(nsub))
if (dim(data)[1] > 10)
nsub <- 10
else nsub <- dim(data)[1]
if (missing(rad1))
rad1 <- 2
if (missing(rad2))
rad2 <- 3
if (missing(zsc.col))
zsc.col <- c("red", "white", "blue")
if (missing(lfc.col))
lfc.col <- c("cornflowerblue", "firebrick1")
else lfc.col <- rev(lfc.col)
if (missing(label.size))
label.size = 5
if (missing(label.fontface))
label.fontface = "plain"
data$adj_pval <- -log(data$adj_pval, 10)
suby <- data[!duplicated(data$term), ]
if (missing(trunc.width))
trunc.width = 100
if (missing(lab.wrap))
lab.wrap<-90
else suby$term<-sapply(suby$term, function(x) str_trunc(paste(str_wrap(x,lab.wrap), collapse="\n"), width=trunc.width))
if (is.numeric(nsub) == T) {
suby <- suby[1:nsub, ]
}
else {
if (strsplit(nsub[1], ":")[[1]][1] == "GO") {
suby <- suby[suby$ID %in% nsub, ]
}
else {
suby <- suby[suby$term %in% nsub, ]
}
nsub <- length(nsub)
}
N <- dim(suby)[1]
r_pval <- round(range(suby$adj_pval), 0) + c(-2, 2)
ymax <- c()
for (i in 1:length(suby$adj_pval)) {
val <- (suby$adj_pval[i] - r_pval[1])/(r_pval[2] - r_pval[1])
ymax <- c(ymax, val)
}
df <- data.frame(x = seq(0, 10 - (10/N), length = N), xmax = rep(10/N -
0.2, N), y1 = rep(rad1, N), y2 = rep(rad2, N), ymax = ymax,
zscore = suby$zscore, term = paste(suby$term, "\n", "[", suby$ID, "]", sep=""))
scount <- data[!duplicated(data$term), which(colnames(data) ==
"count")][1:nsub]
idx_term <- which(!duplicated(data$term) == T)
xm <- c()
logs <- c()
for (sc in 1:length(scount)) {
idx <- c(idx_term[sc], idx_term[sc] + scount[sc] - 1)
val <- stats::runif(scount[sc], df$x[sc] + 0.06, (df$x[sc] +
df$xmax[sc] - 0.06))
xm <- c(xm, val)
r_logFC <- round(range(data$logFC[idx[1]:idx[2]]), 0) +
c(-1, 1)
for (lfc in idx[1]:idx[2]) {
val <- (data$logFC[lfc] - r_logFC[1])/(r_logFC[2] -
r_logFC[1])
logs <- c(logs, val)
}
}
cols <- c()
for (ys in 1:length(logs)) cols <- c(cols, ifelse(data$logFC[ys] >
0, "upregulated", "downregulated"))
dfp <- data.frame(logx = xm, logy = logs, logFC = factor(cols),
logy2 = rep(rad2, length(logs)))
c <- ggplot() + geom_rect(data = df, aes(xmin = x, xmax = x +
xmax, ymin = y1, ymax = y1 + ymax, fill = zscore), colour = "black") +
geom_rect(data = df, aes(xmin = x, xmax = x + xmax,
ymin = y2, ymax = y2 + 1), fill = "gray70") + geom_rect(data = df,
aes(xmin = x, xmax = x + xmax, ymin = y2 + 0.5, ymax = y2 +
0.5), colour = "white") + geom_rect(data = df, aes(xmin = x,
xmax = x + xmax, ymin = y2 + 0.25, ymax = y2 + 0.25),
colour = "white") + geom_rect(data = df, aes(xmin = x,
xmax = x + xmax, ymin = y2 + 0.75, ymax = y2 + 0.75),
colour = "white") + geom_text(data = df, aes(x = x +
(xmax/2), y = y2 + 1.6, label = term, angle = 360 - (x = x +
(xmax/2))/(10/360)), size = label.size, fontface = label.fontface) +
coord_polar() + labs(title = title) + ylim(1, rad2 +
1.6) + xlim(0, 10) + theme_blank + scale_fill_gradient2("z-score",
space = "Lab", low = zsc.col[3], mid = zsc.col[2], high = zsc.col[1],
guide = guide_colourbar(title.position = "top", title.hjust = 0.5),
breaks = c(min(df$zscore), max(df$zscore)), labels = c("decreasing",
"increasing")) + theme(legend.position = "bottom",
legend.background = element_rect(fill = "transparent"),
legend.box = "horizontal", legend.direction = "horizontal") +
geom_point(data = dfp, aes(x = logx, y = logy2 + logy),
pch = 21, fill = "transparent", colour = "black",
size = 3) + geom_point(data = dfp, aes(x = logx,
y = logy2 + logy, colour = logFC), size = 2.5) + scale_colour_manual(values = lfc.col,
guide = guide_legend(title.position = "top", title.hjust = 0.5))
if (table.legend) {
table <- draw_table(suby)
graphics::par(mar = c(0.1, 0.1, 0.1, 0.1))
grid.arrange(c, table, ncol = 2)
}
else {
c + theme(plot.background = element_rect(colour="white", fill = "white"),
panel.background = element_rect(colour="white", fill = "white"))
}
}
#### Convert in DAVID format to add z-scores and 'GOplot' compatibility ####
goseq2david<-function(goseq_df){
data<-goseq_df[, c(1,6,7,8,9)]
colnames(data)<-c("ID", "Term", "Category", "adj_pval", "Genes")
return(data)
}
deseq2david<-function(deseq_df, logFC_col, colID="id"){
data<-data.frame(ID=deseq_df[,colID], logFC=deseq_df[, logFC_col])
return(data)
}
# plot only legend from any ggplot2 plot (credits: from https://stackoverflow.com/questions/12539348/ggplot-separate-legend-and-plot)
gglegend <- function(x){
tmp <- ggplot_gtable(ggplot_build(x))
leg <- which(sapply(tmp$grobs, function(y) y$name) == "guide-box")
tmp$grobs[[leg]]
}