-
Notifications
You must be signed in to change notification settings - Fork 11
/
DTEG.R
219 lines (186 loc) · 9.97 KB
/
DTEG.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
library(DESeq2)
library(ggplot2)
## Input files
# Calculating differential translation genes (DTGs) requires the count matrices
# from Ribo-seq and RNA-seq. These should be the raw counts obtained from
# featureCounts or any other read-counting tool; they should not be normalized
# or batch corrected.
# A sample information file is also required. Its rows should be in the same
# order as the sample columns of the combined count matrix (Ribo-seq columns
# first, then RNA-seq columns, because the two tables are joined with cbind()
# below). It should include the sequencing type, treatment, batch and any other
# covariate that needs to be modelled.

### Get count matrix files and sample information
args <- commandArgs(trailingOnly = TRUE)

# Exactly four arguments are expected:
#   1. Ribo-seq count table   2. RNA-seq count table
#   3. sample information     4. batch flag (0 or 1)
if (length(args) != 4) {
  stop("Ribo-seq counts, RNA-seq counts and Sample Information and batch presence (0/1) should be supplied.\n", call. = FALSE)
}

# Input filenames
ribo_file <- args[1]
rna_file <- args[2]
data_file <- args[3]
batch <- args[4] # kept as the character string given on the command line

# Read and merge count matrices
ribo <- read.delim(ribo_file)
rna <- read.delim(rna_file)

# cbind() pairs rows purely by position, so refuse to continue if the two
# tables do not list the same genes in the same order; otherwise counts from
# different genes would silently end up on the same row.
if (!identical(rownames(ribo), rownames(rna))) {
  stop("Ribo-seq and RNA-seq count tables must contain the same genes in the same order.", call. = FALSE)
}
merge <- cbind(ribo, rna)
head(merge)

# Sample information file
coldata <- read.delim(data_file)
# Convert every column to a factor one column at a time. apply() would first
# coerce the data frame to a character matrix, which pads numeric values with
# spaces and (since R 4.0) yields character columns instead of factors.
coldata[] <- lapply(coldata, as.factor)
head(coldata)
## Detecting differential translation regulation
### DESeq2 object with batch and interaction term in the design
# Pick the model formula first (with or without the Batch term, depending on
# the batch flag), then build the combined Ribo-seq + RNA-seq data set once.
interaction_design <- if (batch == 1) {
  ~ Batch + Condition + SeqType + Condition:SeqType
} else if (batch == 0) {
  ~ Condition + SeqType + Condition:SeqType
} else {
  stop("Batch presence should be indicated by 0 or 1 only", call. = FALSE)
}

ddsMat <- DESeqDataSetFromMatrix(
  countData = merge,
  colData = coldata,
  design = interaction_design
)

# Make "RNA" the reference level of SeqType before fitting the model.
ddsMat$SeqType <- relevel(ddsMat$SeqType, "RNA")
ddsMat <- DESeq(ddsMat)

# Show the available coefficient names; one of them is selected further down.
resultsNames(ddsMat)
# Create the output directory tree. dir.create() is used rather than shelling
# out to mkdir so this works on any platform and stays quiet when the
# directories already exist from a previous run.
dir.create(file.path("Results", "fold_changes"), recursive = TRUE, showWarnings = FALSE)
dir.create(file.path("Results", "gene_lists"), recursive = TRUE, showWarnings = FALSE)
# Every output path from here on is relative to Results/.
setwd("Results")

# Choose the term you want to look at from resultsNames(ddsMat).
# Condition2.SeqTypeRIBO is the Condition:SeqType interaction term, i.e. the
# change in Ribo-seq levels in Condition2 vs Condition1 after accounting for
# the change in RNA-seq levels in Condition2 vs Condition1.
# NOTE(review): the coefficient name depends on the level labels in the sample
# sheet (Condition levels 1/2, SeqType levels RNA/RIBO) - adjust if they differ.
res <- results(ddsMat, contrast = list("Condition2.SeqTypeRIBO"))
summary(res)

# Number of genes with adjusted p-value below 0.05 (NA p-values are not counted).
sum(res$padj < 0.05, na.rm = TRUE)

# Gene identifiers passing the threshold, one per line.
write.table(rownames(res)[which(res$padj < 0.05)], "gene_lists/DTEGs.txt",
            quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)
# Full results table for the interaction term.
write.table(res, "fold_changes/deltaTE.txt",
            quote = FALSE, sep = "\t", col.names = TRUE, row.names = TRUE)
# All figures produced from here on go into one multi-page PDF; the device is
# closed by the dev.off() call at the end of the script.
pdf("Result_figures.pdf", useDingbats = FALSE)

## Visualisation and interpretation

# Analyse one sequencing type on its own (Ribo-seq only or RNA-seq only):
#   1. build a DESeq2 data set (with a Batch term when the batch flag is 1),
#   2. draw a PCA of the variance-stabilised counts on the open device,
#   3. fit the model and compute shrunken Condition 2 vs 1 fold changes.
#
# counts       raw count table for the samples of this sequencing type
# sample_info  rows of the sample sheet describing those samples
# with_batch   batch flag from the command line (0 or 1)
#
# Returns a list with the fitted data set (`dds`) and the results table (`res`).
analyse_seqtype <- function(counts, sample_info, with_batch) {
  if (with_batch == 1) {
    model_formula <- ~ Condition + Batch
    pca_groups <- c("Condition", "Batch")
    pca_mapping <- aes(PC1, PC2, color = Condition, shape = Batch)
  } else if (with_batch == 0) {
    model_formula <- ~ Condition
    pca_groups <- "Condition"
    pca_mapping <- aes(PC1, PC2, color = Condition)
  } else {
    stop("Batch presence should be indicated by 0 or 1 only", call. = FALSE)
  }

  dds <- DESeqDataSetFromMatrix(countData = counts,
                                colData = sample_info,
                                design = model_formula)

  # PCA
  vsd <- vst(dds)
  pca_data <- plotPCA(vsd, intgroup = pca_groups, returnData = TRUE)
  percent_var <- round(100 * attr(pca_data, "percentVar"))
  pca_plot <- ggplot(pca_data, pca_mapping) +
    geom_point(size = 3) +
    xlab(paste0("PC1: ", percent_var[1], "% variance")) +
    ylab(paste0("PC2: ", percent_var[2], "% variance")) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black")) +
    theme(aspect.ratio = 1)
  # Print explicitly: a ggplot object is only drawn when printed, and relying
  # on top-level auto-printing fails inside a function or under source().
  print(pca_plot)

  dds <- DESeq(dds)
  res <- results(dds, contrast = c("Condition", "2", "1"))
  # coef = 2 is the coefficient right after the intercept; Condition is the
  # first term of both designs above, so this is expected to be the Condition
  # effect - confirm with resultsNames(dds) if the design is changed.
  res <- lfcShrink(dds, coef = 2, res = res, type = "apeglm")

  list(dds = dds, res = res)
}

### DESeq2 object with batch for Ribo-seq
ind <- which(coldata$SeqType == "RIBO")
coldata_ribo <- coldata[ind, ]
ribo_fit <- analyse_seqtype(ribo, coldata_ribo, batch)
ddsMat_ribo <- ribo_fit$dds
res_ribo <- ribo_fit$res
write.table(res_ribo, "fold_changes/deltaRibo.txt",
            quote = FALSE, sep = "\t", col.names = TRUE, row.names = TRUE)

### DESeq2 object with batch for RNA-seq
ind <- which(coldata$SeqType == "RNA")
coldata_rna <- coldata[ind, ]
rna_fit <- analyse_seqtype(rna, coldata_rna, batch)
ddsMat_rna <- rna_fit$dds
res_rna <- rna_fit$res
write.table(res_rna, "fold_changes/deltaRNA.txt",
            quote = FALSE, sep = "\t", col.names = TRUE, row.names = TRUE)
# Genes whose RNA-seq change has an adjusted p-value below 0.05.
write.table(rownames(res_rna)[which(res_rna$padj < 0.05)], "gene_lists/DTG.txt",
            quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)
## Classes of genes
# Genes are sorted into classes from three adjusted p-values: the interaction
# term (res), the Ribo-seq-only change (res_ribo) and the RNA-seq-only change
# (res_rna). The three tables are indexed by position, so they are taken to
# list the same genes in the same order. Genes with an NA adjusted p-value in
# any table used by a class are dropped from that class by which().
gene_ids <- rownames(res)

te_sig <- res$padj < 0.05
te_nonsig <- res$padj > 0.05
ribo_sig <- res_ribo$padj < 0.05
ribo_nonsig <- res_ribo$padj > 0.05
rna_sig <- res_rna$padj < 0.05
rna_nonsig <- res_rna$padj > 0.05

# Forwarded: RNA and Ribo both change, interaction term does not.
forwarded <- gene_ids[which(te_nonsig & ribo_sig & rna_sig)]

# Exclusive: interaction term and Ribo change, RNA does not.
exclusive <- gene_ids[which(te_sig & ribo_sig & rna_nonsig)]

# All three significant: split by whether column 2 (the log2 fold change) of
# the interaction term and of the RNA change share the same sign.
both <- which(te_sig & ribo_sig & rna_sig)
sign_product <- res[both, 2] * res_rna[both, 2]
intensified <- gene_ids[both[which(sign_product > 0)]]

# Buffered: interaction term and RNA change while Ribo does not, followed by
# the opposite-sign genes from the all-significant set (order matters because
# the first element is later used as the example gene).
buffered <- c(
  gene_ids[which(te_sig & ribo_nonsig & rna_sig)],
  gene_ids[both[which(sign_product < 0)]]
)

# Write one gene identifier per line, without header or quoting.
write_gene_list <- function(genes, path) {
  write.table(genes, path, quote = FALSE, sep = "\t",
              col.names = FALSE, row.names = FALSE)
}
write_gene_list(forwarded, "gene_lists/forwarded.txt")
write_gene_list(exclusive, "gene_lists/exclusive.txt")
write_gene_list(intensified, "gene_lists/intensified.txt")
write_gene_list(buffered, "gene_lists/buffered.txt")
# Scatter plot of RNA-seq (x) against Ribo-seq (y) log2 fold changes, with the
# gene classes highlighted on top of a faint grey background of all genes.
#
# The axes are symmetric around zero, so the limit must be the largest
# *absolute* fold change: using the plain maximum would clip points whose
# negative fold change is larger in magnitude than the largest positive one
# (and would invert the axes if every fold change were negative).
max_val <- max(abs(c(res_ribo[, 2], res_rna[, 2])), na.rm = TRUE)
plot(y = res_ribo[, 2], x = res_rna[, 2],
     xlab = "RNA-seq log2 fold change", ylab = "Ribo-seq log2 fold change",
     asp = 1, pch = 16, col = rgb(128/255, 128/255, 128/255, 0.1),
     ylim = c(-max_val, max_val), xlim = c(-max_val, max_val), cex = 0.4)
# Reference lines: the diagonal (equal change in both assays) and both axes.
abline(a = 0, b = 1, col = "gray")
abline(h = 0, v = 0, col = "gray")
# Class overlays: forwarded in blue, exclusive in red, intensified and
# buffered in magenta.
# NOTE(review): intensified and buffered share the same colour, so they cannot
# be told apart in the figure - confirm this is intended.
points(y = res_ribo[forwarded, 2], x = res_rna[forwarded, 2], pch = 16, col = rgb(0, 0, 1, 1))
points(y = res_ribo[exclusive, 2], x = res_rna[exclusive, 2], pch = 16, col = rgb(1, 0, 0, 1))
points(y = res_ribo[intensified, 2], x = res_rna[intensified, 2], pch = 16, col = rgb(1, 0, 1, 1))
points(y = res_ribo[buffered, 2], x = res_rna[buffered, 2], pch = 16, col = rgb(1, 0, 1, 1))
### Examples for each class of genes

# Draw one example panel for a gene class: three lines running from 0 at
# condition 1 to the gene's log2 fold change (column 2 of each results table)
# at condition 2 - red for `res`, gray for `res_ribo`, blue for `res_rna`.
#
# gene         row name of the gene to plot; NA when the class is empty
# panel_title  title shown above the panel
#
# When no gene is available the panel is left empty (title only) so that the
# 2 x 2 layout is preserved and the script still reaches dev.off(); indexing
# the results tables with NA would otherwise abort the run.
plot_example_gene <- function(gene, panel_title) {
  if (length(gene) == 0 || is.na(gene)) {
    message("No gene available for panel '", panel_title, "'; leaving it empty.")
    plot.new()
    title(main = panel_title)
    return(invisible(NULL))
  }

  lfc_te <- res[gene, 2]
  lfc_ribo <- res_ribo[gene, 2]
  lfc_rna <- res_rna[gene, 2]

  # The y-range always includes 0 because every line starts there.
  y_u <- max(lfc_te, lfc_ribo, lfc_rna, 0)
  y_l <- min(lfc_te, lfc_ribo, lfc_rna, 0)

  plot(c(1, 2), c(0, lfc_te), type = "l", col = "red", xaxt = "n",
       xlab = "Conditions", ylim = c(y_l, y_u), ylab = "Log2 Fold Change",
       main = panel_title)
  lines(c(1, 2), c(0, lfc_ribo), col = "gray")
  lines(c(1, 2), c(0, lfc_rna), col = "blue")
  axis(1, at = c(1, 2), labels = c(1, 2), las = 1)
  legend("bottomleft", c("RNA", "Ribo", "RibOnly"), fill = c("blue", "gray", "red"),
         cex = 1, border = NA, bty = "n")
  invisible(NULL)
}

# Four panels on one page, using the first gene of each class as the example.
par(mfrow = c(2, 2))
plot_example_gene(forwarded[1], "Forwarded gene")
plot_example_gene(exclusive[1], "Exclusive gene")
plot_example_gene(buffered[1], "Buffered gene")
plot_example_gene(intensified[1], "Intensified gene")

# Close the PDF device so the figure file is flushed to disk.
dev.off()