-
Notifications
You must be signed in to change notification settings - Fork 0
/
seq_analysis.R
113 lines (81 loc) · 2.99 KB
/
seq_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
library(pheatmap)
library(PoiClaClu) # poissson Distance
library(DESeq2)
library(ggplot2)
library(dplyr)
library(ggbeeswarm)
library(apeglm)
library(genefilter)
library(AnnotationDbi)
library(org.Hs.eg.db)
setwd("~/data_drive/Rnaseq_data")
res <- readRDS("EwS.rds")
counts <- assay(res)
meta <- as.data.frame (colData(res))
condition_df <- subset(meta,select = "condition")
# EDA analysis
#Deseq
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = condition_df,
design = ~condition)
dds <- DESeq(dds)
res <- results(dds,contrast = c("condition","shEF1","shCTR"))
res
summary(res)
# get the signnificant gene for down-regulation
resSig <- subset(res,padj <0.1)
head(resSig[ order(resSig$log2FoldChange), ])
# get the signicant gene for up -regulation
head(resSig[ order(resSig$log2FoldChange, decreasing = TRUE), ])
# get the top gene and plot using the plot counts
topGene <- rownames(res)[which.min(res$padj)]
plotCounts(dds, gene = topGene, intgroup=c("condition"))
# get the top gene and plot using the ggplot
geneCounts <- plotCounts(dds, gene = topGene, intgroup = c("condition"),
returnData = TRUE)
ggplot(geneCounts, aes(x = condition, y = count, color = condition)) +
scale_y_log10() + geom_beeswarm(cex = 2)
ggplot(geneCounts, aes(x = condition, y = count, color = condition, group = condition)) +
scale_y_log10() + geom_point(size = 3) + geom_line()
# MA plot by three techniques
resultsNames(dds)
res <- lfcShrink(dds, coef="condition_shEF1_vs_shCTR", type="apeglm")
plotMA(res, ylim = c(-5, 5))
resnos <- lfcShrink(dds, coef="condition_shEF1_vs_shCTR")
plotMA(resnos, ylim = c(-5, 5))
plotMA(res, ylim = c(-5,5))
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2)
text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue")
})
# histogram of p values
hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
col = "grey50", border = "white")
#Gene Clustering
# calculating the variance stabilizing transfromation
vsd <- vst(dds,blind = FALSE)
head(assay(vsd),3)
topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 20)
mat <- assay(vsd)[ topVarGenes, ]
mat <- mat - rowMeans(mat)
anno <- as.data.frame(colData(vsd)[, c("condition")])
rownames(anno) <- colnames(vsd)
colnames(anno)[1]<- "Condition"
pheatmap(mat, annotation_col = anno)
# Annotation using org.Hs.eg.db
columns(org.Hs.eg.db)
ens.str <- substr(rownames(res), 1, 15)
res$symbol <- mapIds(org.Hs.eg.db,
keys=ens.str,
column="SYMBOL",
keytype="ENSEMBL",
multiVals="first")
res$entrez <- mapIds(org.Hs.eg.db,
keys=ens.str,
column="ENTREZID",
keytype="ENSEMBL",
multiVals="first")
resOrdered <- res[order(res$pvalue),]
head(resOrdered)
results_ <- as.data.frame(resOrdered)