-
Notifications
You must be signed in to change notification settings - Fork 0
/
RNA-seq data analysis with edgeR.R
58 lines (40 loc) · 1.35 KB
/
RNA-seq data analysis with edgeR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# install required packages
BiocManager::install("edgeR")
# import important libraries
library(edgeR)
library(dplyr)
# read count matrix file
exprs <- get(load("dataFilt.RData"))
# Load clinical data
clinical_data <- read.csv("Clinical.csv")
clinical_data <- data.frame(clinical_data, row.names = T)
all(rownames(clinical_data) %in% colnames(exprs))
all(rownames(clinical_data) == colnames(exprs))
# build a count matrix
count_matrix <- as.matrix(exprs)
sample_info <- clinical_data$braf_status
# group samples
group = factor(sample_info)
group = relevel(group, ref="WT")
# Create DGE object
dim(dataFilt)
dge <- DGEList(counts = count_matrix, group = group)
dim(dataFilt)
# Calculate normalization factor
dge <- calcNormFactors(dge)
# Filter lowly- expressed genes
cutoff <- 1
drop <- which(apply(cpm(dge), 1, max) < cutoff)
dge <- dge[-drop,]
head(dge$counts)
# Estimate dispersion
dge <- estimateDisp(dge)
# Testing for DEG
test <- exactTest(dge) # If it is not working try test <- exactTest(dge,pair=c(1,2))
top_degs <- topTags(test,n="Inf")
top_degs <- top_degs$table
# Total number of differentially expressed genes at FDR < 0.05 and threshold value 1
summary(decideTests(test, lfc = 1))
# save degs file
write.csv(top_degs,file=" top_degs.csv")
save(top_degs, file="top_degs.RData")