-
Notifications
You must be signed in to change notification settings - Fork 3
/
SIG_Array_QA_QC_Workflow.r
116 lines (83 loc) · 4.26 KB
/
SIG_Array_QA_QC_Workflow.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
## Load libraries and functions for array processing and QA/QC plots
source('./scripts/array_qa_qc_functions.r')
## Read the annotation spreadsheet into R
## You will have to change the directory path
annot_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/Bat_Virus_Array'
setwd(annot_dir)
sample_annot = read.xls('BatPlate_Annotation_editedMM.xlsx', header=T, as.is=T, na.strings=c(""," ", "NA", "#DIV/0!"))
rownames(sample_annot) = sample_annot$ID
## Check annotation dataframe
head(sample_annot[,1:5])
## Set directory where .CEL files are located, and get the list of files
## You will have to change the directory path
cel_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/Bat_Virus_Array/data'
cel_files = list.celfiles(cel_dir)
## Check file names
cel_files[1:3]
## Create sample names (these must match the annotation file)
sample_names = gsub("_2.CEL", "", cel_files)
## Check sample names
sample_names[1:5]
## Subset sample annotation dataframe
sample_annot = sample_annot[sample_names,]
length(sample_names) == dim(sample_annot)[1]
## Create a phenoData object
phenoData = new("AnnotatedDataFrame", data=sample_annot)
phenoData
## Load the raw expression data
raw.exprs = read.celfiles(file.path(cel_dir, cel_files), pkgname="pd.mogene.2.1.st",
sampleNames=sample_names, phenoData=phenoData)
head(pData(phenoData)[,1:5])
## Save raw expression to file in same directory as .CEL files
## This may be used as input for DE and Pathway analysis
save(raw.exprs, file=file.path(cel_dir, 'bat_virus_raw_exprs_2-FEB-2016.rda'))
## Create un-normalized ExpressionSet
bgcor.exprs = rma(raw.exprs, normalize=FALSE, target="core")
## Create normalized ExpressionSet
norm.exprs = rma(raw.exprs, normalize=TRUE, target="core")
## Check normalized expression matrix
exprs(norm.exprs)[1:5,1:5]
## Save normalized expression to file (optional)
#save(norm.exprs, file="bat_virus_array_normalized.rda")
describe(make.boxplot)
## Boxplot of un-normalized expression values
make.boxplot(bgcor.exprs, type = 'raw', order.by=c("Mating", "Number"), color.by="Mating", make.pdf=F)
## Boxplot of normalized expression values
make.boxplot(norm.exprs, type = 'norm', order.by=c("Mating", "Number"), color.by="Mating", make.pdf=F)
describe(plot.bac.spikes)
## The probe group file from Affy for the array
pg_file = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/MoGene-2_1-st.pgf'
## Create bacterial spike plot
plot.bac.spikes(norm.exprs, pg_file, make.pdf=F)
describe(plot.polya.spikes)
## Create polyA spike plot
plot.polya.spikes(norm.exprs, pg_file, make.pdf=F)
describe(make.heatmap)
## Create annotated heatmap (don't include MAQC in heatmap)
make.heatmap(norm.exprs[, norm.exprs$ID != 'MAQC'], base.factors=c('Sex', 'D4_percent'), make.pdf=F)
## Load MAQC Annotations
maqc_annot_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/MAQC'
maqc_annot = read.xls(file.path(maqc_annot_dir, 'maqc_annotation.xlsx'), header=T, as.is=T, na.strings=c(""," ", "NA", "#DIV/0!"))
rownames(maqc_annot) = maqc_annot$ID
## Set directory where MAQC .CEL files are located, and get the list of files
## You will have to change the directory path
maqc_cel_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/MAQC'
maqc_cel_files = list.celfiles(maqc_cel_dir)
## Check file names
maqc_cel_files[1:3]
## Create sample names (these must match the annotation file)
maqc_names = gsub(".CEL", "", maqc_cel_files)
## Create a phenoData object
maqcData = new("AnnotatedDataFrame", data=maqc_annot)
maqcData
## Load the MAQC raw expression data
maqc.raw.exprs = read.celfiles(file.path(maqc_cel_dir, maqc_cel_files), pkgname="pd.mogene.2.1.st",
sampleNames=maqc_names, phenoData=maqcData)
## Create un-normalized MAQC ExpressionSet
maqc.bgcor.exprs = rma(maqc.raw.exprs, normalize=FALSE, target="core")
## Create normalized MAQC ExpressionSet
maqc.norm.exprs = rma(maqc.raw.exprs, normalize=TRUE, target="core")
## Create boxplots
make.boxplot(maqc.bgcor.exprs, type = 'raw', order.by=c("Date.Downloaded"), color.by="Date.Downloaded", make.pdf=F)
## Create boxplots
make.boxplot(maqc.norm.exprs, type = 'norm', order.by=c("Date.Downloaded"), color.by="Date.Downloaded", make.pdf=F)