-
Notifications
You must be signed in to change notification settings - Fork 0
/
goana_default.R
143 lines (113 loc) · 5.57 KB
/
goana_default.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
goana.default <- function(de, universe = NULL, species = "Hs", prior.prob = NULL, covariate=NULL, plot=FALSE, ...)
# Gene ontology analysis of DE genes
# Gordon Smyth and Yifang Hu
# Created 20 June 2014. Last modified 23 June 2016.
{
# Ensure de is a list
if(!is.list(de)) de <- list(DE = de)
# Stop if components of de are not vectors
if(!all(vapply(de,is.vector,TRUE))) stop("components of de should be vectors")
# Ensure gene IDs are of character mode
de <- lapply(de, as.character)
if(!is.null(universe)) universe <- as.character(universe)
# Ensure all gene sets have unique names
nsets <- length(de)
names(de) <- trimWhiteSpace(names(de))
NAME <- names(de)
i <- which(NAME == "" | is.na(NAME))
NAME[i] <- paste0("DE",i)
names(de) <- makeUnique(NAME)
# Fit trend in DE with respect to the covariate, combining all de lists
if(!is.null(covariate)) {
covariate <- as.numeric(covariate)
if(length(covariate) != length(covariate)) stop("universe and covariate must have same length")
isDE <- as.numeric(universe %in% unlist(de))
o <- order(covariate)
prior.prob <- covariate
span <- approx(x=c(20,200),y=c(1,0.5),xout=sum(isDE),rule=2)$y
prior.prob[o] <- tricubeMovingAverage(isDE[o],span=span)
if(plot) barcodeplot(covariate, index=(isDE==1), worm=TRUE, span.worm=span)
}
# Get access to package of GO terms
suppressPackageStartupMessages(OK <- requireNamespace("GO.db",quietly=TRUE))
if(!OK) stop("GO.db package required but not installed (or can't be loaded)")
# Get access to required annotation functions
suppressPackageStartupMessages(OK <- requireNamespace("AnnotationDbi",quietly=TRUE))
if(!OK) stop("AnnotationDbi package required but not installed (or can't be loaded)")
# Load appropriate organism package
orgPkg <- paste0("org.",species,".eg.db")
suppressPackageStartupMessages(OK <- requireNamespace(orgPkg,quietly=TRUE))
if(!OK) stop(orgPkg," package required but not not installed (or can't be loaded)")
# Get GO to Entrez Gene mappings
obj <- paste0("org.",species,".egGO2ALLEGS")
egGO2ALLEGS <- tryCatch(getFromNamespace(obj,orgPkg), error=function(e) FALSE)
if(is.logical(egGO2ALLEGS)) stop("Can't find gene ontology mappings in package ",orgPkg)
# Convert gene-GOterm mappings to data.frame and remove duplicate entries
if(is.null(universe)) {
EG.GO <- AnnotationDbi::toTable(egGO2ALLEGS)
d <- duplicated(EG.GO[,c("gene_id", "go_id", "Ontology")])
EG.GO <- EG.GO[!d, ]
universe <- unique(EG.GO$gene_id)
universe <- as.character(universe)
} else {
universe <- as.character(universe)
dup <- duplicated(universe)
if(!is.null(prior.prob)) {
if(length(prior.prob)!=length(universe)) stop("length(prior.prob) must equal length(universe)")
prior.prob <- rowsum(prior.prob,group=universe,reorder=FALSE)
n <- rowsum(rep_len(1L,length(universe)),group=universe,reorder=FALSE)
prior.prob <- prior.prob/n
}
universe <- universe[!dup]
m <- match(AnnotationDbi::Lkeys(egGO2ALLEGS),universe,0L)
universe <- universe[m]
if(!is.null(prior.prob)) prior.prob <- prior.prob[m]
AnnotationDbi::Lkeys(egGO2ALLEGS) <- universe
EG.GO <- AnnotationDbi::toTable(egGO2ALLEGS)
d <- duplicated(EG.GO[,c("gene_id", "go_id", "Ontology")])
EG.GO <- EG.GO[!d, ]
}
Total <- length(unique(EG.GO$gene_id))
if(Total<1L) stop("No genes found in universe")
# Check prior probabilities
if(!is.null(prior.prob)) {
if(length(prior.prob)!=length(universe)) stop("length(prior.prob) must equal length(universe)")
}
# Overlap with DE genes
isDE <- lapply(de, function(x) EG.GO$gene_id %in% x)
TotalDE <- lapply(isDE, function(x) length(unique(EG.GO$gene_id[x])))
nDE <- length(isDE)
if(length(prior.prob)) {
# Probability weight for each gene
m <- match(EG.GO$gene_id, universe)
PW2 <- list(prior.prob[m])
X <- do.call(cbind, c(N=1, isDE, PW=PW2))
} else
X <- do.call(cbind, c(N=1, isDE))
group <- paste(EG.GO$go_id, EG.GO$Ontology, sep=".")
S <- rowsum(X, group=group, reorder=FALSE)
P <- matrix(0, nrow = nrow(S), ncol = nsets)
if(length(prior.prob)) {
# Calculate average prior prob for each set
PW.ALL <- sum(prior.prob[universe %in% EG.GO$gene_id])
AVE.PW <- S[,"PW"]/S[,"N"]
W <- AVE.PW*(Total-S[,"N"])/(PW.ALL-S[,"N"]*AVE.PW)
# Wallenius' noncentral hypergeometric test
if(!requireNamespace("BiasedUrn",quietly=TRUE)) stop("BiasedUrn package required but is not installed (or can't be loaded)")
for(j in 1:nsets) for(i in 1:nrow(S))
P[i,j] <- BiasedUrn::pWNCHypergeo(S[i,1+j], S[i,"N"], Total-S[i,"N"], TotalDE[[j]], W[i],lower.tail=FALSE) + BiasedUrn::dWNCHypergeo(S[i,1+j], S[i,"N"], Total-S[i,"N"], TotalDE[[j]], W[i])
S <- S[,-ncol(S)]
} else {
# Fisher's exact test
for(j in 1:nsets)
P[,j] <- phyper(q=S[,1+j]-0.5,m=TotalDE[[j]],n=Total-TotalDE[[j]], k=S[,"N"],lower.tail=FALSE)
}
# Assemble output
g <- strsplit2(rownames(S),split="\\.")
TERM <- suppressMessages(AnnotationDbi::select(GO.db::GO.db,keys=g[,1],columns="TERM"))
Results <- data.frame(GOID = TERM[[1]], Term = TERM[[2]], Ont = g[,2], S, P, stringsAsFactors=FALSE)
rownames(Results) <- g[,1]
# Name p-value columns
colnames(Results)[4+nsets+(1L:nsets)] <- paste0("P.", names(de))
return(list(tbl = as_tibble(Results), gns = inner_join(tibble(gene_id = de$DE), tibble(EG.GO))))
}