-
Notifications
You must be signed in to change notification settings - Fork 1
/
pclda.R
executable file
·96 lines (87 loc) · 3.33 KB
/
pclda.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#' Create PCA-LDA model able to predict using original variables instead of PCs.
#'
#' @title PCA-LDA combination
#' @param x - original data to be transformed by PCA
#' @param y - target (grouping) variable for LDA
#' @param PCs - integer defining number of principal components (PCs) OR integer vector of PCs to use in LDA
#' @param center - center parameter to be sent to prcomp
#' @param scale - scale. parameter to be sent to prcomp
#' @param prior - vector of prior probabilities to use in lda
#'
#' @return Returns a list/pclda object of the following components:
#' PCs - principal components used for LDA
#' center - center used in PCA
#' scale - scale used in PCA
#' prior - priors used in LDA
#' scaling - rotation matrix/vector to project newdata to the space of LD(s)
#' means - group means in the space of original variables
#' lev - levels of grouping (\code{y}) variable
#' @author Rustam Guliev <glvrst@gmail.com>
#'
#' @examples
#' pclda_model <- pclda(iris[,-5],iris[,5],PCs = 2)
#' pr <- predict.pclda(pclda_model, iris[,-5])
#' table(iris$Species, pr$class)
#' plot(pr$x[,1], pr$x[,2], pch=16, col=iris$Species, xlab='LD 1', ylab = 'LD 2')
pclda <- function(x, y, PCs, center=TRUE, scale=FALSE, prior=NULL) {
# Prepare parameters
if (length(PCs) == 1) {
PCs <- 1:PCs
}
if (! is.factor(y)){
y <- factor(y)
}
if (is.null(prior)) {
prior <- rep(1/nlevels(y), nlevels(y))
names(prior) <- levels(y)
}
# PCA + LDA
pca <- prcomp(x, center = center, scale. = scale)
X <- pca$x[,PCs,drop=FALSE]
l <- lda(x = X, grouping=y, prior = prior)
# Prepare information for output. This is needed for predictions
means <- as.matrix(aggregate(x, by=list(y), mean, na.rm=TRUE)[,-1])
rownames(means) <- levels(y)
colnames(means) <- colnames(x)
# Return
res <- list(
PCs = PCs,
center = pca$center,
scale = pca$scale,
prior = l$prior,
scaling = pca$rotation[,PCs] %*% l$scaling[,,drop=F],
means = means,
lev = levels(y)
)
class(res) <- append(class(res),"pclda")
return(res)
}
predict.pclda <- function(object, newdata, prior = object$prior) {
if (is.null(dim(newdata))) {
dim(newdata) <- c(1L, length(newdata))
newdata <- as.matrix(newdata)
}
if (ncol(newdata) != ncol(object$means))
stop("wrong number of variables")
if (length(colnames(newdata)) > 0L && any(colnames(newdata) != dimnames(object$means)[[2L]]))
warning("variable names in 'newdata' do not match those in 'object'")
ng <- length(object$prior)
if (!missing(prior)) {
if (any(prior < 0) || round(sum(prior), 5) != 1)
stop("invalid 'prior'")
if (length(prior) != ng)
stop("'prior' is of incorrect length")
}
# PROJECT newdata to LDs
x <- scale(newdata, center = object$center, scale = object$scale) %*% object$scaling
dm <- scale(object$means, center = object$center, scale = object$scale) %*% object$scaling
# Calculate posteriors
dist <- matrix(0.5 * rowSums(dm^2) - log(prior), nrow(x), length(prior), byrow = TRUE) - x %*% t(dm)
dist <- exp(-(dist - apply(dist, 1L, min, na.rm = TRUE)))
posterior <- dist/drop(dist %*% rep(1, ng))
# Define predicted class
nm <- names(object$prior)
cl <- factor(nm[max.col(posterior)], levels = object$lev)
dimnames(posterior) <- list(rownames(x), nm)
list(class = cl, posterior = posterior, x = x)
}