Skip to content

Commit

Permalink
anomaly index working
Browse files Browse the repository at this point in the history
  • Loading branch information
dicook committed Mar 12, 2024
1 parent 45f5b73 commit da2e575
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 31 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ importFrom(graphics,rect)
importFrom(graphics,segments)
importFrom(graphics,stars)
importFrom(graphics,text)
importFrom(stats,mahalanobis)
importFrom(stats,na.omit)
importFrom(stats,qchisq)
importFrom(stats,quantile)
importFrom(stats,residuals)
importFrom(stats,rnorm)
Expand Down
4 changes: 2 additions & 2 deletions R/anomaly-pursuit.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
#' @export
anomaly_index <- function() {

function(mat, ell2d) {
function(mat, ell2d, ellmu2d) {

mat_tab <- #mean(mahal_dist(mat, ell2d))
mean(mahalanobis(mat, center=c(0,0), cov=ell2d))
mean(mahalanobis(mat, center=ellmu2d, cov=ell2d))
}
}

48 changes: 38 additions & 10 deletions R/display-xy.r
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@
#' @param obs_labels vector of text labels to display
#' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL. Useful for
#' comparing data with some null hypothesis
#' @param ellsize This can be considered the equivalent of a critical value, used to
#' @param ellc This can be considered the equivalent of a critical value, used to
#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3.
#' @param ellmu This is the centre of the ellipse corresponding to the mean of the
#' normal population. Default vector of 0's
#' @param palette name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1"
#' @param shapeset numbers corresponding to shapes in base R points, to use for mapping
#' categorical variable to shapes, default=c(15:17, 23:25)
#' @param ... other arguments passed on to \code{\link{animate}} and
#' \code{\link{display_xy}}
#' @importFrom graphics legend
#' @importFrom stats mahalanobis qchisq
#' @export
#' @examples
#' animate_xy(flea[, 1:6])
Expand Down Expand Up @@ -68,7 +71,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,
col = "black", pch = 20, cex = 1,
edges = NULL, edges.col = "black", edges.width=1,
obs_labels = NULL,
ellipse = NULL, ellsize = 3,
ellipse = NULL, ellc = NULL, ellmu = NULL,
palette="Zissou 1", shapeset=c(15:17, 23:25), ...) {
# Needed for CRAN checks
labels <- NULL
Expand Down Expand Up @@ -154,36 +157,61 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,
if (!is.null(ellipse)) {
if (nrow(ellipse) == nrow(proj)) {

if (is.null(ellc))
ellc <- qchisq(0.95, nrow(proj))
else
stopifnot(ellc > 0) # Needs to be positive
if (is.null(ellmu))
ellmu <- rep(0, nrow(proj))
else
stopifnot(length(ellmu) == nrow(proj)) # Right dimension
message("Using ellc = ", format(ellc, digits = 2))

# Project ellipse into 2D
# Notation in paper: ellipse=A, ellinv=A^(-1),
# e2=P^TA^(-1)P, ell2d=B
evc <- eigen(ellipse*ellsize)
# ellipse=var-cov of normal pop
# ellinv for defining pD ellipse, for dist calc
# e2 is projected var-cov
# ell2d is B, used to project ellipse points
evc <- eigen(ellipse) #
ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
e2 <- t(proj) %*% ellinv %*% proj
e2 <- t(proj) %*% ellipse %*% proj
evc2 <- eigen(e2)
ell2d <- (evc2$vectors) %*% sqrt(diag(evc2$values)) %*% t(evc2$vectors)
e3 <- eigen(ell2d)
ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)
ell2d <- (evc2$vectors) %*% sqrt(diag(evc2$values*ellc)) %*% t(evc2$vectors)
#e3 <- eigen(ell2d)
#ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)


# Compute the points on an ellipse
# Generate points on a circle
sph <- geozoo::sphere.hollow(2, 200)$points
# Organise so lines connecting consecutive
# points creates the circle
sph <- sph[order(sph[,2]),]
sph1 <- sph[sph[,2]>=0,]
sph2 <- sph[sph[,2]<0,]
sph1 <- sph1[order(sph1[,1]),]
sph2 <- sph2[order(sph2[,1], decreasing=T),]
sph <- rbind(sph1, sph2)
sph <- rbind(sph, sph[1,])
sph2d <- sph%*%ell2dinv/half_range

# Transform circle points into an ellipse
sph2d <- sph%*%ell2d
# Centre on the given mean
ellmu2d <- t(as.matrix(ellmu)) %*% proj
sph2d <- sweep(sph2d, 2, ellmu2d, `+`)
# Scale ellipse into plot space
sph2d <- sph2d/half_range

lines(sph2d)

# Colour points outside the pD ellipse
mdst <- mahalanobis(data,
center=rep(0, ncol(data)),
center=ellmu,
cov=ellipse)
#mdst <- mahal_dist(data, ellipse)
anomalies <- which(mdst > ellsize)
anomalies <- which(mdst > ellc)
points(x[anomalies,],
col = "red",
pch = 4,
Expand Down
59 changes: 42 additions & 17 deletions R/tour-guided-anomaly.r
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,22 @@
#' @param max.i the maximum index value, stop search if a larger value is found
#' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL.
#' Useful for comparing data with some hypothesized null.
#' @param ellsize This can be considered the equivalent of a critical value, used to
#' @param ellc This can be considered the equivalent of a critical value, used to
#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3.
#' @param ellmu This is the centre of the ellipse corresponding to the mean of the
#' normal population. Default vector of 0's
#' @param ... arguments sent to the search_f
#' @seealso \code{\link{slice_index}} for an example of an index functions.
#' \code{\link{search_geodesic}}, \code{\link{search_better}},
#' \code{\link{search_better_random}} for different search strategies
#' @importFrom stats mahalanobis qchisq
#' @export
#' @examples
#' animate_xy(flea[, 1:6], guided_anomaly_tour(anomaly_index(),
#' ellipse=cov(flea[,1:6])), ellipse=cov(flea[,1:6]), axes="off")
guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99,
max.tries = 25, max.i = Inf,
ellipse, ellsize=3,
ellipse, ellc=NULL, ellmu=NULL,
search_f = search_geodesic, ...) {
h <- NULL

Expand All @@ -45,22 +48,44 @@ guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99,
}

index <- function(proj) {
# Check which observations are outside pD ellipse
mdst <- mahalanobis(data,
center=rep(0, ncol(data)),
cov=ellipse)
if (nrow(ellipse) == nrow(proj)) {

if (is.null(ellc))
ellc <- qchisq(0.95, nrow(proj))
else
stopifnot(ellc > 0) # Needs to be positive
if (is.null(ellmu))
ellmu <- rep(0, nrow(proj))
else
stopifnot(length(ellmu) == nrow(proj)) # Right dimension
message("Using ellc = ", format(ellc, digits = 2))

# Check which observations are outside pD ellipse
mdst <- mahalanobis(data,
center=ellmu,
cov=ellipse)
#mdst <- mahal_dist(data, ellipse)
anomalies <- which(mdst > ellsize)
stopifnot(length(anomalies) > 0)
# Project ellipse into 2D
evc <- eigen(ellipse)
ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
e2 <- t(proj) %*% ellinv %*% proj
evc2 <- eigen(e2)
ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors)
e3 <- eigen(ell2d)
ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)
index_f(as.matrix(data[anomalies,]) %*% proj, ell2dinv)
anomalies <- which(mdst > ellc)
stopifnot(length(anomalies) > 0)

# Project ellipse into 2D
evc <- eigen(ellipse) #
ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
e2 <- t(proj) %*% ellipse %*% proj
evc2 <- eigen(e2)
ell2d <- (evc2$vectors) %*% sqrt(diag(evc2$values*ellc)) %*% t(evc2$vectors)

ell2dinv <- (evc2$vectors) %*% diag(evc2$values*ellc) %*% t(evc2$vectors)
ellmu2d <- t(as.matrix(ellmu)) %*% proj
#evc <- eigen(ellipse)
#ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
#e2 <- t(proj) %*% ellinv %*% proj
#evc2 <- eigen(e2)
#ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors)
#e3 <- eigen(ell2d)
#ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)
index_f(as.matrix(data[anomalies,]) %*% proj, e2, ellmu2d)
}
}

cur_index <- index(current)
Expand Down
1 change: 1 addition & 0 deletions R/util.r
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ mapColors <- function(x, palette) {
#' Map vector of factors to pch
#'
#' @param x vector
#' @param shapeset vector of integers indicating point shapes
#' @export
mapShapes <- function(x, shapeset) {
n <- length(unique(x))
Expand Down
8 changes: 6 additions & 2 deletions man/display_xy.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/mapShapes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit da2e575

Please sign in to comment.