From 735219d15437ceccd0165d12d02f1c7318938f9b Mon Sep 17 00:00:00 2001 From: dicook Date: Thu, 7 Mar 2024 18:54:53 +1100 Subject: [PATCH] anomaly pursuit is definitely working --- NAMESPACE | 1 + R/anomaly-pursuit.r | 3 ++- R/display-xy.r | 5 +++-- R/linear-algebra.r | 22 ++++++++++++++++++++++ R/tour-guided-anomaly.r | 5 +++-- man/display_xy.Rd | 4 ++-- 6 files changed, 33 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 586f7bb6..e9235996 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,6 +77,7 @@ export(lda_pp) export(linear_breaks) export(little_tour) export(local_tour) +export(mahal_dist) export(manual_slice) export(mapColors) export(mapShapes) diff --git a/R/anomaly-pursuit.r b/R/anomaly-pursuit.r index 5d8ce23e..92e30216 100644 --- a/R/anomaly-pursuit.r +++ b/R/anomaly-pursuit.r @@ -8,7 +8,8 @@ anomaly_index <- function() { function(mat, ell2d) { - mat_tab <- mean(sqrt(mahalanobis(mat, center=c(0,0), cov=ell2d))) + mat_tab <- #mean(mahal_dist(mat, ell2d)) + mean(sqrt(mahalanobis(mat, center=c(0,0), cov=ell2d))) } } diff --git a/R/display-xy.r b/R/display-xy.r index 4286589f..0e95d1a9 100644 --- a/R/display-xy.r +++ b/R/display-xy.r @@ -18,7 +18,7 @@ #' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL. Useful for #' comparing data with some null hypothesis #' @param ellsize This can be considered the equivalent of a critical value, used to -#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1. +#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3. #' @param palette name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1" #' @param ... other arguments passed on to \code{\link{animate}} and #' \code{\link{display_xy}} @@ -66,7 +66,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL, col = "black", pch = 20, cex = 1, edges = NULL, edges.col = "black", edges.width=1, obs_labels = NULL, - ellipse = NULL, ellsize = 1, + ellipse = NULL, ellsize = 3, palette="Zissou 1", ...) { # Needed for CRAN checks labels <- NULL @@ -174,6 +174,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL, # Colour points outside the pD ellipse mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse)) + #mdst <- mahal_dist(data, ellipse) anomalies <- which(mdst > ellsize) cat(length(anomalies), "\n") points(x[anomalies,], diff --git a/R/linear-algebra.r b/R/linear-algebra.r index 5b1b0236..1aa441ff 100644 --- a/R/linear-algebra.r +++ b/R/linear-algebra.r @@ -111,3 +111,25 @@ orthonormalise_by <- function(x, by) { #' @keywords algebra #' @export proj_dist <- function(x, y) sqrt(sum((x %*% t(x) - y %*% t(y))^2)) + +#' Calculate the Mahalanobis distance between points and center. +#' +#' Computes the Mahalanobis distance using a provided variance-covariance +#' matrix of observations from 0. +# +#' @param x matrix of data +#' @param vc pre-determined variance-covariance matrix +#' @keywords algebra +#' @export +mahal_dist <- function(x, vc) { + n <- dim(x)[1] + p <- dim(x)[2] + mn <- rep(0, p) + ev <- eigen(vc) + vcinv <- ev$vectors %*% diag(1/ev$values) %*% t(ev$vectors) + x <- x - matrix(rep(mn, n), ncol = p, byrow = T) + dx <- NULL + for (i in 1:n) + dx <- c(dx, x[i, ] %*% vcinv %*% as.matrix(x[i, ])) + return(dx) +} diff --git a/R/tour-guided-anomaly.r b/R/tour-guided-anomaly.r index a0b1c843..fd166260 100644 --- a/R/tour-guided-anomaly.r +++ b/R/tour-guided-anomaly.r @@ -20,7 +20,7 @@ #' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL. #' Useful for comparing data with some hypothesized null. #' @param ellsize This can be considered the equivalent of a critical value, used to -#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1. +#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3. #' @param ... arguments sent to the search_f #' @seealso \code{\link{slice_index}} for an example of an index functions. #' \code{\link{search_geodesic}}, \code{\link{search_better}}, @@ -31,7 +31,7 @@ #' ellipse=cov(flea[,1:6])), ellipse=cov(flea[,1:6]), axes="off") guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = 25, max.i = Inf, - ellipse, ellsize=1, + ellipse, ellsize=3, search_f = search_geodesic, ...) { h <- NULL @@ -47,6 +47,7 @@ guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, index <- function(proj) { # Check which observations are outside pD ellipse mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse)) + #mdst <- mahal_dist(data, ellipse) anomalies <- which(mdst > ellsize) stopifnot(length(anomalies) > 0) # Project ellipse into 2D diff --git a/man/display_xy.Rd b/man/display_xy.Rd index 6c7096ef..692ce629 100644 --- a/man/display_xy.Rd +++ b/man/display_xy.Rd @@ -17,7 +17,7 @@ display_xy( edges.width = 1, obs_labels = NULL, ellipse = NULL, - ellsize = 1, + ellsize = 3, palette = "Zissou 1", ... ) @@ -52,7 +52,7 @@ If not set, defaults to maximum distance from origin to each row of data.} comparing data with some null hypothesis} \item{ellsize}{This can be considered the equivalent of a critical value, used to -scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1.} +scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3.} \item{palette}{name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1"}