From da2e575b59cf317650a8f44f83b4513474172fbd Mon Sep 17 00:00:00 2001
From: dicook <visnut@gmail.com>
Date: Tue, 12 Mar 2024 16:04:32 +1100
Subject: [PATCH] anomaly index working

---
 NAMESPACE               |  2 ++
 R/anomaly-pursuit.r     |  4 +--
 R/display-xy.r          | 48 ++++++++++++++++++++++++++-------
 R/tour-guided-anomaly.r | 59 +++++++++++++++++++++++++++++------------
 R/util.r                |  1 +
 man/display_xy.Rd       |  8 ++++--
 man/mapShapes.Rd        |  2 ++
 7 files changed, 93 insertions(+), 31 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index e9235996..dfd15912 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -136,7 +136,9 @@ importFrom(graphics,rect)
 importFrom(graphics,segments)
 importFrom(graphics,stars)
 importFrom(graphics,text)
+importFrom(stats,mahalanobis)
 importFrom(stats,na.omit)
+importFrom(stats,qchisq)
 importFrom(stats,quantile)
 importFrom(stats,residuals)
 importFrom(stats,rnorm)
diff --git a/R/anomaly-pursuit.r b/R/anomaly-pursuit.r
index c3f1ff41..48d86aeb 100644
--- a/R/anomaly-pursuit.r
+++ b/R/anomaly-pursuit.r
@@ -6,10 +6,10 @@
 #' @export
 anomaly_index <- function() {
 
-  function(mat, ell2d) {
+  function(mat, ell2d, ellmu2d) {
 
     mat_tab <- #mean(mahal_dist(mat, ell2d))
-      mean(mahalanobis(mat, center=c(0,0), cov=ell2d))
+      mean(mahalanobis(mat, center=ellmu2d, cov=ell2d))
   }
 }
 
diff --git a/R/display-xy.r b/R/display-xy.r
index 30bad6ee..3cd3963b 100644
--- a/R/display-xy.r
+++ b/R/display-xy.r
@@ -17,14 +17,17 @@
 #' @param obs_labels vector of text labels to display
 #' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL. Useful for
 #'        comparing data with some null hypothesis
-#' @param ellsize This can be considered the equivalent of a critical value, used to
+#' @param ellc This can be considered the equivalent of a critical value, used to
 #'        scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3.
+#' @param ellmu This is the centre of the ellipse corresponding to the mean of the
+#'        normal population. Default vector of 0's
 #' @param palette name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1"
 #' @param shapeset numbers corresponding to shapes in base R points, to use for mapping
 #'        categorical variable to shapes, default=c(15:17, 23:25)
 #' @param ...  other arguments passed on to \code{\link{animate}} and
 #'   \code{\link{display_xy}}
 #' @importFrom graphics legend
+#' @importFrom stats mahalanobis qchisq
 #' @export
 #' @examples
 #' animate_xy(flea[, 1:6])
@@ -68,7 +71,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,
                        col = "black", pch = 20, cex = 1,
                        edges = NULL, edges.col = "black", edges.width=1,
                        obs_labels = NULL,
-                       ellipse = NULL, ellsize = 3,
+                       ellipse = NULL, ellc = NULL, ellmu = NULL,
                        palette="Zissou 1", shapeset=c(15:17, 23:25), ...) {
   # Needed for CRAN checks
   labels <- NULL
@@ -154,19 +157,37 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,
     if (!is.null(ellipse)) {
       if (nrow(ellipse) == nrow(proj)) {
 
+        if (is.null(ellc))
+          ellc <- qchisq(0.95, nrow(proj))
+        else
+          stopifnot(ellc > 0) # Needs to be positive
+        if (is.null(ellmu))
+          ellmu <- rep(0, nrow(proj))
+        else
+          stopifnot(length(ellmu) == nrow(proj)) # Right dimension
+        message("Using ellc = ", format(ellc, digits = 2))
+
         # Project ellipse into 2D
         # Notation in paper: ellipse=A, ellinv=A^(-1),
         #                    e2=P^TA^(-1)P, ell2d=B
-        evc <- eigen(ellipse*ellsize)
+        # ellipse=var-cov of normal pop
+        # ellinv for defining pD ellipse, for dist calc
+        # e2 is projected var-cov
+        # ell2d is B, used to project ellipse points
+        evc <- eigen(ellipse) #
         ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
-        e2 <- t(proj) %*% ellinv %*% proj
+        e2 <- t(proj) %*% ellipse %*% proj
         evc2 <- eigen(e2)
-        ell2d <- (evc2$vectors) %*% sqrt(diag(evc2$values)) %*% t(evc2$vectors)
-        e3 <- eigen(ell2d)
-        ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)
+        ell2d <- (evc2$vectors) %*% sqrt(diag(evc2$values*ellc)) %*% t(evc2$vectors)
+        #e3 <- eigen(ell2d)
+        #ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)
+
 
         # Compute the points on an ellipse
+        # Generate points on a circle
         sph <- geozoo::sphere.hollow(2, 200)$points
+        # Organise so lines connecting consecutive
+        # points creates the circle
         sph <- sph[order(sph[,2]),]
         sph1 <- sph[sph[,2]>=0,]
         sph2 <- sph[sph[,2]<0,]
@@ -174,16 +195,23 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,
         sph2 <- sph2[order(sph2[,1], decreasing=T),]
         sph <- rbind(sph1, sph2)
         sph <- rbind(sph, sph[1,])
-        sph2d <- sph%*%ell2dinv/half_range
+
+        # Transform circle points into an ellipse
+        sph2d <- sph%*%ell2d
+        # Centre on the given mean
+        ellmu2d <- t(as.matrix(ellmu)) %*% proj
+        sph2d <- sweep(sph2d, 2, ellmu2d, `+`)
+        # Scale ellipse into plot space
+        sph2d <- sph2d/half_range
 
         lines(sph2d)
 
         # Colour points outside the pD ellipse
         mdst <- mahalanobis(data,
-                            center=rep(0, ncol(data)),
+                            center=ellmu,
                             cov=ellipse)
         #mdst <- mahal_dist(data, ellipse)
-        anomalies <- which(mdst > ellsize)
+        anomalies <- which(mdst > ellc)
         points(x[anomalies,],
                col = "red",
                pch = 4,
diff --git a/R/tour-guided-anomaly.r b/R/tour-guided-anomaly.r
index 5e44a3ad..11625b26 100644
--- a/R/tour-guided-anomaly.r
+++ b/R/tour-guided-anomaly.r
@@ -19,19 +19,22 @@
 #' @param max.i the maximum index value, stop search if a larger value is found
 #' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL.
 #'        Useful for comparing data with some hypothesized null.
-#' @param ellsize This can be considered the equivalent of a critical value, used to
+#' @param ellc This can be considered the equivalent of a critical value, used to
 #'        scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3.
+#' @param ellmu This is the centre of the ellipse corresponding to the mean of the
+#'        normal population. Default vector of 0's
 #' @param ... arguments sent to the search_f
 #' @seealso \code{\link{slice_index}} for an example of an index functions.
 #' \code{\link{search_geodesic}}, \code{\link{search_better}},
 #'   \code{\link{search_better_random}} for different search strategies
+#' @importFrom stats mahalanobis qchisq
 #' @export
 #' @examples
 #' animate_xy(flea[, 1:6], guided_anomaly_tour(anomaly_index(),
 #'   ellipse=cov(flea[,1:6])), ellipse=cov(flea[,1:6]), axes="off")
 guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99,
                                 max.tries = 25, max.i = Inf,
-                                ellipse, ellsize=3,
+                                ellipse, ellc=NULL, ellmu=NULL,
                                 search_f = search_geodesic, ...) {
   h <- NULL
 
@@ -45,22 +48,44 @@ guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99,
     }
 
     index <- function(proj) {
-      # Check which observations are outside pD ellipse
-      mdst <- mahalanobis(data,
-                          center=rep(0, ncol(data)),
-                          cov=ellipse)
+      if (nrow(ellipse) == nrow(proj)) {
+
+        if (is.null(ellc))
+          ellc <- qchisq(0.95, nrow(proj))
+        else
+          stopifnot(ellc > 0) # Needs to be positive
+        if (is.null(ellmu))
+          ellmu <- rep(0, nrow(proj))
+        else
+          stopifnot(length(ellmu) == nrow(proj)) # Right dimension
+        message("Using ellc = ", format(ellc, digits = 2))
+
+        # Check which observations are outside pD ellipse
+        mdst <- mahalanobis(data,
+                            center=ellmu,
+                            cov=ellipse)
       #mdst <- mahal_dist(data, ellipse)
-      anomalies <- which(mdst > ellsize)
-      stopifnot(length(anomalies) > 0)
-      # Project ellipse into 2D
-      evc <- eigen(ellipse)
-      ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
-      e2 <- t(proj) %*% ellinv %*% proj
-      evc2 <- eigen(e2)
-      ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors)
-      e3 <- eigen(ell2d)
-      ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)
-      index_f(as.matrix(data[anomalies,]) %*% proj, ell2dinv)
+        anomalies <- which(mdst > ellc)
+        stopifnot(length(anomalies) > 0)
+
+        # Project ellipse into 2D
+        evc <- eigen(ellipse) #
+        ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
+        e2 <- t(proj) %*% ellipse %*% proj
+        evc2 <- eigen(e2)
+        ell2d <- (evc2$vectors) %*% sqrt(diag(evc2$values*ellc)) %*% t(evc2$vectors)
+
+        ell2dinv <- (evc2$vectors) %*% diag(evc2$values*ellc) %*% t(evc2$vectors)
+        ellmu2d <- t(as.matrix(ellmu)) %*% proj
+        #evc <- eigen(ellipse)
+        #ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
+        #e2 <- t(proj) %*% ellinv %*% proj
+        #evc2 <- eigen(e2)
+        #ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors)
+        #e3 <- eigen(ell2d)
+        #ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors)
+        index_f(as.matrix(data[anomalies,]) %*% proj, e2, ellmu2d)
+      }
     }
 
     cur_index <- index(current)
diff --git a/R/util.r b/R/util.r
index 002bd315..c67e039e 100644
--- a/R/util.r
+++ b/R/util.r
@@ -136,6 +136,7 @@ mapColors <- function(x, palette) {
 #' Map vector of factors to pch
 #'
 #' @param x vector
+#' @param shapeset vector of integers indicating point shapes
 #' @export
 mapShapes <- function(x, shapeset) {
   n <- length(unique(x))
diff --git a/man/display_xy.Rd b/man/display_xy.Rd
index 7ad22e21..9e4f331d 100644
--- a/man/display_xy.Rd
+++ b/man/display_xy.Rd
@@ -17,7 +17,8 @@ display_xy(
   edges.width = 1,
   obs_labels = NULL,
   ellipse = NULL,
-  ellsize = 3,
+  ellc = NULL,
+  ellmu = NULL,
   palette = "Zissou 1",
   shapeset = c(15:17, 23:25),
   ...
@@ -52,9 +53,12 @@ If not set, defaults to maximum distance from origin to each row of data.}
 \item{ellipse}{pxp variance-covariance matrix defining ellipse, default NULL. Useful for
 comparing data with some null hypothesis}
 
-\item{ellsize}{This can be considered the equivalent of a critical value, used to
+\item{ellc}{This can be considered the equivalent of a critical value, used to
 scale the ellipse larger or smaller to capture more or fewer anomalies. Default 3.}
 
+\item{ellmu}{This is the centre of the ellipse corresponding to the mean of the
+normal population. Default vector of 0's}
+
 \item{palette}{name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1"}
 
 \item{shapeset}{numbers corresponding to shapes in base R points, to use for mapping
diff --git a/man/mapShapes.Rd b/man/mapShapes.Rd
index 7b3ab960..f8edd1f6 100644
--- a/man/mapShapes.Rd
+++ b/man/mapShapes.Rd
@@ -8,6 +8,8 @@ mapShapes(x, shapeset)
 }
 \arguments{
 \item{x}{vector}
+
+\item{shapeset}{vector of integers indicating point shapes}
 }
 \description{
 Map vector of factors to pch