Skip to content

Commit

Permalink
anomaly pursuit
Browse files Browse the repository at this point in the history
  • Loading branch information
dicook committed Mar 7, 2024
1 parent 133fee9 commit a273eb8
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 6 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export(animate_stars)
export(animate_stereo)
export(animate_trails)
export(animate_xy)
export(anomaly_index)
export(areColors)
export(basis_init)
export(basis_random)
Expand Down Expand Up @@ -66,6 +67,7 @@ export(frozen_guided_tour)
export(frozen_tour)
export(geodesic_path)
export(grand_tour)
export(guided_anomaly_tour)
export(guided_section_tour)
export(guided_tour)
export(holes)
Expand Down
14 changes: 14 additions & 0 deletions R/anomaly-pursuit.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#' Anomaly index.
#'
#' Calculates an index that looks for the best projection of
#' observations that are outside a pre-determined p-D ellipse.
#'
#' @export
anomaly_index <- function() {

function(mat, ell2d) {

mat_tab <- mean(sqrt(mahalanobis(mat, center=c(0,0), cov=ell2d)))
}
}

14 changes: 12 additions & 2 deletions R/display-xy.r
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#' @param obs_labels vector of text labels to display
#' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL. Useful for
#' comparing data with some null hypothesis
#' @param ellsize This can be considered the equivalent of a critical value, used to
#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1.
#' @param palette name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1"
#' @param ... other arguments passed on to \code{\link{animate}} and
#' \code{\link{display_xy}}
Expand Down Expand Up @@ -64,7 +66,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,
col = "black", pch = 20, cex = 1,
edges = NULL, edges.col = "black", edges.width=1,
obs_labels = NULL,
ellipse = NULL,
ellipse = NULL, ellsize = 1,
palette="Zissou 1", ...) {
# Needed for CRAN checks
labels <- NULL
Expand Down Expand Up @@ -151,7 +153,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,
if (nrow(ellipse) == nrow(proj)) {

# Project ellipse into 2D
evc <- eigen(ellipse)
evc <- eigen(ellipse*ellsize)
ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
e2 <- t(proj) %*% ellinv %*% proj
evc2 <- eigen(e2)
Expand All @@ -170,6 +172,14 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL,

lines(sph2d)

# Colour points outside the pD ellipse
mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse))
anomalies <- which(mdst > ellsize)
cat(length(anomalies), "\n")
points(x[anomalies,],
col = "red",
pch = 4,
cex = 2)
}
else
message("Check the variance-covariance matrix generating the ellipse\n")
Expand Down
92 changes: 92 additions & 0 deletions R/tour-guided-anomaly.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#' A guided anomaly tour path.
#'
#' The guided anomaly tour is a variation of the guided tour that is
#' using an ellipse to determine anomalies on which to select target planes.
#'
#' Usually, you will not call this function directly, but will pass it to
#' a method that works with tour paths like \code{\link{animate_slice}},
#' \code{\link{save_history}} or \code{\link{render}}.
#'
#' @param index_f the section pursuit index function to optimise. The function
#' needs to take two arguments, the projected data, indexes of anomalies.
#' @param d target dimensionality
#' @param alpha the initial size of the search window, in radians
#' @param cooling the amount the size of the search window should be adjusted
#' by after each step
#' @param search_f the search strategy to use
#' @param max.tries the maximum number of unsuccessful attempts to find
#' a better projection before giving up
#' @param max.i the maximum index value, stop search if a larger value is found
#' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL.
#' Useful for comparing data with some hypothesized null.
#' @param ellsize This can be considered the equivalent of a critical value, used to
#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1.
#' @param ... arguments sent to the search_f
#' @seealso \code{\link{slice_index}} for an example of an index functions.
#' \code{\link{search_geodesic}}, \code{\link{search_better}},
#' \code{\link{search_better_random}} for different search strategies
#' @export
#' @examples
#' animate_xy(flea[, 1:6], guided_anomaly_tour(anomaly_index(),
#' ellipse=cov(flea[,1:6])), ellipse=cov(flea[,1:6]), axes="off")
guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99,
max.tries = 25, max.i = Inf,
ellipse, ellsize=1,
search_f = search_geodesic, ...) {
h <- NULL

generator <- function(current, data, tries, ...) {
if (is.null(current)) {
return(basis_init(ncol(data), d))
}

if (is.null(h)) {
half_range <- compute_half_range(NULL, data, FALSE)
}

index <- function(proj) {
# Check which observations are outside pD ellipse
mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse))
anomalies <- which(mdst > ellsize)
stopifnot(length(anomalies) > 0)
# Project ellipse into 2D
evc <- eigen(ellipse)
ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors)
e2 <- t(proj) %*% ellinv %*% proj
evc2 <- eigen(e2)
ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors)
index_f(as.matrix(data[anomalies,]) %*% proj, ell2d)
}

cur_index <- index(current)

if (cur_index > max.i) {
cat("Found index ", cur_index, ", larger than selected maximum ", max.i, ". Stopping search.\n",
sep = ""
)
cat("Final projection: \n")
if (ncol(current) == 1) {
for (i in 1:length(current)) {
cat(sprintf("%.3f", current[i]), " ")
}
cat("\n")
}
else {
for (i in 1:nrow(current)) {
for (j in 1:ncol(current)) {
cat(sprintf("%.3f", current[i, j]), " ")
}
cat("\n")
}
}
return(NULL)
}

basis <- search_f(current, alpha, index, tries, max.tries, cur_index = cur_index, ...)
alpha <<- alpha * cooling

list(target = basis$target, index = index)
}

new_geodesic_path("guided", generator)
}
4 changes: 2 additions & 2 deletions R/tour-guided-section.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' a method that works with tour paths like \code{\link{animate_slice}},
#' \code{\link{save_history}} or \code{\link{render}}.
#'
#' @param index_f the section purusit index function to optimise. The function
#' @param index_f the section pursuit index function to optimise. The function
#' needs to take three arguments, the projected data, the vector of distances
#' from the current projection plane, and the slice thickness h.
#' @param d target dimensionality
Expand All @@ -19,7 +19,7 @@
#' a better projection before giving up
#' @param max.i the maximum index value, stop search if a larger value is found
#' @param v_rel relative volume of the slice. If not set, suggested value
#' is caluclated and printed to the screen.
#' is calculated and printed to the screen.
#' @param anchor A vector specifying the reference point to anchor the slice.
#' If NULL (default) the slice will be anchored at the data center.
#' @param ... arguments sent to the search_f
Expand Down
4 changes: 4 additions & 0 deletions man/display_xy.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/guided_section_tour.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a273eb8

Please sign in to comment.