diff --git a/NAMESPACE b/NAMESPACE index ad983aa3..586f7bb6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ export(animate_stars) export(animate_stereo) export(animate_trails) export(animate_xy) +export(anomaly_index) export(areColors) export(basis_init) export(basis_random) @@ -66,6 +67,7 @@ export(frozen_guided_tour) export(frozen_tour) export(geodesic_path) export(grand_tour) +export(guided_anomaly_tour) export(guided_section_tour) export(guided_tour) export(holes) diff --git a/R/anomaly-pursuit.r b/R/anomaly-pursuit.r new file mode 100644 index 00000000..5d8ce23e --- /dev/null +++ b/R/anomaly-pursuit.r @@ -0,0 +1,14 @@ +#' Anomaly index. +#' +#' Calculates an index that looks for the best projection of +#' observations that are outside a pre-determined p-D ellipse. +#' +#' @export +anomaly_index <- function() { + + function(mat, ell2d) { + + mat_tab <- mean(sqrt(mahalanobis(mat, center=c(0,0), cov=ell2d))) + } +} + diff --git a/R/display-xy.r b/R/display-xy.r index be632c70..4286589f 100644 --- a/R/display-xy.r +++ b/R/display-xy.r @@ -17,6 +17,8 @@ #' @param obs_labels vector of text labels to display #' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL. Useful for #' comparing data with some null hypothesis +#' @param ellsize This can be considered the equivalent of a critical value, used to +#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1. #' @param palette name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1" #' @param ... other arguments passed on to \code{\link{animate}} and #' \code{\link{display_xy}} @@ -64,7 +66,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL, col = "black", pch = 20, cex = 1, edges = NULL, edges.col = "black", edges.width=1, obs_labels = NULL, - ellipse = NULL, + ellipse = NULL, ellsize = 1, palette="Zissou 1", ...) { # Needed for CRAN checks labels <- NULL @@ -151,7 +153,7 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL, if (nrow(ellipse) == nrow(proj)) { # Project ellipse into 2D - evc <- eigen(ellipse) + evc <- eigen(ellipse*ellsize) ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors) e2 <- t(proj) %*% ellinv %*% proj evc2 <- eigen(e2) @@ -170,6 +172,14 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL, lines(sph2d) + # Colour points outside the pD ellipse + mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse)) + anomalies <- which(mdst > ellsize) + cat(length(anomalies), "\n") + points(x[anomalies,], + col = "red", + pch = 4, + cex = 2) } else message("Check the variance-covariance matrix generating the ellipse\n") diff --git a/R/tour-guided-anomaly.r b/R/tour-guided-anomaly.r new file mode 100644 index 00000000..a0b1c843 --- /dev/null +++ b/R/tour-guided-anomaly.r @@ -0,0 +1,92 @@ +#' A guided anomaly tour path. +#' +#' The guided anomaly tour is a variation of the guided tour that is +#' using an ellipse to determine anomalies on which to select target planes. +#' +#' Usually, you will not call this function directly, but will pass it to +#' a method that works with tour paths like \code{\link{animate_slice}}, +#' \code{\link{save_history}} or \code{\link{render}}. +#' +#' @param index_f the section pursuit index function to optimise. The function +#' needs to take two arguments, the projected data, indexes of anomalies. +#' @param d target dimensionality +#' @param alpha the initial size of the search window, in radians +#' @param cooling the amount the size of the search window should be adjusted +#' by after each step +#' @param search_f the search strategy to use +#' @param max.tries the maximum number of unsuccessful attempts to find +#' a better projection before giving up +#' @param max.i the maximum index value, stop search if a larger value is found +#' @param ellipse pxp variance-covariance matrix defining ellipse, default NULL. +#' Useful for comparing data with some hypothesized null. +#' @param ellsize This can be considered the equivalent of a critical value, used to +#' scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1. +#' @param ... arguments sent to the search_f +#' @seealso \code{\link{slice_index}} for an example of an index functions. +#' \code{\link{search_geodesic}}, \code{\link{search_better}}, +#' \code{\link{search_better_random}} for different search strategies +#' @export +#' @examples +#' animate_xy(flea[, 1:6], guided_anomaly_tour(anomaly_index(), +#' ellipse=cov(flea[,1:6])), ellipse=cov(flea[,1:6]), axes="off") +guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, + max.tries = 25, max.i = Inf, + ellipse, ellsize=1, + search_f = search_geodesic, ...) { + h <- NULL + + generator <- function(current, data, tries, ...) { + if (is.null(current)) { + return(basis_init(ncol(data), d)) + } + + if (is.null(h)) { + half_range <- compute_half_range(NULL, data, FALSE) + } + + index <- function(proj) { + # Check which observations are outside pD ellipse + mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse)) + anomalies <- which(mdst > ellsize) + stopifnot(length(anomalies) > 0) + # Project ellipse into 2D + evc <- eigen(ellipse) + ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors) + e2 <- t(proj) %*% ellinv %*% proj + evc2 <- eigen(e2) + ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors) + index_f(as.matrix(data[anomalies,]) %*% proj, ell2d) + } + + cur_index <- index(current) + + if (cur_index > max.i) { + cat("Found index ", cur_index, ", larger than selected maximum ", max.i, ". Stopping search.\n", + sep = "" + ) + cat("Final projection: \n") + if (ncol(current) == 1) { + for (i in 1:length(current)) { + cat(sprintf("%.3f", current[i]), " ") + } + cat("\n") + } + else { + for (i in 1:nrow(current)) { + for (j in 1:ncol(current)) { + cat(sprintf("%.3f", current[i, j]), " ") + } + cat("\n") + } + } + return(NULL) + } + + basis <- search_f(current, alpha, index, tries, max.tries, cur_index = cur_index, ...) + alpha <<- alpha * cooling + + list(target = basis$target, index = index) + } + + new_geodesic_path("guided", generator) +} diff --git a/R/tour-guided-section.r b/R/tour-guided-section.r index 158e21b9..1b496a6c 100644 --- a/R/tour-guided-section.r +++ b/R/tour-guided-section.r @@ -7,7 +7,7 @@ #' a method that works with tour paths like \code{\link{animate_slice}}, #' \code{\link{save_history}} or \code{\link{render}}. #' -#' @param index_f the section purusit index function to optimise. The function +#' @param index_f the section pursuit index function to optimise. The function #' needs to take three arguments, the projected data, the vector of distances #' from the current projection plane, and the slice thickness h. #' @param d target dimensionality @@ -19,7 +19,7 @@ #' a better projection before giving up #' @param max.i the maximum index value, stop search if a larger value is found #' @param v_rel relative volume of the slice. If not set, suggested value -#' is caluclated and printed to the screen. +#' is calculated and printed to the screen. #' @param anchor A vector specifying the reference point to anchor the slice. #' If NULL (default) the slice will be anchored at the data center. #' @param ... arguments sent to the search_f diff --git a/man/display_xy.Rd b/man/display_xy.Rd index d44ea0d8..6c7096ef 100644 --- a/man/display_xy.Rd +++ b/man/display_xy.Rd @@ -17,6 +17,7 @@ display_xy( edges.width = 1, obs_labels = NULL, ellipse = NULL, + ellsize = 1, palette = "Zissou 1", ... ) @@ -50,6 +51,9 @@ If not set, defaults to maximum distance from origin to each row of data.} \item{ellipse}{pxp variance-covariance matrix defining ellipse, default NULL. Useful for comparing data with some null hypothesis} +\item{ellsize}{This can be considered the equivalent of a critical value, used to +scale the ellipse larger or smaller to capture more or fewer anomalies. Default 1.} + \item{palette}{name of color palette for point colour, used by \code{\link{hcl.colors}}, default "Zissou 1"} \item{...}{other arguments passed on to \code{\link{animate}} and diff --git a/man/guided_section_tour.Rd b/man/guided_section_tour.Rd index 0f1a7d59..dc72ef58 100644 --- a/man/guided_section_tour.Rd +++ b/man/guided_section_tour.Rd @@ -18,7 +18,7 @@ guided_section_tour( ) } \arguments{ -\item{index_f}{the section purusit index function to optimise. The function +\item{index_f}{the section pursuit index function to optimise. The function needs to take three arguments, the projected data, the vector of distances from the current projection plane, and the slice thickness h.} @@ -35,7 +35,7 @@ a better projection before giving up} \item{max.i}{the maximum index value, stop search if a larger value is found} \item{v_rel}{relative volume of the slice. If not set, suggested value -is caluclated and printed to the screen.} +is calculated and printed to the screen.} \item{anchor}{A vector specifying the reference point to anchor the slice. If NULL (default) the slice will be anchored at the data center.}