diff --git a/NEWS.md b/NEWS.md index 6c79f173..4fcb4801 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ * TeachingDemos removed as a Suggests, and replaced with aplpack for drawing Chernoff faces * addition of a pre-specified ellipse can be added to the 2D display * palette can now be a vector of values +* a new projection pursuit index for finding anomalies relative to +a null variance-covariance matrix. # tourr 1.1.0 diff --git a/R/anomaly-pursuit.r b/R/anomaly-pursuit.r index 92e30216..c3f1ff41 100644 --- a/R/anomaly-pursuit.r +++ b/R/anomaly-pursuit.r @@ -9,7 +9,7 @@ anomaly_index <- function() { function(mat, ell2d) { mat_tab <- #mean(mahal_dist(mat, ell2d)) - mean(sqrt(mahalanobis(mat, center=c(0,0), cov=ell2d))) + mean(mahalanobis(mat, center=c(0,0), cov=ell2d)) } } diff --git a/R/display-xy.r b/R/display-xy.r index bdccd7fb..837eba8d 100644 --- a/R/display-xy.r +++ b/R/display-xy.r @@ -153,11 +153,15 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL, if (nrow(ellipse) == nrow(proj)) { # Project ellipse into 2D + # Notation in paper: ellipse=A, ellinv=A^(-1), + # e2=P^TA^(-1)P, ell2d=B evc <- eigen(ellipse*ellsize) ellinv <- (evc$vectors) %*% diag(evc$values) %*% t(evc$vectors) e2 <- t(proj) %*% ellinv %*% proj evc2 <- eigen(e2) - ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors) + ell2d <- (evc2$vectors) %*% sqrt(diag(evc2$values)) %*% t(evc2$vectors) + e3 <- eigen(ell2d) + ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors) # Compute the points on an ellipse sph <- geozoo::sphere.hollow(2, 200)$points @@ -168,12 +172,14 @@ display_xy <- function(center = TRUE, axes = "center", half_range = NULL, sph2 <- sph2[order(sph2[,1], decreasing=T),] sph <- rbind(sph1, sph2) sph <- rbind(sph, sph[1,]) - sph2d <- sph%*%ell2d/half_range + sph2d <- sph%*%ell2dinv/half_range lines(sph2d) # Colour points outside the pD ellipse - mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse)) + mdst <- mahalanobis(data, + center=rep(0, ncol(data)), + cov=ellipse) #mdst <- mahal_dist(data, ellipse) anomalies <- which(mdst > ellsize) points(x[anomalies,], diff --git a/R/tour-guided-anomaly.r b/R/tour-guided-anomaly.r index fd166260..5e44a3ad 100644 --- a/R/tour-guided-anomaly.r +++ b/R/tour-guided-anomaly.r @@ -46,7 +46,9 @@ guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, index <- function(proj) { # Check which observations are outside pD ellipse - mdst <- sqrt(mahalanobis(data, center=rep(0, ncol(data)), cov=ellipse)) + mdst <- mahalanobis(data, + center=rep(0, ncol(data)), + cov=ellipse) #mdst <- mahal_dist(data, ellipse) anomalies <- which(mdst > ellsize) stopifnot(length(anomalies) > 0) @@ -56,7 +58,9 @@ guided_anomaly_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, e2 <- t(proj) %*% ellinv %*% proj evc2 <- eigen(e2) ell2d <- (evc2$vectors) %*% diag(sqrt(evc2$values)) %*% t(evc2$vectors) - index_f(as.matrix(data[anomalies,]) %*% proj, ell2d) + e3 <- eigen(ell2d) + ell2dinv <- (e3$vectors) %*% diag(e3$values) %*% t(e3$vectors) + index_f(as.matrix(data[anomalies,]) %*% proj, ell2dinv) } cur_index <- index(current)