diff --git a/DESCRIPTION b/DESCRIPTION index 91528567..d9759ce8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,9 @@ Suggests: rmarkdown, tidyr, covr, - plotly + plotly, + cassowaryr, + minerva License: MIT + file LICENSE LazyData: true URL: https://github.com/ggobi/tourr, https://ggobi.github.io/tourr/ diff --git a/NAMESPACE b/NAMESPACE index e0c6e112..9ba0d279 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,8 @@ S3method(plot,path_index) S3method(print,history_array) S3method(print,tour_path) S3method(str,history_array) +export(MIC) +export(TIC) export(anchored_orthogonal_distance) export(andrews) export(angular_breaks) @@ -41,6 +43,7 @@ export(center) export(cmass) export(cumulative_radial) export(dcor2d) +export(dcor2d_2) export(dependence_tour) export(display_andrews) export(display_density2d) @@ -77,6 +80,7 @@ export(lda_pp) export(linear_breaks) export(little_tour) export(local_tour) +export(loess2d) export(mahal_dist) export(manual_slice) export(mapColors) @@ -106,12 +110,14 @@ export(save_history) export(search_better) export(search_better_random) export(search_geodesic) +export(search_jellyfish) export(search_polish) export(search_posse) export(skewness) export(slice_index) export(sphere_data) export(splines2d) +export(stringy) export(thaw) importFrom(grDevices,dev.cur) importFrom(grDevices,dev.flush) diff --git a/R/animate.r b/R/animate.r index 9a4a8511..d767ed88 100644 --- a/R/animate.r +++ b/R/animate.r @@ -73,9 +73,11 @@ animate <- function(data, tour_path = grand_tour(), display = display_xy(), bases <- array(NA, c(ncol(data), ncol(start$target), bs)) # Initialise display - display$init(data) - display$render_frame() - display$render_data(data, start$proj) + if (!is.null(start$target)){ + display$init(data) + display$render_frame() + display$render_data(data, start$proj) + } b <- 0 i <- 0 @@ -93,19 +95,23 @@ animate <- function(data, tour_path = grand_tour(), display = display_xy(), bs <- 2 * bs } bases[, , b] <- step$target + } else if (is.null(step$proj)){ + break } - dev.hold() - on.exit(dev.flush()) - if (plat$os == "win" || plat$iface == "rstudio") { - display$render_frame() - } else { - display$render_transition() + if (!is.null(start$target)){ + dev.hold() + on.exit(dev.flush()) + if (plat$os == "win" || plat$iface == "rstudio") { + display$render_frame() + } else { + display$render_transition() + } + display$render_data(data, step$proj, step$target) + dev.flush() + if (step$step < 0) break # break after rendering final projection + Sys.sleep(1 / fps) } - display$render_data(data, step$proj, step$target) - dev.flush() - if (step$step < 0) break # break after rendering final projection - Sys.sleep(1 / fps) } }, interrupt = function(cond) { diff --git a/R/geodesic-path.r b/R/geodesic-path.r index 7e7a2e64..6e7c849b 100644 --- a/R/geodesic-path.r +++ b/R/geodesic-path.r @@ -35,6 +35,7 @@ new_geodesic_path <- function(name, generator, frozen = NULL, ...) { gen <- generator(current, data, tries, ...) target <- gen$target + if (inherits(target, "multi-bases")) return(list(target = target)) # generator has run out, so give up if (is.null(target)) { @@ -46,7 +47,6 @@ new_geodesic_path <- function(name, generator, frozen = NULL, ...) { return(NULL) } - #cat("generation: dist = ", dist, "\n") } list(ingred = geodesic_path(current, target, frozen, ...), index = gen$index, tries = tries) } @@ -61,7 +61,7 @@ new_geodesic_path <- function(name, generator, frozen = NULL, ...) { #' @export "print.tour_path" <- function(x, ...) { - cat("Tour path:", attr(x, "name"), "\n") + message("Tour path: ", attr(x, "name")) # params <- as.list(environment(get("generator", environment(g)))) # str(params) diff --git a/R/geodesic.r b/R/geodesic.r index fcb226b9..e6666767 100644 --- a/R/geodesic.r +++ b/R/geodesic.r @@ -99,7 +99,7 @@ geodesic_info <- function(Fa, Fz, epsilon = 1e-6) { } # if (Fa.equivalent(Fz)) return(); - # cat("dim Fa",nrow(Fa),ncol(Fa),"dim Fz",nrow(Fz),ncol(Fz),"\n") + # message("dim Fa",nrow(Fa),ncol(Fa),"dim Fz",nrow(Fz),ncol(Fz)) # Compute the SVD: Fa'Fz = Va lambda Vz' -------------------------------- sv <- svd(t(Fa) %*% Fz) diff --git a/R/interesting-indices.r b/R/interesting-indices.r index 9d399e69..160d665a 100644 --- a/R/interesting-indices.r +++ b/R/interesting-indices.r @@ -1,11 +1,45 @@ +#' Scagnostic indexes. +#' +#' Compute the scagnostic measures from the cassowaryr package +#' @export +stringy <- function(){ + function(mat){ + cassowaryr::sc_stringy(mat[,1], mat[,2]) + } +} + + +#' Maximum and total information coefficient index. +#' +#' Compute the maximum and total information coefficient indexes, +#' see \code{minerva::mine}. +#' +#' @rdname indexes +#' @export +MIC <- function(){ + function(mat){ + minerva::mine(mat[,1], mat[,2], alpha = 0.3, est = "mic_e")$MIC + } +} + +#' @rdname indexes +#' @export +TIC <- function(){ + function(mat){ + minerva::mine(mat[,1], mat[,2], est = "mic_e", alpha = 0.3)$TIC + } +} + #' Distance correlation index. #' -#' Computes the distance correlation based index on -#' 2D projections of the data. +#' Computes the distance correlation based index on 2D projections of the data. +#' \code{dcor2d_2} uses the faster implementation of the distance correlation +#' for bivariate data, see \code{energy::dcor2d}. #' #' @keywords hplot #' @importFrom stats na.omit #' @export +#' @rdname dcor dcor2d <- function() { function(mat) { xy <- na.omit(data.frame(x = mat[, 1], y = mat[, 2])) @@ -14,15 +48,26 @@ dcor2d <- function() { } } -#' Spline based index. +#' @rdname dcor +#' @export +dcor2d_2 <- function() { + function(mat) { + xy <- na.omit(data.frame(x = mat[, 1], y = mat[, 2])) + measure <- with(xy, energy::dcor2d(x, y, type = "U")) + return(measure) + } +} + +#' Spline/loess based index. #' #' Compares the variance in residuals of a fitted -#' spline model to the overall variance to find +#' spline/loess model to the overall variance to find #' functional dependence in 2D projections #' of the data. #' #' @keywords hplot #' @importFrom stats residuals var +#' @rdname spline-loess #' @export splines2d <- function() { function(mat) { @@ -38,6 +83,20 @@ splines2d <- function() { } } +#' @rdname spline-loess +#' @export +loess2d <- function() { + function(mat) { + mat <- as.data.frame(mat) + colnames(mat) <- c("x", "y") + loess_fit <- loess(y ~ x, data = mat, span = 0.05) + loess_fit2 <- loess(x ~ y, data = mat, span = 0.05) + measure <- max(1 - var(residuals(loess_fit), na.rm = T) / var(mat$y, na.rm = T), + 1 - var(residuals(loess_fit2), na.rm = T) / var(mat$y, na.rm = T) + ) + return(measure) + } +} #' Normality index. diff --git a/R/search-better.r b/R/search-better.r index f5565c43..f5e106eb 100644 --- a/R/search-better.r +++ b/R/search-better.r @@ -48,7 +48,6 @@ search_better <- function(current, alpha = 0.5, index, tries, max.tries = Inf,.. warning("cur_index is zero!") } - cat("Old", cur_index, "\n") try <- 1 while (try < max.tries) { @@ -70,7 +69,7 @@ search_better <- function(current, alpha = 0.5, index, tries, max.tries = Inf,.. if (new_index > cur_index) { - cat("New", new_index, "try", try, "\n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try) nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -83,24 +82,8 @@ search_better <- function(current, alpha = 0.5, index, tries, max.tries = Inf,.. try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) rcd_env[["record"]] <- dplyr::mutate( rcd_env[["record"]], @@ -139,8 +122,6 @@ search_better_random <- function(current, alpha = 0.5, index, tries, if (cur_index == 0) { warning("cur_index is zero!") } - - cat("Old", cur_index, "\n") try <- 1 while (try < max.tries) { new_basis <- basis_nearby(current, alpha, method) @@ -160,8 +141,7 @@ search_better_random <- function(current, alpha = 0.5, index, tries, ) if (new_index > cur_index) { - cat("New", new_index, "try", try, "\n") - cat("Accept \n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try, ", accept") nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -176,8 +156,8 @@ search_better_random <- function(current, alpha = 0.5, index, tries, rand <- stats::runif(1) if (prob > rand) { - cat("New", new_index, "try", try, "\n") - cat("Accept with probability, prob =", prob, "\n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try, + ", probabilistic accept p = ", sprintf("%.3f", prob)) nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -193,24 +173,8 @@ search_better_random <- function(current, alpha = 0.5, index, tries, try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) rcd_env[["record"]] <- dplyr::mutate( rcd_env[["record"]], diff --git a/R/search-frozen-geodesic.r b/R/search-frozen-geodesic.r index b4eabb6a..19318386 100644 --- a/R/search-frozen-geodesic.r +++ b/R/search-frozen-geodesic.r @@ -44,13 +44,13 @@ search_frozen_geodesic <- function(current, index, tries, max.tries = 5, n = 5, dig3 <- function(x) sprintf("%.3f", x) pdiff <- (new_index - cur_index) / cur_index if (pdiff > 0.001) { - cat("New index: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away)\n", sep = "") + message("New index: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away)", sep = "") current <<- new_basis cur_index <<- new_index return(list(target = new_basis[[1]])) } - cat("Best was: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away). Trying again...\n", sep = "") + message("Best was: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away). Try again...", sep = "") try <- try + 1 } diff --git a/R/search-geodesic.r b/R/search-geodesic.r index dcd38802..7830da36 100644 --- a/R/search-geodesic.r +++ b/R/search-geodesic.r @@ -58,44 +58,21 @@ search_geodesic <- function(current, alpha = 1, index, tries, max.tries = 5, ... warning("either the cur_index or the new_index is zero!") } else { pdiff <- (new_index - cur_index) / cur_index - - dig3 <- function(x) sprintf("%.3f", x) - - cat( - "Value ", dig3(new_index), " ", - sprintf("%.1f", pdiff * 100), "% better " - ) if (pdiff > 0.001 & proj_dist(current, new_basis[[1]]) > 1e-2) { # FIXME: pdiff should pbly be a changeable parameter - cat(" - NEW BASIS\n") - + message("Target: ", sprintf("%.3f", new_index), ", ", + sprintf("%.1f", pdiff * 100), "% better ") current <- new_basis cur_index <- new_index return(list(target = new_basis[[1]])) } - cat("\n") } try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) NULL } diff --git a/R/search-jellyfish.R b/R/search-jellyfish.R new file mode 100644 index 00000000..36e14664 --- /dev/null +++ b/R/search-jellyfish.R @@ -0,0 +1,84 @@ +#' An jellyfish optimisers for the projection pursuit guided tour +#' +#' @param current starting projection, a list of basis of class "multi-bases" +#' @param index index function +#' @param tries the counter of the outer loop of the opotimiser +#' @param max.tries the maximum number of iteration before giving up +#' @param ... other arguments being passed into the \code{search_jellyfish()} +#' @keywords optimize +#' @export +#' @examples +#' res <- animate_xy(flea[, 1:6], guided_tour(holes(), search_f = search_jellyfish)) +#' res +search_jellyfish <- function(current, index, tries, max.tries = 50, ...) { + rcd_env <- parent.frame(n = 4) + if (is.null(rcd_env[["record"]])) rcd_env <- parent.frame(n = 1) + best_jelly <- current[[attr(current, "best_id")]] + current_idx <- sapply(current, index) + + c_t = abs((1 - tries / max.tries) * (2 * runif(1) - 1)) + + if (c_t >= 0.5) { + trend = best_jelly - 3 * runif(1) * Reduce("+", current) / length(current) # eq 9 + target = lapply(current, function(x) { + orthonormalise(x + runif(1) * trend) + }) + } else if (runif(1) > (1 - c_t)) { + # type A passive + target = lapply(current, function(x) { + orthonormalise(x + 0.1 * runif(1)) + }) # eq 12 + } else { + # type B active + # generate random jelly fish j and its index value + update_typeB <- function(jelly_i, jellies) { + jelly_j = jellies[[sample(1:length(jellies), 1)]] + if (index(jelly_i) > index(jelly_j)) { + new_i = orthonormalise(jelly_i + runif(1) * (jelly_i - jelly_j)) + } else { + new_i = orthonormalise(jelly_i + runif(1) * (jelly_j - jelly_i)) # eq 16 + } + return(new_i) + } + + target = lapply(current, function(i) {update_typeB(i, current)}) + } + + target <- purrr::map2(current, target, correct_orientation) + target_idx <- sapply(target, index) + + # if the target is worse than current, use current + worse_id <- current_idx > target_idx + target[worse_id] <- current[worse_id] + target_idx[worse_id] <- current_idx[worse_id] + + best_id <- which.max(target_idx) + #message("Target: ", sprintf("%.3f", max(target_idx))) + attr(target, "best_id") <- best_id + class(target) <- c("multi-bases", class(target)) + + rcd_env[["record"]] <- dplyr::add_row( + rcd_env[["record"]], basis = unclass(target), + index_val = target_idx, info = "jellyfish_update", + tries = tries, method = "search_jellyfish", alpha = NA, + loop = 1:length(target) + ) + rcd_env[["record"]] <- dplyr::mutate( + rcd_env[["record"]], + info = ifelse(tries == max(tries) & loop == best_id, "current_best", info) + ) + + if (tries >= max.tries) { + print_final_proj(target[[attr(target, "best_id")]]) + rcd_env[["record"]] <- dplyr::mutate( + rcd_env[["record"]], + id = dplyr::row_number() + ) + NULL + } else{ + tries <- tries + 1 + return(list(target = target)) + } +} + + diff --git a/R/search_polish.r b/R/search_polish.r index 2661d5db..58f5c176 100644 --- a/R/search_polish.r +++ b/R/search_polish.r @@ -60,20 +60,20 @@ search_polish <- function(current, alpha = 0.5, index, tries, polish_max_tries = # check condition 1: the two bases can't be too close if (polish_dist < 1e-3) { - cat("The new basis is too close to the current one! \n") - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("The new basis is too close to the current one!") + message("current basis: ", current, "cur_index: ", sprintf("%.3f", cur_index)) return(list(target = current, alpha = alpha)) } # check condition 2: there needs to be certain improvement if (polish_pdiff < 1e-5) { - cat("The improvement is too small! \n") - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("The improvement is too small!") + message("current basis: ", current, "cur_index: ", sprintf("%.3f", cur_index)) return(list(target = current, alpha = alpha)) } - cat("better basis found, index_val = ", best_row$index_val, "\n") + message("better basis found, index_val = ", sprintf("%.3f", best_row$index_val)) cur_index <- best_row$index_val current <- best_row$basis[[1]] best_row <- dplyr::mutate(best_row, @@ -86,13 +86,13 @@ search_polish <- function(current, alpha = 0.5, index, tries, polish_max_tries = } else { polish_cooling <- polish_cooling * 0.95 alpha <- alpha * polish_cooling - cat("alpha gets updated to", alpha, "\n") + message("alpha gets updated to", alpha, "\n") # check condition 3: alpha can't be too small if (alpha < 0.01) { - cat("alpha is", alpha, "and it is too small! \n") - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("alpha is", alpha, "and it is too small!") + message("current basis: ", current, "cur_index: ", sprintf("%.3f", cur_index)) return(list(target = current, alpha = alpha)) } } @@ -100,26 +100,8 @@ search_polish <- function(current, alpha = 0.5, index, tries, polish_max_tries = try <- try + 1 } - cat("No better bases found after ", polish_max_tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } - - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("No better bases found after ", polish_max_tries, " tries. Giving up.") + print_final_proj(current) return(list(target = current, alpha = alpha)) } # globalVariables("index_val") diff --git a/R/search_posse.R b/R/search_posse.R index 1d878cc2..9845edb1 100644 --- a/R/search_posse.R +++ b/R/search_posse.R @@ -31,7 +31,7 @@ search_posse <- function(current, alpha = 0.5, index, tries, max.tries = 300, cu ) if (new_index > cur_index) { - cat("New", new_index, "try", try, "\n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try) nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -44,24 +44,8 @@ search_posse <- function(current, alpha = 0.5, index, tries, max.tries = 300, cu try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) NULL } diff --git a/R/section-pursuit.r b/R/section-pursuit.r index 8c4dce69..aad30f87 100644 --- a/R/section-pursuit.r +++ b/R/section-pursuit.r @@ -21,17 +21,17 @@ slice_index <- function(breaks_x, breaks_y, eps, bintype = "polar", power = 1, f reweight = FALSE, p = 4) { if (reweight) { if (bintype != "polar") { - cat("Reweighting is only defined for polar binning and will be ignored.") + message("Reweighting is only defined for polar binning and will be ignored.") } else { - cat(paste0("Reweighting assuming p=", p)) + message(paste0("Reweighting assuming p=", p)) } } resc <- 1 if (bintype == "polar") { resc <- 1 / (1 - (1 / 10)^(1 / power))^power - cat(paste0("Rescaling raw index by a factor ", resc)) + message(paste0("Rescaling raw index by a factor ", resc)) } function(mat, dists, h) { @@ -133,7 +133,7 @@ slice_binning <- function(mat, dists, h, breaks_x, breaks_y, bintype = "square") ybin <- cut(ang, breaks_y) } else { - cat(bintype, " is not a recognised bin type\n") + message(bintype, " is not a recognised bin type") return(0) } diff --git a/R/tour-guided-section.r b/R/tour-guided-section.r index 1b496a6c..e7898fe7 100644 --- a/R/tour-guided-section.r +++ b/R/tour-guided-section.r @@ -68,24 +68,9 @@ guided_section_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, cur_index <- index(current) if (cur_index > max.i) { - cat("Found index ", cur_index, ", larger than selected maximum ", max.i, ". Stopping search.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("Found index ", sprintf("%.3f", cur_index), ", + larger than selected maximum ", max.i, ". Stopping search.") + print_final_proj(current) return(NULL) } diff --git a/R/tour-guided.r b/R/tour-guided.r index 1b98e929..25fbe3cb 100644 --- a/R/tour-guided.r +++ b/R/tour-guided.r @@ -42,8 +42,9 @@ #' f <- flea_std[, 1:3] #' tries <- replicate(5, save_history(f, guided_tour(holes())), simplify = FALSE) #' } -guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = 25, - max.i = Inf, search_f = search_geodesic, n_sample = 100, ...) { +guided_tour <- function(index_f, d = 2, cooling = 0.99, max.tries = 25, + max.i = Inf, search_f = search_geodesic, + n_jellies = 30, n_sample = 100, alpha = 0.5,...) { generator <- function(current, data, tries, ...) { index <- function(proj) { index_f(as.matrix(data) %*% proj) @@ -51,80 +52,80 @@ guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = valid_fun <- c( "search_geodesic", "search_better", "search_better_random", - "search_polish", "search_posse" + "search_polish", "search_posse", "search_jellyfish" ) method <- valid_fun[vapply(valid_fun, function(x) { identical(get(x), search_f) }, logical(1))] if (is.null(current)) { - current <- basis_random(ncol(data), d) + if (method == "search_jellyfish"){ + current <- replicate(n_jellies, basis_random(ncol(data), d), simplify = FALSE) + cur_index <- sapply(current, index) + best_id <- which.max(cur_index) + attr(current, "best_id") <- best_id + class(current) <- c("multi-bases", class(current)) + + rcd_env <- parent.frame(n = 3) + rcd_env[["record"]] <- dplyr::add_row( + rcd_env[["record"]], basis = current[-best_id], + index_val = cur_index[-best_id], info = "initiation", + method = method, alpha = NA, tries = 1, loop = 1:(length(current)-1) + ) + rcd_env[["record"]] <- dplyr::add_row( + rcd_env[["record"]], basis = current[best_id], + index_val = cur_index[best_id], info = "initiation", + method = method, alpha = NA, tries = 1, loop = length(current) + ) + return(current) + } + current <- basis_random(ncol(data), d) cur_index <- index(current) tryCatch({ rcd_env <- parent.frame(n = 3) rcd_env[["record"]] <- dplyr::add_row( - rcd_env[["record"]], - basis = list(current), - index_val = cur_index, - info = "new_basis", - method = method, - alpha = formals(guided_tour)$alpha, - tries = 1, - loop = 1 + rcd_env[["record"]], basis = list(current), + index_val = cur_index, info = "new_basis", method = method, + alpha = formals(guided_tour)$alpha, tries = 1, loop = 1 ) }, error = function(e){ assign("record", - tibble::tibble(basis = list(), - index_val = numeric(), - info = character(), - method = character(), - alpha = numeric(), - tries = numeric(), - loop = numeric()), + tibble::tibble( + basis = list(), index_val = numeric(), info = character(), + method = character(), alpha = numeric(), tries = numeric(), + loop = numeric()), envir = parent.frame()) rcd_env[["record"]] <- tibble::tibble( - basis = list(current), - index_val = cur_index, - info = "new_basis", - method = method, - alpha = formals(guided_tour)$alpha, - tries = 1, - loop = 1) - } - ) + basis = list(current), index_val = cur_index, info = "new_basis", + method = method, alpha = formals(guided_tour)$alpha, tries = 1, + loop = 1 + ) + }) return(current) } - cur_index <- index(current) + if (inherits(current, "multi-bases")){ + best_id <- attr(current, "best_id") + cur_index <- max(sapply(current, index)) + } else{ + cur_index <- index(current) + } if (cur_index > max.i) { - cat("Found index ", cur_index, ", larger than selected maximum ", max.i, ". Stopping search.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("Found index ", sprintf("%.3f", cur_index), ", + larger than selected maximum ", max.i, ". Stopping search.") + print_final_proj(current) return(NULL) } # current, alpha = 1, index, max.tries = 5, n = 5, delta = 0.01, cur_index = NA, .. - basis <- search_f(current, alpha, index, tries, max.tries, cur_index = cur_index, frozen = frozen, n_sample = n_sample, ...) + basis <- search_f( + current, alpha = alpha, index = index, tries = tries, max.tries = max.tries, + cur_index = cur_index, frozen = frozen, n_sample = n_sample, ...) if (method == "search_posse") { if (!is.null(basis$h)) { diff --git a/R/tour.r b/R/tour.r index e3fd906b..76d38e5e 100644 --- a/R/tour.r +++ b/R/tour.r @@ -42,16 +42,16 @@ new_tour <- function(data, tour_path, start = NULL, ...) { geodesic <- NULL function(step_size, ...) { - # cat("target_dist - cur_dist:", target_dist - cur_dist, "\n") step <<- step + 1 cur_dist <<- cur_dist + step_size - if (target_dist == 0 & step > 1) { # should only happen for guided tour when no better basis is found (relative to starting plane) + multi_bases_stop <- inherits(start, "multi-bases") && is.null(proj[[1]]) + if ((target_dist == 0 && step > 1 )|| multi_bases_stop) { # should only happen for guided tour when no better basis is found (relative to starting plane) return(list(proj = tail(proj, 1)[[1]], target = target, step = -1)) # use negative step size to signal that we have reached the final target } # We're at (or past) the target, so generate a new one and reset counters - if (step_size > 0 & is.finite(step_size) & cur_dist >= target_dist) { + if (step_size > 0 && is.finite(step_size) && cur_dist >= target_dist && !inherits(start, "multi-bases")) { ## interrupt if (attr(tour_path, "name") == "guided") { @@ -60,21 +60,21 @@ new_tour <- function(data, tour_path, start = NULL, ...) { last_two <- tail(dplyr::filter(rcd_env[["record"]], info == "new_basis"), 2) if (last_two$index_val[1] > last_two$index_val[2]) { - # search_better_random may give probabilistic acceptance, leave it as it is + # search_better_random may give probabilistic acceptance, leave it as it is } else { interp <- dplyr::filter(rcd_env[["record"]], tries == max(tries), info == "interpolation") interp <- dplyr::filter(interp, index_val == max(index_val)) target <- dplyr::filter(rcd_env[["record"]], tries == max(tries), info == "new_basis") - # deem the target basis as the new current basis if the interpolation doesn't reach the target basis - # used when the index_f is not smooth + # deem the target basis as the new current basis if the interpolation doesn't reach the target basis + # used when the index_f is not smooth if (target$index_val > interp$index_val) { proj[[length(proj) + 1]] <<- geodesic$ingred$interpolate(1.) # make sure next starting plane is previous target target <- dplyr::mutate(target, info = "interpolation", loop = step + 1, alpha = NA) rcd_env[["record"]] <- dplyr::add_row(rcd_env[["record"]], target) } else if (target$index_val < interp$index_val & nrow(interp) != 0) { - # the interrupt + # the interrupt proj[[length(proj) + 1]] <<- interp$basis[[1]] rcd_env[["record"]] <- dplyr::filter( @@ -93,7 +93,7 @@ new_tour <- function(data, tour_path, start = NULL, ...) { info = "interpolation", tries = geodesic$tries, method = dplyr::last(rcd_env[["record"]]$method), - loop = step + 1 + loop = step + 1 ) rcd_env[["record"]] <- dplyr::mutate( @@ -109,6 +109,14 @@ new_tour <- function(data, tour_path, start = NULL, ...) { } if (cur_dist >= target_dist) { + if (inherits(proj[[1]], "multi-bases")){ + geodesic <<- tour_path(proj[[1]], data, ...) + proj <<- list(geodesic$target) + target_dist <<- 10 + cur_dist <<-10 + return(list(proj = proj[[1]], target = NULL, step = -1)) # use negative step size to signal that we have reached the final target + } + geodesic <<- tour_path(proj[[length(proj)]], data, ...) if (is.null(geodesic$ingred)) { return(list(proj = proj[[length(proj)]], target = target, step = -1)) # use negative step size to signal that we have reached the final target diff --git a/R/util.r b/R/util.r index c67e039e..e2750d10 100644 --- a/R/util.r +++ b/R/util.r @@ -1,3 +1,21 @@ +#' Print out the final projection basis +#' @param basis the projection basis to print, used in search_* functions +#' @keywords internal +print_final_proj <- function(basis){ + message("Final projection: ") + + if (ncol(basis) == 1) { + for (i in 1:length(basis)) { + message(sprintf("% .3f", basis[i]), " ") + } + } else { + for (i in 1:nrow(basis)) { + message(paste0(sprintf("% .3f", basis[i, ]), sep = " ")) + } + } +} + + #' Rescale a matrix or data frame #' #' Standardise each column to have range [0, 1] diff --git a/_pkgdown.yml b/_pkgdown.yml index eb873103..ee29c591 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,6 +52,8 @@ reference: - slice_index - norm_bin - norm_kol + - stringy + - MIC - title: Search functions desc: > Functions for index optimisation in guided tour diff --git a/man/dcor.Rd b/man/dcor.Rd new file mode 100644 index 00000000..6f9f9669 --- /dev/null +++ b/man/dcor.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interesting-indices.r +\name{dcor2d} +\alias{dcor2d} +\alias{dcor2d_2} +\title{Distance correlation index.} +\usage{ +dcor2d() + +dcor2d_2() +} +\description{ +Computes the distance correlation based index on 2D projections of the data. +\code{dcor2d_2} uses the faster implementation of the distance correlation +for bivariate data, see \code{energy::dcor2d}. +} +\keyword{hplot} diff --git a/man/dcor2d.Rd b/man/dcor2d.Rd deleted file mode 100644 index 9c5e68d4..00000000 --- a/man/dcor2d.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/interesting-indices.r -\name{dcor2d} -\alias{dcor2d} -\title{Distance correlation index.} -\usage{ -dcor2d() -} -\description{ -Computes the distance correlation based index on -2D projections of the data. -} -\keyword{hplot} diff --git a/man/guided_tour.Rd b/man/guided_tour.Rd index 43bff085..83375437 100644 --- a/man/guided_tour.Rd +++ b/man/guided_tour.Rd @@ -7,12 +7,13 @@ guided_tour( index_f, d = 2, - alpha = 0.5, cooling = 0.99, max.tries = 25, max.i = Inf, search_f = search_geodesic, + n_jellies = 30, n_sample = 100, + alpha = 0.5, ... ) } @@ -21,8 +22,6 @@ guided_tour( \item{d}{target dimensionality} -\item{alpha}{the initial size of the search window, in radians} - \item{cooling}{the amount the size of the search window should be adjusted by after each step} @@ -36,6 +35,8 @@ a better projection before giving up} \item{n_sample}{number of samples to generate if \code{search_f} is \code{\link{search_polish}}} +\item{alpha}{the initial size of the search window, in radians} + \item{...}{arguments sent to the search_f} } \description{ diff --git a/man/indexes.Rd b/man/indexes.Rd new file mode 100644 index 00000000..3549b6bc --- /dev/null +++ b/man/indexes.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interesting-indices.r +\name{MIC} +\alias{MIC} +\alias{TIC} +\title{Maximum and total information coefficient index.} +\usage{ +MIC() + +TIC() +} +\description{ +Compute the maximum and total information coefficient indexes, +see \code{minerva::mine}. +} diff --git a/man/print_final_proj.Rd b/man/print_final_proj.Rd new file mode 100644 index 00000000..c373d452 --- /dev/null +++ b/man/print_final_proj.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.r +\name{print_final_proj} +\alias{print_final_proj} +\title{Print out the final projection basis} +\usage{ +print_final_proj(basis) +} +\arguments{ +\item{basis}{the projection basis to print, used in search_* functions} +} +\description{ +Print out the final projection basis +} +\keyword{internal} diff --git a/man/search_jellyfish.Rd b/man/search_jellyfish.Rd new file mode 100644 index 00000000..c4fec8d1 --- /dev/null +++ b/man/search_jellyfish.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/search-jellyfish.R +\name{search_jellyfish} +\alias{search_jellyfish} +\title{An jellyfish optimisers for the projection pursuit guided tour} +\usage{ +search_jellyfish(current, index, tries, max.tries = 50, ...) +} +\arguments{ +\item{current}{starting projection, a list of basis of class "multi-bases"} + +\item{index}{index function} + +\item{tries}{the counter of the outer loop of the opotimiser} + +\item{max.tries}{the maximum number of iteration before giving up} + +\item{...}{other arguments being passed into the \code{search_jellyfish()}} +} +\description{ +An jellyfish optimisers for the projection pursuit guided tour +} +\examples{ +res <- animate_xy(flea[, 1:6], guided_tour(holes(), search_f = search_jellyfish)) +res +} +\keyword{optimize} diff --git a/man/splines2d.Rd b/man/spline-loess.Rd similarity index 72% rename from man/splines2d.Rd rename to man/spline-loess.Rd index 05cf368e..760ce288 100644 --- a/man/splines2d.Rd +++ b/man/spline-loess.Rd @@ -2,13 +2,16 @@ % Please edit documentation in R/interesting-indices.r \name{splines2d} \alias{splines2d} -\title{Spline based index.} +\alias{loess2d} +\title{Spline/loess based index.} \usage{ splines2d() + +loess2d() } \description{ Compares the variance in residuals of a fitted -spline model to the overall variance to find +spline/loess model to the overall variance to find functional dependence in 2D projections of the data. } diff --git a/man/stringy.Rd b/man/stringy.Rd new file mode 100644 index 00000000..fbfe7313 --- /dev/null +++ b/man/stringy.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interesting-indices.r +\name{stringy} +\alias{stringy} +\title{Scagnostic indexes.} +\usage{ +stringy() +} +\description{ +Compute the scagnostic measures from the cassowaryr package +}