From 31fe4649a6f055acf596cd205390e288d53a7492 Mon Sep 17 00:00:00 2001 From: huizezhang-sherry Date: Thu, 11 Jan 2024 13:59:02 +0100 Subject: [PATCH 1/6] implement the jellyfish optimiser --- R/animate.r | 32 +++++++++------- R/geodesic-path.r | 1 + R/search-jellyfish.R | 82 +++++++++++++++++++++++++++++++++++++++++ R/tour-guided.r | 88 ++++++++++++++++++++++---------------------- R/tour.r | 23 ++++++++---- 5 files changed, 162 insertions(+), 64 deletions(-) create mode 100644 R/search-jellyfish.R diff --git a/R/animate.r b/R/animate.r index 9a4a8511..d767ed88 100644 --- a/R/animate.r +++ b/R/animate.r @@ -73,9 +73,11 @@ animate <- function(data, tour_path = grand_tour(), display = display_xy(), bases <- array(NA, c(ncol(data), ncol(start$target), bs)) # Initialise display - display$init(data) - display$render_frame() - display$render_data(data, start$proj) + if (!is.null(start$target)){ + display$init(data) + display$render_frame() + display$render_data(data, start$proj) + } b <- 0 i <- 0 @@ -93,19 +95,23 @@ animate <- function(data, tour_path = grand_tour(), display = display_xy(), bs <- 2 * bs } bases[, , b] <- step$target + } else if (is.null(step$proj)){ + break } - dev.hold() - on.exit(dev.flush()) - if (plat$os == "win" || plat$iface == "rstudio") { - display$render_frame() - } else { - display$render_transition() + if (!is.null(start$target)){ + dev.hold() + on.exit(dev.flush()) + if (plat$os == "win" || plat$iface == "rstudio") { + display$render_frame() + } else { + display$render_transition() + } + display$render_data(data, step$proj, step$target) + dev.flush() + if (step$step < 0) break # break after rendering final projection + Sys.sleep(1 / fps) } - display$render_data(data, step$proj, step$target) - dev.flush() - if (step$step < 0) break # break after rendering final projection - Sys.sleep(1 / fps) } }, interrupt = function(cond) { diff --git a/R/geodesic-path.r b/R/geodesic-path.r index 7e7a2e64..6958d28d 100644 --- a/R/geodesic-path.r +++ b/R/geodesic-path.r @@ -35,6 +35,7 @@ new_geodesic_path <- function(name, generator, frozen = NULL, ...) { gen <- generator(current, data, tries, ...) target <- gen$target + if (inherits(target, "multi-bases")) return(list(target = target)) # generator has run out, so give up if (is.null(target)) { diff --git a/R/search-jellyfish.R b/R/search-jellyfish.R new file mode 100644 index 00000000..30d1df19 --- /dev/null +++ b/R/search-jellyfish.R @@ -0,0 +1,82 @@ +search_jellyfish <- function(current, alpha = 0.5, index, tries, + max.tries = Inf, ..., cur_index = NA) { + rcd_env <- parent.frame(n = 4) + if (is.null(rcd_env[["record"]])) rcd_env <- parent.frame(n = 1) + best_jelly <- current[[attr(current, "best_id")]] + + c_t = abs((1 - tries / max.tries) * (2 * runif(1) - 1)) + + if (c_t >= 0.5) { + trend = best_jelly - 3 * runif(1) * Reduce("+", current) / length(current) # eq 9 + target = lapply(current, function(x) { + orthonormalise(x + runif(1) * trend) + }) + } else if (runif(1) > (1 - c_t)) { + # type A passive + target = lapply(current, function(x) { + orthonormalise(x + 0.1 * runif(1) * 2) + }) # eq 12 + } else { + # type B active + # generate random jelly fish j and its index value + update_typeB <- function(jelly_i, jellies) { + jelly_j = jellies[[sample(1:length(jellies), 1)]] + if (index(jelly_i) > index(jelly_j)) { + new_i = orthonormalise(jelly_i + runif(1) * (jelly_i - jelly_j)) + } else { + new_i = orthonormalise(jelly_i + runif(1) * (jelly_j - jelly_i)) # eq 16 + } + return(new_i) + } + + target = lapply(current, function(i) {update_typeB(i, current)}) + } + + target_idx <- sapply(target, index) + best_id <- which.max(target_idx) + cat("Best Index: ", max(target_idx), "\n") + attr(target, "best_id") <- best_id + class(target) <- c("multi-bases", class(target)) + + rcd_env[["record"]] <- dplyr::add_row( + rcd_env[["record"]], basis = list(target[-best_id]), + index_val = target_idx[-best_id], info = "jellyfish_update", + tries = tries, method = "jellyfish_optimiser", alpha = NA + ) + rcd_env[["record"]] <- dplyr::add_row( + rcd_env[["record"]], basis = list(target[best_id]), + index_val = max(target_idx), info = "current_best", tries = tries, + method = "jellyfish_optimiser", alpha = NA + ) + + + if (diff(quantile(target_idx, c(0.05, 0.95))) < 0.05 || tries > max.tries) { + print_final_proj(target[[attr(target, "best_id")]]) + rcd_env[["record"]] <- dplyr::mutate( + rcd_env[["record"]], + id = dplyr::row_number() + ) + NULL + } else{ + tries <- tries + 1 + return(list(target = target)) + } +} + + +print_final_proj <- function(current){ + cat("Final projection: \n") + if (ncol(current) == 1) { + for (i in 1:length(current)) { + cat(sprintf("%.3f", current[i]), " ") + } + cat("\n") + } else { + for (i in 1:nrow(current)) { + for (j in 1:ncol(current)) { + cat(sprintf("%.3f", current[i, j]), " ") + } + cat("\n") + } + } +} diff --git a/R/tour-guided.r b/R/tour-guided.r index 1b98e929..2886fdfa 100644 --- a/R/tour-guided.r +++ b/R/tour-guided.r @@ -51,80 +51,80 @@ guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = valid_fun <- c( "search_geodesic", "search_better", "search_better_random", - "search_polish", "search_posse" + "search_polish", "search_posse", "search_jellyfish" ) method <- valid_fun[vapply(valid_fun, function(x) { identical(get(x), search_f) }, logical(1))] if (is.null(current)) { - current <- basis_random(ncol(data), d) + if (method == "search_jellyfish"){ + current <- replicate(30, basis_random(ncol(data), d), simplify = FALSE) + cur_index <- sapply(current, index) + best_id <- which.max(cur_index) + attr(current, "best_id") <- best_id + class(current) <- c("multi-bases", class(current)) + + rcd_env <- parent.frame(n = 3) + rcd_env[["record"]] <- dplyr::add_row( + rcd_env[["record"]], basis = current[-best_id], + index_val = cur_index[-best_id], info = "initiation", + method = method, alpha = NA, tries = 1, loop = 1 + ) + rcd_env[["record"]] <- dplyr::add_row( + rcd_env[["record"]], basis = current[best_id], + index_val = cur_index[best_id], info = "initiation", + method = method, alpha = NA, tries = 1, loop = 1 + ) + return(current) + } + current <- basis_random(ncol(data), d) cur_index <- index(current) tryCatch({ rcd_env <- parent.frame(n = 3) rcd_env[["record"]] <- dplyr::add_row( - rcd_env[["record"]], - basis = list(current), - index_val = cur_index, - info = "new_basis", - method = method, - alpha = formals(guided_tour)$alpha, - tries = 1, - loop = 1 + rcd_env[["record"]], basis = list(current), + index_val = cur_index, info = "new_basis", method = method, + alpha = formals(guided_tour)$alpha, tries = 1, loop = 1 ) }, error = function(e){ assign("record", - tibble::tibble(basis = list(), - index_val = numeric(), - info = character(), - method = character(), - alpha = numeric(), - tries = numeric(), - loop = numeric()), + tibble::tibble( + basis = list(), index_val = numeric(), info = character(), + method = character(), alpha = numeric(), tries = numeric(), + loop = numeric()), envir = parent.frame()) rcd_env[["record"]] <- tibble::tibble( - basis = list(current), - index_val = cur_index, - info = "new_basis", - method = method, - alpha = formals(guided_tour)$alpha, - tries = 1, - loop = 1) - } - ) + basis = list(current), index_val = cur_index, info = "new_basis", + method = method, alpha = formals(guided_tour)$alpha, tries = 1, + loop = 1 + ) + }) return(current) } - cur_index <- index(current) + if (inherits(current, "multi-bases")){ + best_id <- attr(current, "best_id") + cur_index <- max(sapply(current, index)) + } else{ + cur_index <- index(current) + } if (cur_index > max.i) { cat("Found index ", cur_index, ", larger than selected maximum ", max.i, ". Stopping search.\n", - sep = "" + sep = "" ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + print_final_proj(current) return(NULL) } # current, alpha = 1, index, max.tries = 5, n = 5, delta = 0.01, cur_index = NA, .. - basis <- search_f(current, alpha, index, tries, max.tries, cur_index = cur_index, frozen = frozen, n_sample = n_sample, ...) + basis <- search_f(current, alpha, index, tries, max.tries = max.tries, + cur_index = cur_index, frozen = frozen, n_sample = n_sample, ...) if (method == "search_posse") { if (!is.null(basis$h)) { diff --git a/R/tour.r b/R/tour.r index e3fd906b..bc3bf6e6 100644 --- a/R/tour.r +++ b/R/tour.r @@ -47,11 +47,12 @@ new_tour <- function(data, tour_path, start = NULL, ...) { step <<- step + 1 cur_dist <<- cur_dist + step_size - if (target_dist == 0 & step > 1) { # should only happen for guided tour when no better basis is found (relative to starting plane) + multi_bases_stop <- inherits(start, "multi-bases") && is.null(proj[[1]]) + if ((target_dist == 0 && step > 1 )|| multi_bases_stop) { # should only happen for guided tour when no better basis is found (relative to starting plane) return(list(proj = tail(proj, 1)[[1]], target = target, step = -1)) # use negative step size to signal that we have reached the final target } # We're at (or past) the target, so generate a new one and reset counters - if (step_size > 0 & is.finite(step_size) & cur_dist >= target_dist) { + if (step_size > 0 && is.finite(step_size) && cur_dist >= target_dist && !inherits(start, "multi-bases")) { ## interrupt if (attr(tour_path, "name") == "guided") { @@ -60,21 +61,21 @@ new_tour <- function(data, tour_path, start = NULL, ...) { last_two <- tail(dplyr::filter(rcd_env[["record"]], info == "new_basis"), 2) if (last_two$index_val[1] > last_two$index_val[2]) { - # search_better_random may give probabilistic acceptance, leave it as it is + # search_better_random may give probabilistic acceptance, leave it as it is } else { interp <- dplyr::filter(rcd_env[["record"]], tries == max(tries), info == "interpolation") interp <- dplyr::filter(interp, index_val == max(index_val)) target <- dplyr::filter(rcd_env[["record"]], tries == max(tries), info == "new_basis") - # deem the target basis as the new current basis if the interpolation doesn't reach the target basis - # used when the index_f is not smooth + # deem the target basis as the new current basis if the interpolation doesn't reach the target basis + # used when the index_f is not smooth if (target$index_val > interp$index_val) { proj[[length(proj) + 1]] <<- geodesic$ingred$interpolate(1.) # make sure next starting plane is previous target target <- dplyr::mutate(target, info = "interpolation", loop = step + 1, alpha = NA) rcd_env[["record"]] <- dplyr::add_row(rcd_env[["record"]], target) } else if (target$index_val < interp$index_val & nrow(interp) != 0) { - # the interrupt + # the interrupt proj[[length(proj) + 1]] <<- interp$basis[[1]] rcd_env[["record"]] <- dplyr::filter( @@ -93,7 +94,7 @@ new_tour <- function(data, tour_path, start = NULL, ...) { info = "interpolation", tries = geodesic$tries, method = dplyr::last(rcd_env[["record"]]$method), - loop = step + 1 + loop = step + 1 ) rcd_env[["record"]] <- dplyr::mutate( @@ -109,6 +110,14 @@ new_tour <- function(data, tour_path, start = NULL, ...) { } if (cur_dist >= target_dist) { + if (inherits(proj[[1]], "multi-bases")){ + geodesic <<- tour_path(proj[[1]], data, ...) + proj <<- list(geodesic$target) + target_dist <<- 10 + cur_dist <<-10 + return(list(proj = proj[[1]], target = NULL, step = -1)) # use negative step size to signal that we have reached the final target + } + geodesic <<- tour_path(proj[[length(proj)]], data, ...) if (is.null(geodesic$ingred)) { return(list(proj = proj[[length(proj)]], target = target, step = -1)) # use negative step size to signal that we have reached the final target From a9f2e15c9c9e7145966263e5b2fe6fe966e2b9dc Mon Sep 17 00:00:00 2001 From: huizezhang-sherry Date: Fri, 12 Jan 2024 13:23:22 +0100 Subject: [PATCH 2/6] documentation and small changes --- NAMESPACE | 1 + R/search-jellyfish.R | 32 ++++++++++++++++++++++---------- R/tour-guided.r | 11 ++++++----- man/display_idx.Rd | 12 ------------ man/guided_tour.Rd | 1 + man/search_jellyfish.Rd | 27 +++++++++++++++++++++++++++ 6 files changed, 57 insertions(+), 27 deletions(-) create mode 100644 man/search_jellyfish.Rd diff --git a/NAMESPACE b/NAMESPACE index ad983aa3..47466dc0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -103,6 +103,7 @@ export(save_history) export(search_better) export(search_better_random) export(search_geodesic) +export(search_jellyfish) export(search_polish) export(search_posse) export(slice_index) diff --git a/R/search-jellyfish.R b/R/search-jellyfish.R index 30d1df19..8202be34 100644 --- a/R/search-jellyfish.R +++ b/R/search-jellyfish.R @@ -1,5 +1,16 @@ -search_jellyfish <- function(current, alpha = 0.5, index, tries, - max.tries = Inf, ..., cur_index = NA) { +#' An jellyfish optimisers for the projection pursuit guided tour +#' +#' @param current starting projection, a list of basis of class "multi-bases" +#' @param index index function +#' @param tries the counter of the outer loop of the opotimiser +#' @param max.tries maximum number of iteration before giving up +#' @param ... other arguments being passed into the \code{search_jellyfish()} +#' @keywords optimize +#' @export +#' @examples +#' res <- animate_xy(flea[, 1:6], guided_tour(holes(), search_f = search_jellyfish)) +#' res +search_jellyfish <- function(current, index, tries, max.tries = Inf, ...) { rcd_env <- parent.frame(n = 4) if (is.null(rcd_env[["record"]])) rcd_env <- parent.frame(n = 1) best_jelly <- current[[attr(current, "best_id")]] @@ -32,6 +43,7 @@ search_jellyfish <- function(current, alpha = 0.5, index, tries, target = lapply(current, function(i) {update_typeB(i, current)}) } + target <- purrr::map2(current, target, correct_orientation) target_idx <- sapply(target, index) best_id <- which.max(target_idx) cat("Best Index: ", max(target_idx), "\n") @@ -39,18 +51,18 @@ search_jellyfish <- function(current, alpha = 0.5, index, tries, class(target) <- c("multi-bases", class(target)) rcd_env[["record"]] <- dplyr::add_row( - rcd_env[["record"]], basis = list(target[-best_id]), - index_val = target_idx[-best_id], info = "jellyfish_update", - tries = tries, method = "jellyfish_optimiser", alpha = NA + rcd_env[["record"]], basis = unclass(target), + index_val = target_idx, info = "jellyfish_update", + tries = tries, method = "search_jellyfish", alpha = NA, + loop = 1:length(target) ) - rcd_env[["record"]] <- dplyr::add_row( - rcd_env[["record"]], basis = list(target[best_id]), - index_val = max(target_idx), info = "current_best", tries = tries, - method = "jellyfish_optimiser", alpha = NA + rcd_env[["record"]] <- dplyr::mutate( + rcd_env[["record"]], + info = ifelse(tries == max(tries) & loop == best_id, "current_best", info) ) - if (diff(quantile(target_idx, c(0.05, 0.95))) < 0.05 || tries > max.tries) { + if (diff(quantile(target_idx, c(0.05, 0.95))) < 0.05 || tries >= max.tries) { print_final_proj(target[[attr(target, "best_id")]]) rcd_env[["record"]] <- dplyr::mutate( rcd_env[["record"]], diff --git a/R/tour-guided.r b/R/tour-guided.r index 2886fdfa..6a7e1eb6 100644 --- a/R/tour-guided.r +++ b/R/tour-guided.r @@ -43,7 +43,8 @@ #' tries <- replicate(5, save_history(f, guided_tour(holes())), simplify = FALSE) #' } guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = 25, - max.i = Inf, search_f = search_geodesic, n_sample = 100, ...) { + max.i = Inf, search_f = search_geodesic, n_jellies = 30, + n_sample = 100, ...) { generator <- function(current, data, tries, ...) { index <- function(proj) { index_f(as.matrix(data) %*% proj) @@ -59,7 +60,7 @@ guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = if (is.null(current)) { if (method == "search_jellyfish"){ - current <- replicate(30, basis_random(ncol(data), d), simplify = FALSE) + current <- replicate(n_jellies, basis_random(ncol(data), d), simplify = FALSE) cur_index <- sapply(current, index) best_id <- which.max(cur_index) attr(current, "best_id") <- best_id @@ -69,12 +70,12 @@ guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = rcd_env[["record"]] <- dplyr::add_row( rcd_env[["record"]], basis = current[-best_id], index_val = cur_index[-best_id], info = "initiation", - method = method, alpha = NA, tries = 1, loop = 1 + method = method, alpha = NA, tries = 1, loop = 1:(length(current)-1) ) rcd_env[["record"]] <- dplyr::add_row( rcd_env[["record"]], basis = current[best_id], index_val = cur_index[best_id], info = "initiation", - method = method, alpha = NA, tries = 1, loop = 1 + method = method, alpha = NA, tries = 1, loop = length(current) ) return(current) @@ -123,7 +124,7 @@ guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = } # current, alpha = 1, index, max.tries = 5, n = 5, delta = 0.01, cur_index = NA, .. - basis <- search_f(current, alpha, index, tries, max.tries = max.tries, + basis <- search_f(current, alpha, index = index, tries = tries, max.tries = max.tries, cur_index = cur_index, frozen = frozen, n_sample = n_sample, ...) if (method == "search_posse") { diff --git a/man/display_idx.Rd b/man/display_idx.Rd index 6ac9bbe4..9bdd1b0d 100644 --- a/man/display_idx.Rd +++ b/man/display_idx.Rd @@ -12,10 +12,6 @@ display_idx( col = "red", cex = 3, panel_height_ratio = c(3, 2), - frame_x_pos = 0.15, - frame_y_pos = 3, - frame_cex = 1, - frame_col = "#000000", label_x_pos = 0.7, label = NULL, label_cex = 1, @@ -53,14 +49,6 @@ factor, default to "red".} \item{panel_height_ratio}{input to the height argument in [graphics::layout()] for the height of data and axis panel.} -\item{frame_x_pos}{the x position of the frame label} - -\item{frame_y_pos}{the y position of the frame label} - -\item{frame_cex}{the size of the frame text} - -\item{frame_col}{the color of the frame text} - \item{label_x_pos}{the x position of text label, currently labels are positioned at a fixed x value for each observation} diff --git a/man/guided_tour.Rd b/man/guided_tour.Rd index 43bff085..ba99114b 100644 --- a/man/guided_tour.Rd +++ b/man/guided_tour.Rd @@ -12,6 +12,7 @@ guided_tour( max.tries = 25, max.i = Inf, search_f = search_geodesic, + n_jellies = 30, n_sample = 100, ... ) diff --git a/man/search_jellyfish.Rd b/man/search_jellyfish.Rd new file mode 100644 index 00000000..2fcf6dbc --- /dev/null +++ b/man/search_jellyfish.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/search-jellyfish.R +\name{search_jellyfish} +\alias{search_jellyfish} +\title{An jellyfish optimisers for the projection pursuit guided tour} +\usage{ +search_jellyfish(current, index, tries, max.tries = Inf, ...) +} +\arguments{ +\item{current}{starting projection, a list of basis of class "multi-bases"} + +\item{index}{index function} + +\item{tries}{the counter of the outer loop of the opotimiser} + +\item{max.tries}{maximum number of iteration before giving up} + +\item{...}{other arguments being passed into the \code{search_jellyfish()}} +} +\description{ +An jellyfish optimisers for the projection pursuit guided tour +} +\examples{ +res <- animate_xy(flea[, 1:6], guided_tour(holes(), search_f = search_jellyfish)) +res +} +\keyword{optimize} From f281b5b61f29167b4250779d58b5bfc7c17beb1a Mon Sep 17 00:00:00 2001 From: huizezhang-sherry Date: Mon, 5 Feb 2024 22:21:33 +0100 Subject: [PATCH 3/6] add argument minimum tries --- R/search-jellyfish.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/search-jellyfish.R b/R/search-jellyfish.R index 8202be34..5b099ecc 100644 --- a/R/search-jellyfish.R +++ b/R/search-jellyfish.R @@ -10,10 +10,11 @@ #' @examples #' res <- animate_xy(flea[, 1:6], guided_tour(holes(), search_f = search_jellyfish)) #' res -search_jellyfish <- function(current, index, tries, max.tries = Inf, ...) { +search_jellyfish <- function(current, index, tries, max.tries = Inf, min.tries = 1, ...) { rcd_env <- parent.frame(n = 4) if (is.null(rcd_env[["record"]])) rcd_env <- parent.frame(n = 1) best_jelly <- current[[attr(current, "best_id")]] + current_idx <- index(best_jelly) c_t = abs((1 - tries / max.tries) * (2 * runif(1) - 1)) @@ -45,6 +46,7 @@ search_jellyfish <- function(current, index, tries, max.tries = Inf, ...) { target <- purrr::map2(current, target, correct_orientation) target_idx <- sapply(target, index) + best_id <- which.max(target_idx) cat("Best Index: ", max(target_idx), "\n") attr(target, "best_id") <- best_id @@ -62,7 +64,8 @@ search_jellyfish <- function(current, index, tries, max.tries = Inf, ...) { ) - if (diff(quantile(target_idx, c(0.05, 0.95))) < 0.05 || tries >= max.tries) { + if (abs(max(target_idx) - current_idx) < 0.05 && + tries >= max.tries) { print_final_proj(target[[attr(target, "best_id")]]) rcd_env[["record"]] <- dplyr::mutate( rcd_env[["record"]], From adf198bbb7b371acefb2efa2c41cbe32ce0f067a Mon Sep 17 00:00:00 2001 From: huizezhang-sherry Date: Mon, 5 Feb 2024 23:25:00 +0100 Subject: [PATCH 4/6] use message instead of cat for all the msg printing --- R/geodesic-path.r | 3 +-- R/geodesic.r | 2 +- R/search-better.r | 52 ++++++-------------------------------- R/search-frozen-geodesic.r | 4 +-- R/search-geodesic.r | 33 ++++-------------------- R/search-jellyfish.R | 18 +------------ R/search_polish.r | 38 ++++++++-------------------- R/search_posse.R | 22 +++------------- R/section-pursuit.r | 8 +++--- R/tour-guided-section.r | 21 +++------------ R/tour-guided.r | 5 ++-- R/tour.r | 1 - R/util.r | 18 +++++++++++++ 13 files changed, 58 insertions(+), 167 deletions(-) diff --git a/R/geodesic-path.r b/R/geodesic-path.r index 6958d28d..6e7c849b 100644 --- a/R/geodesic-path.r +++ b/R/geodesic-path.r @@ -47,7 +47,6 @@ new_geodesic_path <- function(name, generator, frozen = NULL, ...) { return(NULL) } - #cat("generation: dist = ", dist, "\n") } list(ingred = geodesic_path(current, target, frozen, ...), index = gen$index, tries = tries) } @@ -62,7 +61,7 @@ new_geodesic_path <- function(name, generator, frozen = NULL, ...) { #' @export "print.tour_path" <- function(x, ...) { - cat("Tour path:", attr(x, "name"), "\n") + message("Tour path: ", attr(x, "name")) # params <- as.list(environment(get("generator", environment(g)))) # str(params) diff --git a/R/geodesic.r b/R/geodesic.r index fcb226b9..e6666767 100644 --- a/R/geodesic.r +++ b/R/geodesic.r @@ -99,7 +99,7 @@ geodesic_info <- function(Fa, Fz, epsilon = 1e-6) { } # if (Fa.equivalent(Fz)) return(); - # cat("dim Fa",nrow(Fa),ncol(Fa),"dim Fz",nrow(Fz),ncol(Fz),"\n") + # message("dim Fa",nrow(Fa),ncol(Fa),"dim Fz",nrow(Fz),ncol(Fz)) # Compute the SVD: Fa'Fz = Va lambda Vz' -------------------------------- sv <- svd(t(Fa) %*% Fz) diff --git a/R/search-better.r b/R/search-better.r index f5565c43..f5e106eb 100644 --- a/R/search-better.r +++ b/R/search-better.r @@ -48,7 +48,6 @@ search_better <- function(current, alpha = 0.5, index, tries, max.tries = Inf,.. warning("cur_index is zero!") } - cat("Old", cur_index, "\n") try <- 1 while (try < max.tries) { @@ -70,7 +69,7 @@ search_better <- function(current, alpha = 0.5, index, tries, max.tries = Inf,.. if (new_index > cur_index) { - cat("New", new_index, "try", try, "\n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try) nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -83,24 +82,8 @@ search_better <- function(current, alpha = 0.5, index, tries, max.tries = Inf,.. try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) rcd_env[["record"]] <- dplyr::mutate( rcd_env[["record"]], @@ -139,8 +122,6 @@ search_better_random <- function(current, alpha = 0.5, index, tries, if (cur_index == 0) { warning("cur_index is zero!") } - - cat("Old", cur_index, "\n") try <- 1 while (try < max.tries) { new_basis <- basis_nearby(current, alpha, method) @@ -160,8 +141,7 @@ search_better_random <- function(current, alpha = 0.5, index, tries, ) if (new_index > cur_index) { - cat("New", new_index, "try", try, "\n") - cat("Accept \n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try, ", accept") nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -176,8 +156,8 @@ search_better_random <- function(current, alpha = 0.5, index, tries, rand <- stats::runif(1) if (prob > rand) { - cat("New", new_index, "try", try, "\n") - cat("Accept with probability, prob =", prob, "\n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try, + ", probabilistic accept p = ", sprintf("%.3f", prob)) nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -193,24 +173,8 @@ search_better_random <- function(current, alpha = 0.5, index, tries, try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) rcd_env[["record"]] <- dplyr::mutate( rcd_env[["record"]], diff --git a/R/search-frozen-geodesic.r b/R/search-frozen-geodesic.r index b4eabb6a..19318386 100644 --- a/R/search-frozen-geodesic.r +++ b/R/search-frozen-geodesic.r @@ -44,13 +44,13 @@ search_frozen_geodesic <- function(current, index, tries, max.tries = 5, n = 5, dig3 <- function(x) sprintf("%.3f", x) pdiff <- (new_index - cur_index) / cur_index if (pdiff > 0.001) { - cat("New index: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away)\n", sep = "") + message("New index: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away)", sep = "") current <<- new_basis cur_index <<- new_index return(list(target = new_basis[[1]])) } - cat("Best was: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away). Trying again...\n", sep = "") + message("Best was: ", dig3(new_index), " (", dig3(peak$alpha$maximum), " away). Try again...", sep = "") try <- try + 1 } diff --git a/R/search-geodesic.r b/R/search-geodesic.r index dcd38802..7830da36 100644 --- a/R/search-geodesic.r +++ b/R/search-geodesic.r @@ -58,44 +58,21 @@ search_geodesic <- function(current, alpha = 1, index, tries, max.tries = 5, ... warning("either the cur_index or the new_index is zero!") } else { pdiff <- (new_index - cur_index) / cur_index - - dig3 <- function(x) sprintf("%.3f", x) - - cat( - "Value ", dig3(new_index), " ", - sprintf("%.1f", pdiff * 100), "% better " - ) if (pdiff > 0.001 & proj_dist(current, new_basis[[1]]) > 1e-2) { # FIXME: pdiff should pbly be a changeable parameter - cat(" - NEW BASIS\n") - + message("Target: ", sprintf("%.3f", new_index), ", ", + sprintf("%.1f", pdiff * 100), "% better ") current <- new_basis cur_index <- new_index return(list(target = new_basis[[1]])) } - cat("\n") } try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) NULL } diff --git a/R/search-jellyfish.R b/R/search-jellyfish.R index 5b099ecc..783da85d 100644 --- a/R/search-jellyfish.R +++ b/R/search-jellyfish.R @@ -48,7 +48,7 @@ search_jellyfish <- function(current, index, tries, max.tries = Inf, min.tries = target_idx <- sapply(target, index) best_id <- which.max(target_idx) - cat("Best Index: ", max(target_idx), "\n") + message("Target: ", sprintf("%.3f", max(target_idx))) attr(target, "best_id") <- best_id class(target) <- c("multi-bases", class(target)) @@ -79,19 +79,3 @@ search_jellyfish <- function(current, index, tries, max.tries = Inf, min.tries = } -print_final_proj <- function(current){ - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } -} diff --git a/R/search_polish.r b/R/search_polish.r index 2661d5db..58f5c176 100644 --- a/R/search_polish.r +++ b/R/search_polish.r @@ -60,20 +60,20 @@ search_polish <- function(current, alpha = 0.5, index, tries, polish_max_tries = # check condition 1: the two bases can't be too close if (polish_dist < 1e-3) { - cat("The new basis is too close to the current one! \n") - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("The new basis is too close to the current one!") + message("current basis: ", current, "cur_index: ", sprintf("%.3f", cur_index)) return(list(target = current, alpha = alpha)) } # check condition 2: there needs to be certain improvement if (polish_pdiff < 1e-5) { - cat("The improvement is too small! \n") - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("The improvement is too small!") + message("current basis: ", current, "cur_index: ", sprintf("%.3f", cur_index)) return(list(target = current, alpha = alpha)) } - cat("better basis found, index_val = ", best_row$index_val, "\n") + message("better basis found, index_val = ", sprintf("%.3f", best_row$index_val)) cur_index <- best_row$index_val current <- best_row$basis[[1]] best_row <- dplyr::mutate(best_row, @@ -86,13 +86,13 @@ search_polish <- function(current, alpha = 0.5, index, tries, polish_max_tries = } else { polish_cooling <- polish_cooling * 0.95 alpha <- alpha * polish_cooling - cat("alpha gets updated to", alpha, "\n") + message("alpha gets updated to", alpha, "\n") # check condition 3: alpha can't be too small if (alpha < 0.01) { - cat("alpha is", alpha, "and it is too small! \n") - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("alpha is", alpha, "and it is too small!") + message("current basis: ", current, "cur_index: ", sprintf("%.3f", cur_index)) return(list(target = current, alpha = alpha)) } } @@ -100,26 +100,8 @@ search_polish <- function(current, alpha = 0.5, index, tries, polish_max_tries = try <- try + 1 } - cat("No better bases found after ", polish_max_tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } - - cat("current basis: ", current, "cur_index: ", cur_index, "\n") + message("No better bases found after ", polish_max_tries, " tries. Giving up.") + print_final_proj(current) return(list(target = current, alpha = alpha)) } # globalVariables("index_val") diff --git a/R/search_posse.R b/R/search_posse.R index 1d878cc2..9845edb1 100644 --- a/R/search_posse.R +++ b/R/search_posse.R @@ -31,7 +31,7 @@ search_posse <- function(current, alpha = 0.5, index, tries, max.tries = 300, cu ) if (new_index > cur_index) { - cat("New", new_index, "try", try, "\n") + message("Target: ", sprintf("%.3f", new_index), ", try: ", try) nr <- nrow(rcd_env[["record"]]) rcd_env[["record"]][nr, "info"] <- "new_basis" @@ -44,24 +44,8 @@ search_posse <- function(current, alpha = 0.5, index, tries, max.tries = 300, cu try <- try + 1 } - cat("No better bases found after ", max.tries, " tries. Giving up.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("No better bases found after ", max.tries, " tries. Giving up.") + print_final_proj(current) NULL } diff --git a/R/section-pursuit.r b/R/section-pursuit.r index 8c4dce69..aad30f87 100644 --- a/R/section-pursuit.r +++ b/R/section-pursuit.r @@ -21,17 +21,17 @@ slice_index <- function(breaks_x, breaks_y, eps, bintype = "polar", power = 1, f reweight = FALSE, p = 4) { if (reweight) { if (bintype != "polar") { - cat("Reweighting is only defined for polar binning and will be ignored.") + message("Reweighting is only defined for polar binning and will be ignored.") } else { - cat(paste0("Reweighting assuming p=", p)) + message(paste0("Reweighting assuming p=", p)) } } resc <- 1 if (bintype == "polar") { resc <- 1 / (1 - (1 / 10)^(1 / power))^power - cat(paste0("Rescaling raw index by a factor ", resc)) + message(paste0("Rescaling raw index by a factor ", resc)) } function(mat, dists, h) { @@ -133,7 +133,7 @@ slice_binning <- function(mat, dists, h, breaks_x, breaks_y, bintype = "square") ybin <- cut(ang, breaks_y) } else { - cat(bintype, " is not a recognised bin type\n") + message(bintype, " is not a recognised bin type") return(0) } diff --git a/R/tour-guided-section.r b/R/tour-guided-section.r index 158e21b9..84bc0574 100644 --- a/R/tour-guided-section.r +++ b/R/tour-guided-section.r @@ -68,24 +68,9 @@ guided_section_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, cur_index <- index(current) if (cur_index > max.i) { - cat("Found index ", cur_index, ", larger than selected maximum ", max.i, ". Stopping search.\n", - sep = "" - ) - cat("Final projection: \n") - if (ncol(current) == 1) { - for (i in 1:length(current)) { - cat(sprintf("%.3f", current[i]), " ") - } - cat("\n") - } - else { - for (i in 1:nrow(current)) { - for (j in 1:ncol(current)) { - cat(sprintf("%.3f", current[i, j]), " ") - } - cat("\n") - } - } + message("Found index ", sprintf("%.3f", cur_index), ", + larger than selected maximum ", max.i, ". Stopping search.") + print_final_proj(current) return(NULL) } diff --git a/R/tour-guided.r b/R/tour-guided.r index 6a7e1eb6..bb18801a 100644 --- a/R/tour-guided.r +++ b/R/tour-guided.r @@ -116,9 +116,8 @@ guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = } if (cur_index > max.i) { - cat("Found index ", cur_index, ", larger than selected maximum ", max.i, ". Stopping search.\n", - sep = "" - ) + message("Found index ", sprintf("%.3f", cur_index), ", + larger than selected maximum ", max.i, ". Stopping search.") print_final_proj(current) return(NULL) } diff --git a/R/tour.r b/R/tour.r index bc3bf6e6..76d38e5e 100644 --- a/R/tour.r +++ b/R/tour.r @@ -42,7 +42,6 @@ new_tour <- function(data, tour_path, start = NULL, ...) { geodesic <- NULL function(step_size, ...) { - # cat("target_dist - cur_dist:", target_dist - cur_dist, "\n") step <<- step + 1 cur_dist <<- cur_dist + step_size diff --git a/R/util.r b/R/util.r index c2290cd4..9a7bb10a 100644 --- a/R/util.r +++ b/R/util.r @@ -1,3 +1,21 @@ +#' Print out the final projection basis +#' @param basis the projection basis to print, used in search_* functions +#' @keywords internal +print_final_proj <- function(basis){ + message("Final projection: ") + + if (ncol(basis) == 1) { + for (i in 1:length(basis)) { + message(sprintf("% .3f", basis[i]), " ") + } + } else { + for (i in 1:nrow(basis)) { + message(paste0(sprintf("% .3f", basis[i, ]), sep = " ")) + } + } +} + + #' Rescale a matrix or data frame #' #' Standardise each column to have range [0, 1] From 096e620b8836d584c8a9e83b2a6dc7e867465f0e Mon Sep 17 00:00:00 2001 From: huizezhang-sherry Date: Mon, 11 Mar 2024 10:38:25 -0500 Subject: [PATCH 5/6] update the jellyfish optimiser --- R/search-jellyfish.R | 19 +++++++++++-------- R/tour-guided.r | 11 ++++++----- man/print_final_proj.Rd | 15 +++++++++++++++ man/search_jellyfish.Rd | 4 ++-- 4 files changed, 34 insertions(+), 15 deletions(-) create mode 100644 man/print_final_proj.Rd diff --git a/R/search-jellyfish.R b/R/search-jellyfish.R index 783da85d..36e14664 100644 --- a/R/search-jellyfish.R +++ b/R/search-jellyfish.R @@ -3,18 +3,18 @@ #' @param current starting projection, a list of basis of class "multi-bases" #' @param index index function #' @param tries the counter of the outer loop of the opotimiser -#' @param max.tries maximum number of iteration before giving up +#' @param max.tries the maximum number of iteration before giving up #' @param ... other arguments being passed into the \code{search_jellyfish()} #' @keywords optimize #' @export #' @examples #' res <- animate_xy(flea[, 1:6], guided_tour(holes(), search_f = search_jellyfish)) #' res -search_jellyfish <- function(current, index, tries, max.tries = Inf, min.tries = 1, ...) { +search_jellyfish <- function(current, index, tries, max.tries = 50, ...) { rcd_env <- parent.frame(n = 4) if (is.null(rcd_env[["record"]])) rcd_env <- parent.frame(n = 1) best_jelly <- current[[attr(current, "best_id")]] - current_idx <- index(best_jelly) + current_idx <- sapply(current, index) c_t = abs((1 - tries / max.tries) * (2 * runif(1) - 1)) @@ -26,7 +26,7 @@ search_jellyfish <- function(current, index, tries, max.tries = Inf, min.tries = } else if (runif(1) > (1 - c_t)) { # type A passive target = lapply(current, function(x) { - orthonormalise(x + 0.1 * runif(1) * 2) + orthonormalise(x + 0.1 * runif(1)) }) # eq 12 } else { # type B active @@ -47,8 +47,13 @@ search_jellyfish <- function(current, index, tries, max.tries = Inf, min.tries = target <- purrr::map2(current, target, correct_orientation) target_idx <- sapply(target, index) + # if the target is worse than current, use current + worse_id <- current_idx > target_idx + target[worse_id] <- current[worse_id] + target_idx[worse_id] <- current_idx[worse_id] + best_id <- which.max(target_idx) - message("Target: ", sprintf("%.3f", max(target_idx))) + #message("Target: ", sprintf("%.3f", max(target_idx))) attr(target, "best_id") <- best_id class(target) <- c("multi-bases", class(target)) @@ -63,9 +68,7 @@ search_jellyfish <- function(current, index, tries, max.tries = Inf, min.tries = info = ifelse(tries == max(tries) & loop == best_id, "current_best", info) ) - - if (abs(max(target_idx) - current_idx) < 0.05 && - tries >= max.tries) { + if (tries >= max.tries) { print_final_proj(target[[attr(target, "best_id")]]) rcd_env[["record"]] <- dplyr::mutate( rcd_env[["record"]], diff --git a/R/tour-guided.r b/R/tour-guided.r index bb18801a..25fbe3cb 100644 --- a/R/tour-guided.r +++ b/R/tour-guided.r @@ -42,9 +42,9 @@ #' f <- flea_std[, 1:3] #' tries <- replicate(5, save_history(f, guided_tour(holes())), simplify = FALSE) #' } -guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = 25, - max.i = Inf, search_f = search_geodesic, n_jellies = 30, - n_sample = 100, ...) { +guided_tour <- function(index_f, d = 2, cooling = 0.99, max.tries = 25, + max.i = Inf, search_f = search_geodesic, + n_jellies = 30, n_sample = 100, alpha = 0.5,...) { generator <- function(current, data, tries, ...) { index <- function(proj) { index_f(as.matrix(data) %*% proj) @@ -123,8 +123,9 @@ guided_tour <- function(index_f, d = 2, alpha = 0.5, cooling = 0.99, max.tries = } # current, alpha = 1, index, max.tries = 5, n = 5, delta = 0.01, cur_index = NA, .. - basis <- search_f(current, alpha, index = index, tries = tries, max.tries = max.tries, - cur_index = cur_index, frozen = frozen, n_sample = n_sample, ...) + basis <- search_f( + current, alpha = alpha, index = index, tries = tries, max.tries = max.tries, + cur_index = cur_index, frozen = frozen, n_sample = n_sample, ...) if (method == "search_posse") { if (!is.null(basis$h)) { diff --git a/man/print_final_proj.Rd b/man/print_final_proj.Rd new file mode 100644 index 00000000..c373d452 --- /dev/null +++ b/man/print_final_proj.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.r +\name{print_final_proj} +\alias{print_final_proj} +\title{Print out the final projection basis} +\usage{ +print_final_proj(basis) +} +\arguments{ +\item{basis}{the projection basis to print, used in search_* functions} +} +\description{ +Print out the final projection basis +} +\keyword{internal} diff --git a/man/search_jellyfish.Rd b/man/search_jellyfish.Rd index 2fcf6dbc..68037ebf 100644 --- a/man/search_jellyfish.Rd +++ b/man/search_jellyfish.Rd @@ -4,7 +4,7 @@ \alias{search_jellyfish} \title{An jellyfish optimisers for the projection pursuit guided tour} \usage{ -search_jellyfish(current, index, tries, max.tries = Inf, ...) +search_jellyfish(current, index, tries, max.tries = Inf, min.tries = 30, ...) } \arguments{ \item{current}{starting projection, a list of basis of class "multi-bases"} @@ -13,7 +13,7 @@ search_jellyfish(current, index, tries, max.tries = Inf, ...) \item{tries}{the counter of the outer loop of the opotimiser} -\item{max.tries}{maximum number of iteration before giving up} +\item{max.tries, min.tries}{the maximum/minimum number of iteration before giving up} \item{...}{other arguments being passed into the \code{search_jellyfish()}} } From fa7c546b507bf601da2f232f583660654a3d935e Mon Sep 17 00:00:00 2001 From: huizezhang-sherry Date: Thu, 23 May 2024 23:44:59 -0500 Subject: [PATCH 6/6] add more useful indexes --- DESCRIPTION | 6 ++- NAMESPACE | 5 ++ R/interesting-indices.r | 67 +++++++++++++++++++++++++-- _pkgdown.yml | 2 + man/dcor.Rd | 17 +++++++ man/dcor2d.Rd | 13 ------ man/guided_tour.Rd | 6 +-- man/indexes.Rd | 15 ++++++ man/search_jellyfish.Rd | 4 +- man/{splines2d.Rd => spline-loess.Rd} | 7 ++- man/stringy.Rd | 11 +++++ 11 files changed, 127 insertions(+), 26 deletions(-) create mode 100644 man/dcor.Rd delete mode 100644 man/dcor2d.Rd create mode 100644 man/indexes.Rd rename man/{splines2d.Rd => spline-loess.Rd} (72%) create mode 100644 man/stringy.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d28ebb9f..ba51f602 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,11 +41,13 @@ Suggests: rmarkdown, tidyr, covr, - plotly + plotly, + cassowaryr, + minerva License: MIT + file LICENSE LazyData: true URL: https://github.com/ggobi/tourr BugReports: https://github.com/ggobi/tourr/issues -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Encoding: UTF-8 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 47466dc0..bb145d3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,8 @@ S3method(plot,path_index) S3method(print,history_array) S3method(print,tour_path) S3method(str,history_array) +export(MIC) +export(TIC) export(anchored_orthogonal_distance) export(andrews) export(angular_breaks) @@ -40,6 +42,7 @@ export(center) export(cmass) export(cumulative_radial) export(dcor2d) +export(dcor2d_2) export(dependence_tour) export(display_andrews) export(display_density2d) @@ -75,6 +78,7 @@ export(lda_pp) export(linear_breaks) export(little_tour) export(local_tour) +export(loess2d) export(manual_slice) export(mapColors) export(mapShapes) @@ -109,6 +113,7 @@ export(search_posse) export(slice_index) export(sphere_data) export(splines2d) +export(stringy) export(thaw) importFrom(grDevices,dev.cur) importFrom(grDevices,dev.flush) diff --git a/R/interesting-indices.r b/R/interesting-indices.r index 49f6d234..462f0314 100644 --- a/R/interesting-indices.r +++ b/R/interesting-indices.r @@ -1,11 +1,45 @@ +#' Scagnostic indexes. +#' +#' Compute the scagnostic measures from the cassowaryr package +#' @export +stringy <- function(){ + function(mat){ + cassowaryr::sc_stringy(mat[,1], mat[,2]) + } +} + + +#' Maximum and total information coefficient index. +#' +#' Compute the maximum and total information coefficient indexes, +#' see \code{minerva::mine}. +#' +#' @rdname indexes +#' @export +MIC <- function(){ + function(mat){ + minerva::mine(mat[,1], mat[,2], alpha = 0.3, est = "mic_e")$MIC + } +} + +#' @rdname indexes +#' @export +TIC <- function(){ + function(mat){ + minerva::mine(mat[,1], mat[,2], est = "mic_e", alpha = 0.3)$TIC + } +} + #' Distance correlation index. #' -#' Computes the distance correlation based index on -#' 2D projections of the data. +#' Computes the distance correlation based index on 2D projections of the data. +#' \code{dcor2d_2} uses the faster implementation of the distance correlation +#' for bivariate data, see \code{energy::dcor2d}. #' #' @keywords hplot #' @importFrom stats na.omit #' @export +#' @rdname dcor dcor2d <- function() { function(mat) { xy <- na.omit(data.frame(x = mat[, 1], y = mat[, 2])) @@ -14,15 +48,26 @@ dcor2d <- function() { } } -#' Spline based index. +#' @rdname dcor +#' @export +dcor2d_2 <- function() { + function(mat) { + xy <- na.omit(data.frame(x = mat[, 1], y = mat[, 2])) + measure <- with(xy, energy::dcor2d(x, y, type = "U")) + return(measure) + } +} + +#' Spline/loess based index. #' #' Compares the variance in residuals of a fitted -#' spline model to the overall variance to find +#' spline/loess model to the overall variance to find #' functional dependence in 2D projections #' of the data. #' #' @keywords hplot #' @importFrom stats residuals var +#' @rdname spline-loess #' @export splines2d <- function() { function(mat) { @@ -38,6 +83,20 @@ splines2d <- function() { } } +#' @rdname spline-loess +#' @export +loess2d <- function() { + function(mat) { + mat <- as.data.frame(mat) + colnames(mat) <- c("x", "y") + loess_fit <- loess(y ~ x, data = mat, span = 0.05) + loess_fit2 <- loess(x ~ y, data = mat, span = 0.05) + measure <- max(1 - var(residuals(loess_fit), na.rm = T) / var(mat$y, na.rm = T), + 1 - var(residuals(loess_fit2), na.rm = T) / var(mat$y, na.rm = T) + ) + return(measure) + } +} #' Normality index. diff --git a/_pkgdown.yml b/_pkgdown.yml index eb873103..ee29c591 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,6 +52,8 @@ reference: - slice_index - norm_bin - norm_kol + - stringy + - MIC - title: Search functions desc: > Functions for index optimisation in guided tour diff --git a/man/dcor.Rd b/man/dcor.Rd new file mode 100644 index 00000000..6f9f9669 --- /dev/null +++ b/man/dcor.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interesting-indices.r +\name{dcor2d} +\alias{dcor2d} +\alias{dcor2d_2} +\title{Distance correlation index.} +\usage{ +dcor2d() + +dcor2d_2() +} +\description{ +Computes the distance correlation based index on 2D projections of the data. +\code{dcor2d_2} uses the faster implementation of the distance correlation +for bivariate data, see \code{energy::dcor2d}. +} +\keyword{hplot} diff --git a/man/dcor2d.Rd b/man/dcor2d.Rd deleted file mode 100644 index 9c5e68d4..00000000 --- a/man/dcor2d.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/interesting-indices.r -\name{dcor2d} -\alias{dcor2d} -\title{Distance correlation index.} -\usage{ -dcor2d() -} -\description{ -Computes the distance correlation based index on -2D projections of the data. -} -\keyword{hplot} diff --git a/man/guided_tour.Rd b/man/guided_tour.Rd index ba99114b..83375437 100644 --- a/man/guided_tour.Rd +++ b/man/guided_tour.Rd @@ -7,13 +7,13 @@ guided_tour( index_f, d = 2, - alpha = 0.5, cooling = 0.99, max.tries = 25, max.i = Inf, search_f = search_geodesic, n_jellies = 30, n_sample = 100, + alpha = 0.5, ... ) } @@ -22,8 +22,6 @@ guided_tour( \item{d}{target dimensionality} -\item{alpha}{the initial size of the search window, in radians} - \item{cooling}{the amount the size of the search window should be adjusted by after each step} @@ -37,6 +35,8 @@ a better projection before giving up} \item{n_sample}{number of samples to generate if \code{search_f} is \code{\link{search_polish}}} +\item{alpha}{the initial size of the search window, in radians} + \item{...}{arguments sent to the search_f} } \description{ diff --git a/man/indexes.Rd b/man/indexes.Rd new file mode 100644 index 00000000..3549b6bc --- /dev/null +++ b/man/indexes.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interesting-indices.r +\name{MIC} +\alias{MIC} +\alias{TIC} +\title{Maximum and total information coefficient index.} +\usage{ +MIC() + +TIC() +} +\description{ +Compute the maximum and total information coefficient indexes, +see \code{minerva::mine}. +} diff --git a/man/search_jellyfish.Rd b/man/search_jellyfish.Rd index 68037ebf..c4fec8d1 100644 --- a/man/search_jellyfish.Rd +++ b/man/search_jellyfish.Rd @@ -4,7 +4,7 @@ \alias{search_jellyfish} \title{An jellyfish optimisers for the projection pursuit guided tour} \usage{ -search_jellyfish(current, index, tries, max.tries = Inf, min.tries = 30, ...) +search_jellyfish(current, index, tries, max.tries = 50, ...) } \arguments{ \item{current}{starting projection, a list of basis of class "multi-bases"} @@ -13,7 +13,7 @@ search_jellyfish(current, index, tries, max.tries = Inf, min.tries = 30, ...) \item{tries}{the counter of the outer loop of the opotimiser} -\item{max.tries, min.tries}{the maximum/minimum number of iteration before giving up} +\item{max.tries}{the maximum number of iteration before giving up} \item{...}{other arguments being passed into the \code{search_jellyfish()}} } diff --git a/man/splines2d.Rd b/man/spline-loess.Rd similarity index 72% rename from man/splines2d.Rd rename to man/spline-loess.Rd index 05cf368e..760ce288 100644 --- a/man/splines2d.Rd +++ b/man/spline-loess.Rd @@ -2,13 +2,16 @@ % Please edit documentation in R/interesting-indices.r \name{splines2d} \alias{splines2d} -\title{Spline based index.} +\alias{loess2d} +\title{Spline/loess based index.} \usage{ splines2d() + +loess2d() } \description{ Compares the variance in residuals of a fitted -spline model to the overall variance to find +spline/loess model to the overall variance to find functional dependence in 2D projections of the data. } diff --git a/man/stringy.Rd b/man/stringy.Rd new file mode 100644 index 00000000..fbfe7313 --- /dev/null +++ b/man/stringy.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interesting-indices.r +\name{stringy} +\alias{stringy} +\title{Scagnostic indexes.} +\usage{ +stringy() +} +\description{ +Compute the scagnostic measures from the cassowaryr package +}