From e79a2a53d8ac0b445f387b4b0ae9b56bb2cd50a7 Mon Sep 17 00:00:00 2001 From: huizezhang-sherry Date: Mon, 17 Jun 2024 12:32:14 -0500 Subject: [PATCH] re-engineer the smoothness and squintability calculation --- NAMESPACE | 6 + R/calc-smoothness.R | 85 ---------- R/calc-squintability.R | 358 +++++++++++++++++++++++++++++------------ man/optim.Rd | 141 ++++++++-------- 4 files changed, 331 insertions(+), 259 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 314f5b3..e1f347e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,11 @@ # Generated by roxygen2: do not edit by hand +S3method(print,basis_df) S3method(print,smoothness_res) +S3method(print,squintability_res) +S3method(tbl_sum,basis_df) S3method(tbl_sum,smoothness_res) +S3method(tbl_sum,squintability_res) export("%>%") export(add_anchor) export(add_anno) @@ -48,6 +52,7 @@ export(get_start) export(get_theo) export(plot_projection) export(prep_space_tour) +export(sample_bases) export(scale_color_continuous_botanical) export(scale_color_discrete_botanical) export(scale_fill_continuous_botanical) @@ -60,6 +65,7 @@ importFrom(magrittr,"%>%") importFrom(progress,progress_bar) importFrom(rlang,.data) importFrom(rlang,`:=`) +importFrom(stats,ksmooth) importFrom(tibble,tbl_sum) importFrom(tidyr,unnest) importFrom(tourr,basis_random) diff --git a/R/calc-smoothness.R b/R/calc-smoothness.R index f77e3bc..e69de29 100644 --- a/R/calc-smoothness.R +++ b/R/calc-smoothness.R @@ -1,85 +0,0 @@ -#' Function to calculate smoothness and squintability -#' -#' @param idx character, the name of projection pursuit index function, e.g. -#' "holes" (see the \pkg{tourr} package for index examples) -#' @param n_basis numeric, the number of random basis to generate for calculating smoothness -#' @inheritParams tourr::basis_random -#' @param best a matrix, the theoretical best projection matrix, used to calculate -#' projection distance with the simulated random bases. -#' @param data matrix, the data to be projected -#' @param other_gp_params list, other parameters to be passed to \code{GpGp::fit_model} -#' @param verbose logical, whether to print optimisation progression when -#' fitting the Gaussian process - #' @inheritParams GpGp::fit_model -#' @inheritParams base::format -#' @inheritParams base::print -#' @examples -#' library(tourr) -#' calc_smoothness("holes", data = pipe1000) -#' -#' @rdname optim -#' @export -calc_smoothness <- function(idx, data = sine1000, n_basis = 300, n = 6, d = 2, - best = matrix(c(0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 1), nrow = 6), - start_parms = c(0.001, 0.5, 2, 2), - other_gp_params = NULL, verbose = FALSE){ - - # sample basis - cli::cli_inform("sample random bases ...") - idx <- dplyr::sym(idx) - set.seed(123) - seed <- sample(1: 10000, size = n_basis) - basis_df <- tibble::tibble(basis = lapply(1:n_basis, function(i){ - set.seed(seed[i]); tourr::basis_random(n = n, d = d)})) |> - dplyr::rowwise() |> - dplyr::mutate(proj_dist = tourr::proj_dist(best, basis), - index_val = get(idx)()(as.matrix(data) %*% basis)) - - # construct gp - cli::cli_inform("fit the gaussian process ...") - if (verbose) {silent <- FALSE} else {silent <- TRUE} - gp_params <- list(y = basis_df$index_val, locs = basis_df$proj_dist, - X = as.matrix(rep(1,nrow(basis_df))), - start_parms = start_parms, covfun_name = "matern_isotropic", - silent = silent, - other_gp_params - ) - fit <- do.call("fit_model", gp_params) - - cov_params <- suppressMessages(tibble::as_tibble_row(fit$covparms, .name_repair = "unique")) - colnames(cov_params) <- c("variance", "range", "smoothness", "nugget", "convergence") - cov_params <- cov_params |> dplyr::mutate(convergence = fit$conv, idx = as.character(idx)) - - # return - res <- tibble::as_tibble(cov_params) - attr(res, "basis_df") <- basis_df |> dplyr::ungroup() - attr(res, "gp_res") <- fit - attr(res, "data") <- tibble::as_tibble(data) - attr(res, "best_basis") <- best - - class(res) <- c("smoothness_res", class(res)) - return(res) -} - - -globalVariables(c("basis", "sine1000")) - - -#' @rdname optim -#' @export -print.smoothness_res <- function(x, width = NULL, ...){ - writeLines(format(x, width = width, ...)) -} - -#' @importFrom tibble tbl_sum -#' @rdname optim -#' @export -tbl_sum.smoothness_res <- function(x){ - - cli::cli_rule() - dim <- attr(x, "basis_df")$basis[[1]] |> dim() - line <- glue::glue("No. of basis = ", nrow(attr(x, "basis_df")), - ", bases [", dim[1], " x ", dim[2], "]") - c("Smoothness" = line) -} diff --git a/R/calc-squintability.R b/R/calc-squintability.R index 7031988..ebe338a 100644 --- a/R/calc-squintability.R +++ b/R/calc-squintability.R @@ -1,93 +1,134 @@ -#' @param proj_dist_threshold only for squintability, the threshold for projection +#' Function to calculate smoothness and squintability +#' +#' @param idx character, the name of projection pursuit index function, e.g. +#' "holes" +#' @param data a matrix or data frame, the high dimensional data to be projected +#' @param n_basis numeric, the number of random bases to generate +#' @param best a matrix, the theoretical/ empirical best projection matrix to +#' calculate the projection distance from the simulated random bases. +#' @param step_size numeric, step size for interpolating from each random basis to +#' the best basis, recommend 0.005 +#' @param seed numeric, seed for sampling random bases +#' @param x objects with specialised printing methods +#' @param basis_df the basis data frame returned from \code{sample_bases} +#' @param start_params list, the starting parameters for the Gaussian process +#' for smoothness +#' @param other_gp_params list, additional parameters to be passed to +#' [GpGp::fit_model()] +#' for calculating smoothness +#' @param verbose logical, whether to print optimisation progression when +#' fitting the Gaussian process +#' @param min_proj_dist only for squintability, the threshold for projection #' distance for the random basis to be considered in sampling -#' @param return_early logical, whether to return early of all the bases -#' before fitting the kernel or non-linear least square. This can be useful if -#' the index value evaluation is time-consuming and the user wants to save a copy -#' before fitting the kernel or non-linear least square. -#' @param method only for squintability, the method to calculate squintability, -#' either through kernel smoothing ("ks") or non-linear least square ("nls") -#' @param step only for squintability, the step size for interpolation, -#' recommend between 0.01 and 0.1 -#' @param bin_nobs_threshold numeric, only for squintability, the threshold -#' number of observations for -#' applying binning before fitting the kernel -#' @param bin_width only for squintability, the bin size for binning the data -#' before fitting the kernel, recommend to set as the same as step parameter -#' @param sampling_seed the seed used for sampling the random basis -#' @param basis_df a basis data frame, returned from \code{calc_squintability -#' (..., return_early = TRUE)} -#' @param nls_params additional parameter for fitting the nls model, see -#' \code{stats::nls()} +#' @param method either "ks" (kernel smoothing) or "nls" (non-linear least +#' square) for calculating squintability. +#' @param bin_width numeric, the bin width to average the index value +#' before fitting the kernel, recommend to set as the same as `step` parameter +#' @param other_params list additional parameters for fitting kernel smoothing +#' or non-linear least square, see [stats::ksmooth()] and +#' [stats::nls()] for details +#' @inheritParams base::print +#' @importFrom stats ksmooth +#' @examples +#' # define the holes index as per tourr::holes +#' holes <- function() { +#' function(mat) { +#' n <- nrow(mat) +#' d <- ncol(mat) +#' +#' num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2))) +#' den <- 1 - exp(-d / 2) +#' +#' num / den +#' } +#' } +#' basis_smoothness <- sample_bases(idx = "holes") +#' calc_smoothness(basis_smoothness) +#' basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5) +#' calc_squintability(basis_squint, method = "ks", bin_width = 0.01) #' @rdname optim #' @export -calc_squintability <- function(idx, data = sine1000, return_early = FALSE, - method = c("ks", "nls"), n_basis = 50, n = 6, d = 2, - proj_dist_threshold = 1.5, step = 0.005, - best = matrix(c(0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 1), nrow = 6), - bin_nobs_threshold = 5000, bin_width = 0.005, - sampling_seed = 123 - ){ - cli::cli_inform("sample random bases ...") - ## sample basis - set.seed(sampling_seed) - seed <- sample(1: 10000, size = 1000) +sample_bases <- function(idx, data = sine1000, n_basis = 300, + best = matrix(c(0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 1), nrow = 6), + min_proj_dist = NA, step_size = NA, seed = 123){ + + dim <- dim(best) + n <- dim[1] + d <- dim[2] + idx <- dplyr::sym(idx) + set.seed(seed) + seed_vec <- sample(1: 10000, size = 1000) bb_lst <- list() i <- 1 - if (!all(dim(best) == c(n, d))){ - cli::cli_abort("sampled bases and the best basis must have the same dimension, - check the parameter {.field n}, {.field d}, and {.field best}.") - } + while (length(bb_lst) < n_basis){ - set.seed(seed[i]) - bb <- tourr::basis_random(n = n, d = d) - if (tourr::proj_dist(best, bb) > proj_dist_threshold){ - bb_lst <- c(bb_lst, list(bb)) + set.seed(seed_vec[i]); bb <- tourr::basis_random(n = n, d = d) + if (!is.na(min_proj_dist)){ + dist <- tourr::proj_dist(bb, best) + if (dist > min_proj_dist){bb_lst <- c(bb_lst, list(bb))} + i <- i +1 + } else{ + bb_lst <- c(bb_lst, list(bb)); i <- i + 1 } - i <- i +1 } - ## interpolate between the best and the random basis - ## TODO: progress bar here - cli::cli_inform("interpolate between the best and the random bases ...") - basis_df <- tibble::tibble(id = 1:n_basis) |> - dplyr::mutate(res = lapply(bb_lst, function(bb){ - interp_bb_best(bb = bb, best = best, step = step) - })) |> - tidyr::unnest(res) |> - unnest(dist) + if (!is.na(step_size)){ + basis_df <- tibble::tibble(id = 1:n_basis) |> + dplyr::mutate(basis = lapply(bb_lst, function(bb){ + interp_bb_best(bb = bb, best = best, step = step_size) + })) |> + tidyr::unnest(basis) + } else{ + basis_df <- tibble::tibble(id = 1:n_basis, basis = bb_lst) + } df_add_idx_val <- function(data, idx, org_data){ pb$tick() - data |> dplyr::mutate(!!rlang::sym(idx) := get(idx)()(org_data %*% basis[[1]])) + tibble::as_tibble(data) |> dplyr::mutate(index := get(idx)()(org_data %*% basis[[1]])) } - idx_sym <- rlang::sym(idx) - cli::cli_inform("calculate index values for interpolated bases ...") pb <- progress::progress_bar$new(total = nrow(basis_df)) basis_df <- basis_df |> + dplyr::mutate(dist = lapply(basis, function(bb){tourr::proj_dist(bb, best)})) |> + tidyr::unnest(dist) |> dplyr::group_split(aa = dplyr::row_number()) |> purrr::map_dfr(~df_add_idx_val(.x, idx, data)) |> - dplyr::mutate(!!idx_sym := if (idx %in% c("TIC")) { - (!!idx_sym - min(!!idx_sym)) / (max(!!idx_sym) - min(!!idx_sym)) - } else { - !!idx_sym - }) |> dplyr::select(-aa) - if (return_early) return(basis_df) + attr(basis_df, "data") <- tibble::as_tibble(data) + attr(basis_df, "idx") <- idx + attr(basis_df, "n_basis") <- n_basis + attr(basis_df, "best") <- best + attr(basis_df, "step_size") <- step_size + attr(basis_df, "min_proj_dist") <- min_proj_dist + attr(basis_df, "seed") <- seed + class(basis_df) <- c("basis_df", class(basis_df)) - cli::cli_inform("fit kernel smoothing or non-linear least square ...") - res <- switch(method, - ks = fit_ks(basis_df, idx = idx, bin_width = bin_width, - bin_nobs_threshold = bin_nobs_threshold), - nls = fit_nls(basis_df, idx = idx, bin_width = bin_width, - bin_nobs_threshold = bin_nobs_threshold) - ) + return(basis_df) +} - tibble::tibble(basis_df = list(basis_df), measure = res) |> - unnest(measure) +#' @rdname optim +#' @export +print.basis_df <- function(x, width = NULL, ...){ + writeLines(format(x, width = width, ...)) +} + + +#' @importFrom tibble tbl_sum +#' @rdname optim +#' @export +tbl_sum.basis_df <- function(x){ + dim <- x$basis[[1]] |> dim() + if (!is.na(attr(x, "step_size"))) { + line <- glue::glue(length(unique(x$id)), " -> ", nrow(x)) + } else{ + line <- glue::glue(nrow(x)) + } + c("PP index" = attr(x, "idx"),"No. of bases" = line, + "interpolation step size" = attr(x, "step_size"), + "Min. proj. dist." = attr(x, "min_proj_dist")) } interp_bb_best <- function(bb, best, step = 0.02){ @@ -103,66 +144,177 @@ interp_bb_best <- function(bb, best, step = 0.02){ class(t) <- c("history_array") tt <- tourr::interpolate(t, step) tt_mat <- lapply(1:length(tt), function(ll){ - as.vector(tt[,,ll])|> matrix(nrow = n, ncol = d)}) - dist <- as.vector(lapply(tt_mat, function(mat){proj_dist(best, mat)})) - tibble::tibble(basis = tt_mat, dist = dist) + as.vector(tt[,,ll])|> matrix(nrow = n, ncol = d) + }) + tibble::tibble(basis = tt_mat) +} + +#' @rdname optim +#' @export +calc_smoothness <- function(basis_df, start_params = c(0.001, 0.5, 2, 2), + other_gp_params = NULL, verbose = FALSE){ + + if (!inherits(basis_df, "basis_df")){ + cli::cli_abort("Please use {.fn sample_bases} to generate the bases data frame first. See {.code ?calc_smoothness}") + } + + # construct gp + if (verbose) {silent <- FALSE} else {silent <- TRUE} + gp_params <- list(y = basis_df[["index"]], locs = basis_df[["dist"]], + X = as.matrix(rep(1,nrow(basis_df))), + start_parms = start_params, covfun_name = "matern_isotropic", + silent = silent, + other_gp_params + ) + fit <- do.call("fit_model", gp_params) + + cov_params <- suppressMessages(tibble::as_tibble_row(fit$covparms, .name_repair = "unique")) + colnames(cov_params) <- c("variance", "range", "smoothness", "nugget", "convergence") + cov_params <- cov_params |> dplyr::mutate(convergence = fit$conv) + + # return + res <- tibble::as_tibble(cov_params) + attr(res, "basis_df") <- basis_df + attr(res, "fit_res") <- fit + class(res) <- c("smoothness_res", class(res)) + return(res) } + +globalVariables(c("basis", "sine1000")) + + +#' @rdname optim #' @export +print.smoothness_res <- function(x, width = NULL, ...){ + writeLines(format(x, width = width, ...)) +} + +#' @importFrom tibble tbl_sum #' @rdname optim -fit_ks <- function(basis_df, idx, bin_nobs_threshold = 5000, bin_width = 0.005){ - if (nrow(basis_df) > bin_nobs_threshold){ - cli::cli_inform("apply binning: bin_width = {bin_width}") - dist_bin <- ceiling(basis_df$dist / bin_width) * bin_width - basis_df <- basis_df |> +#' @export +tbl_sum.smoothness_res <- function(x){ + + dim <- attr(x, "basis_df")$basis[[1]] |> dim() + line <- glue::glue(nrow(attr(x, "basis_df")), + " [", dim[1], " x ", dim[2], "]") + c("PP index" = attr(attr(x, "basis_df"), "idx"), "No. of bases" = line) +} + +#' @rdname optim +#' @export +calc_squintability <- function(basis_df, method = c("ks", "nls"), + bin_width = 0.005, other_params = NULL){ + + if (!inherits(basis_df, "basis_df")){ + cli::cli_abort("Please use {.fn sample_bases} to generate the bases data frame first. See {.code ?calc_smoothness}") + } + + cli::cli_inform("Apply bin width of 0.005...") + if (!is.na(bin_width)){ + dist_bin <- ceiling(basis_df[["dist"]] / bin_width) * bin_width + basis_new <- basis_df |> dplyr::bind_cols(dist_bin = dist_bin) |> dplyr::group_by(dist_bin) |> - dplyr::summarise(!!rlang::sym(idx) := mean(!!rlang::sym(idx))) + dplyr::summarise(index = mean(index)) } else{ - basis_df <- basis_df |> dplyr::mutate(dist_bin = dist) + basis_new <- basis_df |> + dplyr::mutate(dist_bin = dist, + index = (index - min(index))/(max(index) - min(index))) + } + + fit_params <- list(basis_new, other_params) + res <- switch(method, + ks = do.call("fit_ks", fit_params), + nls = do.call("fit_nls", fit_params) + ) + + + if (method == "nls"){ + max_dist <- basis_df |> dplyr::pull(dist) |> max() + + res$res <- res$res |> + dplyr::bind_cols(max_dist = max_dist) |> + dplyr::mutate( + dd = (1/(1 + exp(-theta3 * theta2)) - 1/(1 + exp(theta3 * (max_dist)))), + squint = abs((theta1 - theta4)/dd * theta2 * theta3 / 4)) |> + dplyr::select(-dd, -max_dist) } - fit <- stats::ksmooth(basis_df$dist_bin, basis_df[[idx]]) + + attr(res$res, "basis_df") <- basis_df + attr(res$res, "fit_res") <- res$fit + attr(res$res, "method") <- method + + class(res$res) <- c("squintability_res", class(res$res)) + return(res$res) + +} + +#' @rdname optim +#' @export +print.squintability_res <- function(x, width = NULL, ...){ + writeLines(format(x, width = width, ...)) +} + +#' @rdname optim +#' @export +tbl_sum.squintability_res <- function(x){ + + dim <- attr(x, "basis_df")$basis[[1]] |> dim() + basis_df <- attr(x, "basis_df") + line <- glue::glue(nrow(attr(x, "basis_df")), + " [", dim[1], " x ", dim[2], "]") + if (length(attr(basis_df, "step_size")) != 0) { + line <- glue::glue(length(unique(basis_df$id)), " -> ", nrow(basis_df)) + } else{ + line <- glue::glue(nrow(x)) + } + c("PP index" = attr(attr(x, "basis_df"), "idx"), + "No. of bases" = line, method = attr(x, "method")) +} + +#' @export +#' @rdname optim +fit_ks <- function(basis_df, idx, other_params = NULL){ + + ks_params <- list(basis_df[["dist_bin"]], basis_df[["index"]], other_params) + fit <- do.call("ksmooth", ks_params) ks_dt <- tibble::tibble(x = fit$x, y = fit$y) |> dplyr::mutate(dy = c(NA, -diff(y) / diff(x))) largest <- ks_dt |> dplyr::filter(dy == max(dy, na.rm = TRUE)) - tibble::tibble(idx = idx, max_x = largest$x, max_dev = largest$dy, - squintability = max_dev * max_x) + list(res = tibble::tibble(idx = idx, max_x = largest$x, max_d = largest$dy, + squint = max_d * max_x), + fit_res = NULL) } #' @export #' @rdname optim -fit_nls <- function(basis_df, idx, bin_nobs_threshold = 5000, bin_width = 0.005, - nls_params = list(start = list(theta1 = 1, theta2 = 1, theta3 = 50, theta4 = 0))){ - if (nrow(basis_df) > bin_nobs_threshold){ - cli::cli_inform("apply binning: bin_width = {bin_width}") - dist_bin <- ceiling(basis_df$dist / bin_width) * bin_width - basis_df <- basis_df |> - dplyr::bind_cols(dist_bin = dist_bin ) |> - dplyr::group_by(dist_bin) |> - dplyr::summarise(idx := mean(!!rlang::sym(idx))) - } else{ - dist_bin <- ceiling(basis_df$dist / bin_width) * bin_width - basis_df <- basis_df |> dplyr::bind_cols(dist_bin = dist_bin) |> - dplyr::rename(idx = !!dplyr::sym(idx)) - } - ff <- function(x, theta2, theta3){ - 1 / (1 + exp(theta3 * (x - theta2))) - } +fit_nls <- function(basis_df, other_params = NULL){ + + ff <- function(x, theta2, theta3){ 1 / (1 + exp(theta3 * (x - theta2)))} + ff_ratio <- function(x, theta2, theta3){ (ff(x, theta2, theta3) - ff(max(x), theta2, theta3))/ (ff(0, theta2, theta3) - ff(max(x), theta2, theta3)) } nls_prms <- list( - formula = idx ~ (theta1 - theta4) * ff_ratio(dist_bin, theta2, theta3) + theta4, - data = basis_df) |> append(nls_params) + formula = index ~ (theta1 - theta4) * ff_ratio(dist_bin, theta2, theta3) + theta4, data = basis_df) |> + append(other_params) + + if (!"start" %in% names(other_params)){ + nls_prms <- c(nls_prms, list(start = list(theta1 = 1, theta2 = 1, theta3 = 50, theta4 = 0))) + } - model <- do.call("nls", nls_prms) + fit <- do.call("nls", nls_prms) + theta_params <- stats::coef(fit) + list(res = tibble::as_tibble_row(theta_params), + fit = fit) - theta_params <- stats::coef(model) - tibble::tibble(idx = idx) |> dplyr::bind_cols(tibble::as_tibble_row(theta_params)) } -globalVariables(c("dist", "dist_bin", "dist", "y", "x", "dy", "max_dev", "max_x", "aa", "measure")) +globalVariables(c("dist", "dist_bin", "dist", "y", "x", "dy", + "max_d", "max_x", "aa", "measure", "index", "theta1", + "theta2", "theta3", "theta4", "dd")) diff --git a/man/optim.Rd b/man/optim.Rd index 87ad292..94c1228 100644 --- a/man/optim.Rd +++ b/man/optim.Rd @@ -1,22 +1,36 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calc-smoothness.R, R/calc-squintability.R -\name{calc_smoothness} +% Please edit documentation in R/calc-squintability.R +\name{sample_bases} +\alias{sample_bases} +\alias{print.basis_df} +\alias{tbl_sum.basis_df} \alias{calc_smoothness} \alias{print.smoothness_res} \alias{tbl_sum.smoothness_res} \alias{calc_squintability} +\alias{print.squintability_res} +\alias{tbl_sum.squintability_res} \alias{fit_ks} \alias{fit_nls} \title{Function to calculate smoothness and squintability} \usage{ -calc_smoothness( +sample_bases( idx, data = sine1000, n_basis = 300, - n = 6, - d = 2, best = matrix(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1), nrow = 6), - start_parms = c(0.001, 0.5, 2, 2), + min_proj_dist = NA, + step_size = NA, + seed = 123 +) + +\method{print}{basis_df}(x, width = NULL, ...) + +\method{tbl_sum}{basis_df}(x) + +calc_smoothness( + basis_df, + start_params = c(0.001, 0.5, 2, 2), other_gp_params = NULL, verbose = FALSE ) @@ -26,100 +40,85 @@ calc_smoothness( \method{tbl_sum}{smoothness_res}(x) calc_squintability( - idx, - data = sine1000, - return_early = FALSE, + basis_df, method = c("ks", "nls"), - n_basis = 50, - n = 6, - d = 2, - proj_dist_threshold = 1.5, - step = 0.005, - best = matrix(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1), nrow = 6), - bin_nobs_threshold = 5000, bin_width = 0.005, - sampling_seed = 123 + other_params = NULL ) -fit_ks(basis_df, idx, bin_nobs_threshold = 5000, bin_width = 0.005) +\method{print}{squintability_res}(x, width = NULL, ...) -fit_nls( - basis_df, - idx, - bin_nobs_threshold = 5000, - bin_width = 0.005, - nls_params = list(start = list(theta1 = 1, theta2 = 1, theta3 = 50, theta4 = 0)) -) +\method{tbl_sum}{squintability_res}(x) + +fit_ks(basis_df, idx, other_params = NULL) + +fit_nls(basis_df, other_params = NULL) } \arguments{ \item{idx}{character, the name of projection pursuit index function, e.g. -"holes" (see the \pkg{tourr} package for index examples)} - -\item{data}{matrix, the data to be projected} - -\item{n_basis}{numeric, the number of random basis to generate for calculating smoothness} - -\item{n}{dimensionality of data} +"holes"} -\item{d}{dimensionality of target projection} +\item{data}{a matrix or data frame, the high dimensional data to be projected} -\item{best}{a matrix, the theoretical best projection matrix, used to calculate -projection distance with the simulated random bases.} +\item{n_basis}{numeric, the number of random bases to generate} -\item{start_parms}{Optionally specified starting values for parameters. -If \code{NULL}, -fit_model will select default starting values.} +\item{best}{a matrix, the theoretical/ empirical best projection matrix to +calculate the projection distance from the simulated random bases.} -\item{other_gp_params}{list, other parameters to be passed to \code{GpGp::fit_model}} +\item{min_proj_dist}{only for squintability, the threshold for projection +distance for the random basis to be considered in sampling} -\item{verbose}{logical, whether to print optimisation progression when -fitting the Gaussian process} +\item{step_size}{numeric, step size for interpolating from each random basis to +the best basis, recommend 0.005} -\item{x}{any \R object (conceptually); typically numeric.} +\item{seed}{numeric, seed for sampling random bases} -\item{width}{\code{default} method: the \emph{minimum} field width or - \code{NULL} or \code{0} for no restriction. +\item{x}{objects with specialised printing methods} - \code{AsIs} method: the \emph{maximum} field width for non-character - objects. \code{NULL} corresponds to the default \code{12}. - } +\item{width}{only used when \code{max.levels} is NULL, see above.} \item{...}{further arguments passed to or from other methods.} -\item{return_early}{logical, whether to return early of all the bases -before fitting the kernel or non-linear least square. This can be useful if -the index value evaluation is time-consuming and the user wants to save a copy -before fitting the kernel or non-linear least square.} +\item{basis_df}{the basis data frame returned from \code{sample_bases}} -\item{method}{only for squintability, the method to calculate squintability, -either through kernel smoothing ("ks") or non-linear least square ("nls")} +\item{start_params}{list, the starting parameters for the Gaussian process +for smoothness} -\item{proj_dist_threshold}{only for squintability, the threshold for projection -distance for the random basis to be considered in sampling} +\item{other_gp_params}{list, additional parameters to be passed to +[GpGp::fit_model()] +for calculating smoothness} -\item{step}{only for squintability, the step size for interpolation, -recommend between 0.01 and 0.1} - -\item{bin_nobs_threshold}{numeric, only for squintability, the threshold -number of observations for -applying binning before fitting the kernel} - -\item{bin_width}{only for squintability, the bin size for binning the data -before fitting the kernel, recommend to set as the same as step parameter} +\item{verbose}{logical, whether to print optimisation progression when +fitting the Gaussian process} -\item{sampling_seed}{the seed used for sampling the random basis} +\item{method}{either "ks" (kernel smoothing) or "nls" (non-linear least +square) for calculating squintability.} -\item{basis_df}{a basis data frame, returned from \code{calc_squintability -(..., return_early = TRUE)}} +\item{bin_width}{numeric, the bin width to average the index value +before fitting the kernel, recommend to set as the same as `step` parameter} -\item{nls_params}{additional parameter for fitting the nls model, see -\code{stats::nls()}} +\item{other_params}{list additional parameters for fitting kernel smoothing +or non-linear least square, see [stats::ksmooth()] and +[stats::nls()] for details} } \description{ Function to calculate smoothness and squintability } \examples{ -library(tourr) -calc_smoothness("holes", data = pipe1000) +# define the holes index as per tourr::holes +holes <- function() { +function(mat) { + n <- nrow(mat) + d <- ncol(mat) + num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2))) + den <- 1 - exp(-d / 2) + + num / den +} +} +basis_smoothness <- sample_bases(idx = "holes") +calc_smoothness(basis_smoothness) +basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5) +calc_squintability(basis_squint, method = "ks", bin_width = 0.01) }