From e79a2a53d8ac0b445f387b4b0ae9b56bb2cd50a7 Mon Sep 17 00:00:00 2001
From: huizezhang-sherry <huizezhangsh@gmail.com>
Date: Mon, 17 Jun 2024 12:32:14 -0500
Subject: [PATCH] re-engineer the smoothness and squintability calculation

---
 NAMESPACE              |   6 +
 R/calc-smoothness.R    |  85 ----------
 R/calc-squintability.R | 358 +++++++++++++++++++++++++++++------------
 man/optim.Rd           | 141 ++++++++--------
 4 files changed, 331 insertions(+), 259 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 314f5b3..e1f347e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(print,basis_df)
 S3method(print,smoothness_res)
+S3method(print,squintability_res)
+S3method(tbl_sum,basis_df)
 S3method(tbl_sum,smoothness_res)
+S3method(tbl_sum,squintability_res)
 export("%>%")
 export(add_anchor)
 export(add_anno)
@@ -48,6 +52,7 @@ export(get_start)
 export(get_theo)
 export(plot_projection)
 export(prep_space_tour)
+export(sample_bases)
 export(scale_color_continuous_botanical)
 export(scale_color_discrete_botanical)
 export(scale_fill_continuous_botanical)
@@ -60,6 +65,7 @@ importFrom(magrittr,"%>%")
 importFrom(progress,progress_bar)
 importFrom(rlang,.data)
 importFrom(rlang,`:=`)
+importFrom(stats,ksmooth)
 importFrom(tibble,tbl_sum)
 importFrom(tidyr,unnest)
 importFrom(tourr,basis_random)
diff --git a/R/calc-smoothness.R b/R/calc-smoothness.R
index f77e3bc..e69de29 100644
--- a/R/calc-smoothness.R
+++ b/R/calc-smoothness.R
@@ -1,85 +0,0 @@
-#' Function to calculate smoothness and squintability
-#'
-#' @param idx character, the name of projection pursuit index function, e.g.
-#' "holes" (see the \pkg{tourr} package for index examples)
-#' @param n_basis numeric, the number of random basis to generate for calculating smoothness
-#' @inheritParams tourr::basis_random
-#' @param best a matrix, the theoretical best projection matrix, used to calculate
-#' projection distance with the simulated random bases.
-#' @param data matrix, the data to be projected
-#' @param other_gp_params list, other parameters to be passed to \code{GpGp::fit_model}
-#' @param verbose logical, whether to print optimisation progression when
-#' fitting the Gaussian process
- #' @inheritParams GpGp::fit_model
-#' @inheritParams base::format
-#' @inheritParams base::print
-#' @examples
-#' library(tourr)
-#' calc_smoothness("holes", data = pipe1000)
-#'
-#' @rdname optim
-#' @export
-calc_smoothness <- function(idx, data = sine1000, n_basis = 300, n = 6, d = 2,
-                            best = matrix(c(0, 0, 0, 0, 1, 0,
-                                            0, 0, 0, 0, 0, 1), nrow = 6),
-                            start_parms = c(0.001, 0.5, 2, 2),
-                            other_gp_params = NULL, verbose = FALSE){
-
-  # sample basis
-  cli::cli_inform("sample random bases ...")
-  idx <- dplyr::sym(idx)
-  set.seed(123)
-  seed <- sample(1: 10000, size = n_basis)
-  basis_df <- tibble::tibble(basis = lapply(1:n_basis, function(i){
-    set.seed(seed[i]); tourr::basis_random(n = n, d = d)})) |>
-    dplyr::rowwise() |>
-    dplyr::mutate(proj_dist = tourr::proj_dist(best, basis),
-                  index_val = get(idx)()(as.matrix(data) %*% basis))
-
-  # construct gp
-  cli::cli_inform("fit the gaussian process ...")
-  if (verbose) {silent <- FALSE} else {silent <- TRUE}
-  gp_params <- list(y = basis_df$index_val, locs = basis_df$proj_dist,
-                    X = as.matrix(rep(1,nrow(basis_df))),
-                    start_parms = start_parms, covfun_name = "matern_isotropic",
-                    silent = silent,
-                    other_gp_params
-                    )
-  fit <- do.call("fit_model", gp_params)
-
-  cov_params <- suppressMessages(tibble::as_tibble_row(fit$covparms, .name_repair = "unique"))
-  colnames(cov_params) <- c("variance", "range", "smoothness", "nugget", "convergence")
-  cov_params <-  cov_params |> dplyr::mutate(convergence = fit$conv, idx = as.character(idx))
-
-  # return
-  res <- tibble::as_tibble(cov_params)
-  attr(res, "basis_df") <- basis_df |> dplyr::ungroup()
-  attr(res, "gp_res") <- fit
-  attr(res, "data") <- tibble::as_tibble(data)
-  attr(res, "best_basis") <- best
-
-  class(res) <- c("smoothness_res", class(res))
-  return(res)
-}
-
-
-globalVariables(c("basis", "sine1000"))
-
-
-#' @rdname optim
-#' @export
-print.smoothness_res <- function(x, width = NULL, ...){
-  writeLines(format(x, width = width, ...))
-}
-
-#' @importFrom tibble tbl_sum
-#' @rdname optim
-#' @export
-tbl_sum.smoothness_res <- function(x){
-
-  cli::cli_rule()
-  dim <- attr(x, "basis_df")$basis[[1]] |> dim()
-  line <- glue::glue("No. of basis = ", nrow(attr(x, "basis_df")),
-                     ", bases [", dim[1], " x ", dim[2], "]")
-  c("Smoothness" = line)
-}
diff --git a/R/calc-squintability.R b/R/calc-squintability.R
index 7031988..ebe338a 100644
--- a/R/calc-squintability.R
+++ b/R/calc-squintability.R
@@ -1,93 +1,134 @@
-#' @param proj_dist_threshold only for squintability, the threshold for projection
+#' Function to calculate smoothness and squintability
+#'
+#' @param idx character, the name of projection pursuit index function, e.g.
+#' "holes"
+#' @param data a matrix or data frame, the high dimensional data to be projected
+#' @param n_basis numeric, the number of random bases to generate
+#' @param best a matrix, the theoretical/ empirical best projection matrix to
+#' calculate the projection distance from the simulated random bases.
+#' @param step_size numeric, step size for interpolating from each random basis to
+#' the best basis, recommend 0.005
+#' @param seed numeric, seed for sampling random bases
+#' @param x objects with specialised printing methods
+#' @param basis_df the basis data frame returned from \code{sample_bases}
+#' @param start_params list, the starting parameters for the Gaussian process
+#' for smoothness
+#' @param other_gp_params list, additional parameters to be passed to
+#' [GpGp::fit_model()]
+#' for calculating smoothness
+#' @param verbose logical, whether to print optimisation progression when
+#' fitting the Gaussian process
+#' @param min_proj_dist only for squintability, the threshold for projection
 #' distance for the random basis to be considered in sampling
-#' @param return_early logical, whether to return early of all the bases
-#' before fitting the kernel or non-linear least square. This can be useful if
-#' the index value evaluation is time-consuming and the user wants to save a copy
-#' before fitting the kernel or non-linear least square.
-#' @param method only for squintability, the method to calculate squintability,
-#' either through kernel smoothing ("ks") or non-linear least square ("nls")
-#' @param step only for squintability, the step size for interpolation,
-#' recommend between 0.01 and 0.1
-#' @param bin_nobs_threshold numeric, only for squintability, the threshold
-#' number of observations for
-#' applying binning before fitting the kernel
-#' @param bin_width only for squintability, the bin size for binning the data
-#' before fitting the kernel, recommend to set as the same as step parameter
-#' @param sampling_seed the seed used for sampling the random basis
-#' @param basis_df a basis data frame, returned from \code{calc_squintability
-#' (..., return_early = TRUE)}
-#' @param nls_params additional parameter for fitting the nls model, see
-#' \code{stats::nls()}
+#' @param method either "ks" (kernel smoothing) or "nls" (non-linear least
+#' square) for calculating squintability.
+#' @param bin_width numeric, the bin width to average the index value
+#' before fitting the kernel, recommend to set as the same as `step` parameter
+#' @param other_params list additional parameters for fitting kernel smoothing
+#' or non-linear least square, see [stats::ksmooth()] and
+#' [stats::nls()] for details
+#' @inheritParams base::print
+#' @importFrom stats ksmooth
+#' @examples
+#' # define the holes index as per tourr::holes
+#' holes <- function() {
+#' function(mat) {
+#'   n <- nrow(mat)
+#'   d <- ncol(mat)
+#'
+#'   num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2)))
+#'   den <- 1 - exp(-d / 2)
+#'
+#'   num / den
+#' }
+#' }
+#' basis_smoothness <- sample_bases(idx = "holes")
+#' calc_smoothness(basis_smoothness)
+#' basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5)
+#' calc_squintability(basis_squint, method = "ks", bin_width = 0.01)
 #' @rdname optim
 #' @export
-calc_squintability <- function(idx, data = sine1000, return_early = FALSE,
-                               method = c("ks", "nls"), n_basis = 50, n = 6, d = 2,
-                               proj_dist_threshold = 1.5, step = 0.005,
-                               best = matrix(c(0, 0, 0, 0, 1, 0,
-                                               0, 0, 0, 0, 0, 1), nrow = 6),
-                               bin_nobs_threshold = 5000, bin_width = 0.005,
-                               sampling_seed = 123
-                               ){
-  cli::cli_inform("sample random bases ...")
-  ## sample basis
-  set.seed(sampling_seed)
-  seed <- sample(1: 10000, size = 1000)
+sample_bases <- function(idx, data = sine1000, n_basis = 300,
+                         best = matrix(c(0, 0, 0, 0, 1, 0,
+                                         0, 0, 0, 0, 0, 1), nrow = 6),
+                         min_proj_dist = NA, step_size = NA, seed = 123){
+
+  dim <- dim(best)
+  n <- dim[1]
+  d <- dim[2]
+  idx <- dplyr::sym(idx)
+  set.seed(seed)
+  seed_vec <- sample(1: 10000, size = 1000)
   bb_lst <- list()
   i <- 1
-  if (!all(dim(best) == c(n, d))){
-    cli::cli_abort("sampled bases and the best basis must have the same dimension,
-                     check the parameter {.field n}, {.field d}, and {.field best}.")
-  }
+
   while (length(bb_lst) < n_basis){
-    set.seed(seed[i])
-    bb <- tourr::basis_random(n = n, d = d)
-    if (tourr::proj_dist(best, bb) > proj_dist_threshold){
-      bb_lst <- c(bb_lst, list(bb))
+    set.seed(seed_vec[i]); bb <- tourr::basis_random(n = n, d = d)
+    if (!is.na(min_proj_dist)){
+      dist <- tourr::proj_dist(bb, best)
+      if (dist > min_proj_dist){bb_lst <- c(bb_lst, list(bb))}
+      i <- i +1
+    } else{
+      bb_lst <- c(bb_lst, list(bb)); i <- i + 1
     }
-    i <- i +1
   }
 
-  ## interpolate between the best and the random basis
-  ## TODO: progress bar here
-  cli::cli_inform("interpolate between the best and the random bases ...")
-  basis_df <- tibble::tibble(id = 1:n_basis) |>
-    dplyr::mutate(res = lapply(bb_lst, function(bb){
-      interp_bb_best(bb = bb, best = best, step = step)
-      })) |>
-    tidyr::unnest(res) |>
-    unnest(dist)
+  if (!is.na(step_size)){
+    basis_df <- tibble::tibble(id = 1:n_basis) |>
+      dplyr::mutate(basis = lapply(bb_lst, function(bb){
+        interp_bb_best(bb = bb, best = best, step = step_size)
+        })) |>
+      tidyr::unnest(basis)
+  } else{
+    basis_df <- tibble::tibble(id = 1:n_basis, basis = bb_lst)
+  }
 
   df_add_idx_val <- function(data, idx, org_data){
     pb$tick()
-    data |> dplyr::mutate(!!rlang::sym(idx) := get(idx)()(org_data %*% basis[[1]]))
+    tibble::as_tibble(data) |> dplyr::mutate(index := get(idx)()(org_data %*% basis[[1]]))
   }
 
-  idx_sym <- rlang::sym(idx)
-  cli::cli_inform("calculate index values for interpolated bases ...")
   pb <- progress::progress_bar$new(total = nrow(basis_df))
   basis_df <- basis_df |>
+    dplyr::mutate(dist = lapply(basis, function(bb){tourr::proj_dist(bb, best)})) |>
+    tidyr::unnest(dist) |>
     dplyr::group_split(aa = dplyr::row_number()) |>
     purrr::map_dfr(~df_add_idx_val(.x, idx, data)) |>
-    dplyr::mutate(!!idx_sym := if (idx %in% c("TIC")) {
-      (!!idx_sym - min(!!idx_sym)) / (max(!!idx_sym) - min(!!idx_sym))
-    } else {
-      !!idx_sym
-    }) |>
     dplyr::select(-aa)
 
- if (return_early) return(basis_df)
+  attr(basis_df, "data") <- tibble::as_tibble(data)
+  attr(basis_df, "idx") <- idx
+  attr(basis_df, "n_basis") <- n_basis
+  attr(basis_df, "best") <- best
+  attr(basis_df, "step_size") <- step_size
+  attr(basis_df, "min_proj_dist") <- min_proj_dist
+  attr(basis_df, "seed") <- seed
+  class(basis_df) <- c("basis_df", class(basis_df))
 
-  cli::cli_inform("fit kernel smoothing or non-linear least square ...")
-  res <- switch(method,
-                ks = fit_ks(basis_df, idx = idx, bin_width = bin_width,
-                            bin_nobs_threshold = bin_nobs_threshold),
-                nls = fit_nls(basis_df, idx = idx, bin_width = bin_width,
-                              bin_nobs_threshold = bin_nobs_threshold)
-                )
+  return(basis_df)
+}
 
-  tibble::tibble(basis_df = list(basis_df), measure = res) |>
-    unnest(measure)
+#' @rdname optim
+#' @export
+print.basis_df <- function(x, width = NULL, ...){
+  writeLines(format(x, width = width, ...))
+}
+
+
+#' @importFrom tibble tbl_sum
+#' @rdname optim
+#' @export
+tbl_sum.basis_df <- function(x){
 
+  dim <- x$basis[[1]] |> dim()
+  if (!is.na(attr(x, "step_size"))) {
+    line <- glue::glue(length(unique(x$id)), " -> ", nrow(x))
+  } else{
+    line <- glue::glue(nrow(x))
+    }
+  c("PP index" = attr(x, "idx"),"No. of bases" = line,
+    "interpolation step size" = attr(x, "step_size"),
+    "Min. proj. dist." = attr(x, "min_proj_dist"))
 }
 
 interp_bb_best <- function(bb, best, step = 0.02){
@@ -103,66 +144,177 @@ interp_bb_best <- function(bb, best, step = 0.02){
   class(t) <- c("history_array")
   tt <- tourr::interpolate(t, step)
   tt_mat <- lapply(1:length(tt), function(ll){
-    as.vector(tt[,,ll])|> matrix(nrow = n, ncol = d)})
-  dist <- as.vector(lapply(tt_mat,  function(mat){proj_dist(best, mat)}))
-  tibble::tibble(basis = tt_mat, dist = dist)
+    as.vector(tt[,,ll])|> matrix(nrow = n, ncol = d)
+  })
+  tibble::tibble(basis = tt_mat)
+}
+
+#' @rdname optim
+#' @export
+calc_smoothness <- function(basis_df, start_params = c(0.001, 0.5, 2, 2),
+                            other_gp_params = NULL, verbose = FALSE){
+
+  if (!inherits(basis_df, "basis_df")){
+    cli::cli_abort("Please use {.fn sample_bases} to generate the bases data frame first. See {.code ?calc_smoothness}")
+  }
+
+  # construct gp
+  if (verbose) {silent <- FALSE} else {silent <- TRUE}
+  gp_params <- list(y = basis_df[["index"]], locs = basis_df[["dist"]],
+                    X = as.matrix(rep(1,nrow(basis_df))),
+                    start_parms = start_params, covfun_name = "matern_isotropic",
+                    silent = silent,
+                    other_gp_params
+  )
+  fit <- do.call("fit_model", gp_params)
+
+  cov_params <- suppressMessages(tibble::as_tibble_row(fit$covparms, .name_repair = "unique"))
+  colnames(cov_params) <- c("variance", "range", "smoothness", "nugget", "convergence")
+  cov_params <-  cov_params |> dplyr::mutate(convergence = fit$conv)
+
+  # return
+  res <- tibble::as_tibble(cov_params)
+  attr(res, "basis_df") <- basis_df
+  attr(res, "fit_res") <- fit
+  class(res) <- c("smoothness_res", class(res))
+  return(res)
 }
 
+
+globalVariables(c("basis", "sine1000"))
+
+
+#' @rdname optim
 #' @export
+print.smoothness_res <- function(x, width = NULL, ...){
+  writeLines(format(x, width = width, ...))
+}
+
+#' @importFrom tibble tbl_sum
 #' @rdname optim
-fit_ks <- function(basis_df, idx, bin_nobs_threshold = 5000, bin_width = 0.005){
-  if (nrow(basis_df) > bin_nobs_threshold){
-    cli::cli_inform("apply binning: bin_width = {bin_width}")
-    dist_bin <- ceiling(basis_df$dist / bin_width) * bin_width
-    basis_df <- basis_df |>
+#' @export
+tbl_sum.smoothness_res <- function(x){
+
+  dim <- attr(x, "basis_df")$basis[[1]] |> dim()
+  line <- glue::glue(nrow(attr(x, "basis_df")),
+                     " [", dim[1], " x ", dim[2], "]")
+  c("PP index" = attr(attr(x, "basis_df"), "idx"), "No. of bases" = line)
+}
+
+#' @rdname optim
+#' @export
+calc_squintability <- function(basis_df, method = c("ks", "nls"),
+                               bin_width = 0.005, other_params = NULL){
+
+  if (!inherits(basis_df, "basis_df")){
+    cli::cli_abort("Please use {.fn sample_bases} to generate the bases data frame first. See {.code ?calc_smoothness}")
+  }
+
+  cli::cli_inform("Apply bin width of 0.005...")
+  if (!is.na(bin_width)){
+    dist_bin <- ceiling(basis_df[["dist"]] / bin_width) * bin_width
+    basis_new <- basis_df |>
       dplyr::bind_cols(dist_bin = dist_bin) |>
       dplyr::group_by(dist_bin) |>
-      dplyr::summarise(!!rlang::sym(idx) := mean(!!rlang::sym(idx)))
+      dplyr::summarise(index = mean(index))
   } else{
-    basis_df <- basis_df |> dplyr::mutate(dist_bin = dist)
+    basis_new <- basis_df |>
+      dplyr::mutate(dist_bin = dist,
+                    index = (index - min(index))/(max(index) - min(index)))
+  }
+
+  fit_params <- list(basis_new, other_params)
+  res <- switch(method,
+                ks = do.call("fit_ks", fit_params),
+                nls = do.call("fit_nls", fit_params)
+                )
+
+
+  if (method == "nls"){
+    max_dist <- basis_df |> dplyr::pull(dist) |> max()
+
+    res$res <- res$res  |>
+      dplyr::bind_cols(max_dist = max_dist) |>
+      dplyr::mutate(
+        dd = (1/(1 + exp(-theta3 * theta2)) - 1/(1 + exp(theta3 * (max_dist)))),
+        squint = abs((theta1 - theta4)/dd  * theta2 * theta3 / 4)) |>
+      dplyr::select(-dd, -max_dist)
   }
 
-  fit <- stats::ksmooth(basis_df$dist_bin, basis_df[[idx]])
+
+  attr(res$res, "basis_df") <- basis_df
+  attr(res$res, "fit_res") <- res$fit
+  attr(res$res, "method") <- method
+
+  class(res$res) <- c("squintability_res", class(res$res))
+  return(res$res)
+
+}
+
+#' @rdname optim
+#' @export
+print.squintability_res <- function(x, width = NULL, ...){
+  writeLines(format(x, width = width, ...))
+}
+
+#' @rdname optim
+#' @export
+tbl_sum.squintability_res <- function(x){
+
+  dim <- attr(x, "basis_df")$basis[[1]] |> dim()
+  basis_df <- attr(x, "basis_df")
+  line <- glue::glue(nrow(attr(x, "basis_df")),
+                     " [", dim[1], " x ", dim[2], "]")
+  if (length(attr(basis_df, "step_size")) != 0) {
+    line <- glue::glue(length(unique(basis_df$id)), " -> ", nrow(basis_df))
+  } else{
+    line <- glue::glue(nrow(x))
+  }
+  c("PP index" = attr(attr(x, "basis_df"), "idx"),
+    "No. of bases" = line, method = attr(x, "method"))
+}
+
+#' @export
+#' @rdname optim
+fit_ks <- function(basis_df, idx, other_params = NULL){
+
+  ks_params <- list(basis_df[["dist_bin"]], basis_df[["index"]], other_params)
+  fit <- do.call("ksmooth", ks_params)
   ks_dt <- tibble::tibble(x = fit$x, y = fit$y) |>
     dplyr::mutate(dy = c(NA, -diff(y) / diff(x)))
   largest <- ks_dt |> dplyr::filter(dy == max(dy, na.rm = TRUE))
-  tibble::tibble(idx = idx, max_x = largest$x, max_dev = largest$dy,
-         squintability = max_dev * max_x)
+  list(res = tibble::tibble(idx = idx, max_x = largest$x, max_d = largest$dy,
+                            squint = max_d * max_x),
+       fit_res = NULL)
 
 }
 
 #' @export
 #' @rdname optim
-fit_nls <- function(basis_df, idx, bin_nobs_threshold = 5000, bin_width = 0.005,
-                    nls_params = list(start = list(theta1 = 1, theta2 = 1, theta3 = 50, theta4 = 0))){
-  if (nrow(basis_df) > bin_nobs_threshold){
-    cli::cli_inform("apply binning: bin_width = {bin_width}")
-    dist_bin <- ceiling(basis_df$dist / bin_width) * bin_width
-    basis_df <- basis_df |>
-      dplyr::bind_cols(dist_bin = dist_bin ) |>
-      dplyr::group_by(dist_bin) |>
-      dplyr::summarise(idx := mean(!!rlang::sym(idx)))
-  } else{
-    dist_bin <- ceiling(basis_df$dist / bin_width) * bin_width
-    basis_df <- basis_df |> dplyr::bind_cols(dist_bin = dist_bin) |>
-       dplyr::rename(idx = !!dplyr::sym(idx))
-  }
-  ff <- function(x, theta2, theta3){
-    1 / (1 + exp(theta3 * (x - theta2)))
-  }
+fit_nls <- function(basis_df, other_params = NULL){
+
+  ff <- function(x, theta2, theta3){ 1 / (1 + exp(theta3 * (x - theta2)))}
+
   ff_ratio <- function(x, theta2, theta3){
     (ff(x, theta2, theta3) - ff(max(x), theta2, theta3))/
       (ff(0, theta2, theta3) - ff(max(x), theta2, theta3))
   }
 
   nls_prms <- list(
-    formula = idx ~ (theta1 - theta4) * ff_ratio(dist_bin, theta2, theta3) + theta4,
-    data = basis_df) |> append(nls_params)
+    formula = index ~ (theta1 - theta4) * ff_ratio(dist_bin, theta2, theta3) + theta4, data = basis_df) |>
+    append(other_params)
+
+  if (!"start" %in% names(other_params)){
+    nls_prms <- c(nls_prms, list(start = list(theta1 = 1, theta2 = 1, theta3 = 50, theta4 = 0)))
+  }
 
-  model <-  do.call("nls", nls_prms)
+  fit <-  do.call("nls", nls_prms)
+  theta_params <- stats::coef(fit)
+  list(res = tibble::as_tibble_row(theta_params),
+       fit = fit)
 
-  theta_params <- stats::coef(model)
-  tibble::tibble(idx = idx) |> dplyr::bind_cols(tibble::as_tibble_row(theta_params))
 }
 
-globalVariables(c("dist", "dist_bin", "dist", "y", "x", "dy", "max_dev", "max_x", "aa", "measure"))
+globalVariables(c("dist", "dist_bin", "dist", "y", "x", "dy",
+                  "max_d", "max_x", "aa", "measure", "index", "theta1",
+                  "theta2", "theta3", "theta4", "dd"))
diff --git a/man/optim.Rd b/man/optim.Rd
index 87ad292..94c1228 100644
--- a/man/optim.Rd
+++ b/man/optim.Rd
@@ -1,22 +1,36 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/calc-smoothness.R, R/calc-squintability.R
-\name{calc_smoothness}
+% Please edit documentation in R/calc-squintability.R
+\name{sample_bases}
+\alias{sample_bases}
+\alias{print.basis_df}
+\alias{tbl_sum.basis_df}
 \alias{calc_smoothness}
 \alias{print.smoothness_res}
 \alias{tbl_sum.smoothness_res}
 \alias{calc_squintability}
+\alias{print.squintability_res}
+\alias{tbl_sum.squintability_res}
 \alias{fit_ks}
 \alias{fit_nls}
 \title{Function to calculate smoothness and squintability}
 \usage{
-calc_smoothness(
+sample_bases(
   idx,
   data = sine1000,
   n_basis = 300,
-  n = 6,
-  d = 2,
   best = matrix(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1), nrow = 6),
-  start_parms = c(0.001, 0.5, 2, 2),
+  min_proj_dist = NA,
+  step_size = NA,
+  seed = 123
+)
+
+\method{print}{basis_df}(x, width = NULL, ...)
+
+\method{tbl_sum}{basis_df}(x)
+
+calc_smoothness(
+  basis_df,
+  start_params = c(0.001, 0.5, 2, 2),
   other_gp_params = NULL,
   verbose = FALSE
 )
@@ -26,100 +40,85 @@ calc_smoothness(
 \method{tbl_sum}{smoothness_res}(x)
 
 calc_squintability(
-  idx,
-  data = sine1000,
-  return_early = FALSE,
+  basis_df,
   method = c("ks", "nls"),
-  n_basis = 50,
-  n = 6,
-  d = 2,
-  proj_dist_threshold = 1.5,
-  step = 0.005,
-  best = matrix(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1), nrow = 6),
-  bin_nobs_threshold = 5000,
   bin_width = 0.005,
-  sampling_seed = 123
+  other_params = NULL
 )
 
-fit_ks(basis_df, idx, bin_nobs_threshold = 5000, bin_width = 0.005)
+\method{print}{squintability_res}(x, width = NULL, ...)
 
-fit_nls(
-  basis_df,
-  idx,
-  bin_nobs_threshold = 5000,
-  bin_width = 0.005,
-  nls_params = list(start = list(theta1 = 1, theta2 = 1, theta3 = 50, theta4 = 0))
-)
+\method{tbl_sum}{squintability_res}(x)
+
+fit_ks(basis_df, idx, other_params = NULL)
+
+fit_nls(basis_df, other_params = NULL)
 }
 \arguments{
 \item{idx}{character, the name of projection pursuit index function, e.g.
-"holes" (see the \pkg{tourr} package for index examples)}
-
-\item{data}{matrix, the data to be projected}
-
-\item{n_basis}{numeric, the number of random basis to generate for calculating smoothness}
-
-\item{n}{dimensionality of data}
+"holes"}
 
-\item{d}{dimensionality of target projection}
+\item{data}{a matrix or data frame, the high dimensional data to be projected}
 
-\item{best}{a matrix, the theoretical best projection matrix, used to calculate
-projection distance with the simulated random bases.}
+\item{n_basis}{numeric, the number of random bases to generate}
 
-\item{start_parms}{Optionally specified starting values for parameters. 
-If \code{NULL},
-fit_model will select default starting values.}
+\item{best}{a matrix, the theoretical/ empirical best projection matrix to
+calculate the projection distance from the simulated random bases.}
 
-\item{other_gp_params}{list, other parameters to be passed to \code{GpGp::fit_model}}
+\item{min_proj_dist}{only for squintability, the threshold for projection
+distance for the random basis to be considered in sampling}
 
-\item{verbose}{logical, whether to print optimisation progression when
-fitting the Gaussian process}
+\item{step_size}{numeric, step size for interpolating from each random basis to
+the best basis, recommend 0.005}
 
-\item{x}{any \R object (conceptually); typically numeric.}
+\item{seed}{numeric, seed for sampling random bases}
 
-\item{width}{\code{default} method: the \emph{minimum} field width or
-    \code{NULL} or \code{0} for no restriction.
+\item{x}{objects with specialised printing methods}
 
-    \code{AsIs} method: the \emph{maximum} field width for non-character
-    objects.  \code{NULL} corresponds to the default \code{12}.
-  }
+\item{width}{only used when \code{max.levels} is NULL, see above.}
 
 \item{...}{further arguments passed to or from other methods.}
 
-\item{return_early}{logical, whether to return early of all the bases
-before fitting the kernel or non-linear least square. This can be useful if
-the index value evaluation is time-consuming and the user wants to save a copy
-before fitting the kernel or non-linear least square.}
+\item{basis_df}{the basis data frame returned from \code{sample_bases}}
 
-\item{method}{only for squintability, the method to calculate squintability,
-either through kernel smoothing ("ks") or non-linear least square ("nls")}
+\item{start_params}{list, the starting parameters for the Gaussian process
+for smoothness}
 
-\item{proj_dist_threshold}{only for squintability, the threshold for projection
-distance for the random basis to be considered in sampling}
+\item{other_gp_params}{list, additional parameters to be passed to
+[GpGp::fit_model()]
+for calculating smoothness}
 
-\item{step}{only for squintability, the step size for interpolation,
-recommend between 0.01 and 0.1}
-
-\item{bin_nobs_threshold}{numeric, only for squintability, the threshold
-number of observations for
-applying binning before fitting the kernel}
-
-\item{bin_width}{only for squintability, the bin size for binning the data
-before fitting the kernel, recommend to set as the same as step parameter}
+\item{verbose}{logical, whether to print optimisation progression when
+fitting the Gaussian process}
 
-\item{sampling_seed}{the seed used for sampling the random basis}
+\item{method}{either "ks" (kernel smoothing) or "nls" (non-linear least
+square) for calculating squintability.}
 
-\item{basis_df}{a basis data frame, returned from \code{calc_squintability
-(..., return_early = TRUE)}}
+\item{bin_width}{numeric, the bin width to average the index value
+before fitting the kernel, recommend to set as the same as `step` parameter}
 
-\item{nls_params}{additional parameter for fitting the nls model, see
-\code{stats::nls()}}
+\item{other_params}{list additional parameters for fitting kernel smoothing
+or non-linear least square, see [stats::ksmooth()] and
+[stats::nls()] for details}
 }
 \description{
 Function to calculate smoothness and squintability
 }
 \examples{
-library(tourr)
-calc_smoothness("holes", data = pipe1000)
+# define the holes index as per tourr::holes
+holes <- function() {
+function(mat) {
+  n <- nrow(mat)
+  d <- ncol(mat)
 
+  num <- 1 - 1 / n * sum(exp(-0.5 * rowSums(mat^2)))
+  den <- 1 - exp(-d / 2)
+
+  num / den
+}
+}
+basis_smoothness <- sample_bases(idx = "holes")
+calc_smoothness(basis_smoothness)
+basis_squint <- sample_bases(idx = "holes", n_basis = 100, step_size = 0.01, min_proj_dist = 1.5)
+calc_squintability(basis_squint, method = "ks", bin_width = 0.01)
 }