Skip to content

Commit

Permalink
new function to calculte squintability'
Browse files Browse the repository at this point in the history
  • Loading branch information
huizezhang-sherry committed May 21, 2024
1 parent 7184403 commit 7da9d44
Show file tree
Hide file tree
Showing 47 changed files with 315 additions and 146 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Imports:
ggrepel,
ggforce,
tidyr,
GpGp
GpGp,
cli
RoxygenNote: 7.3.1
Depends:
R (>= 2.10)
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export(bind_random_matrix)
export(bind_theoretical)
export(botanical_pal)
export(botanical_palettes)
export(calc_smoothness)
export(calc_squintability)
export(clean_method)
export(compute_pca)
export(explore_space_end)
Expand Down Expand Up @@ -46,8 +48,11 @@ export(scale_fill_continuous_botanical)
export(scale_fill_discrete_botanical)
export(theme_fern)
importFrom(GpGp,fit_model)
importFrom(cli,cli_abort)
importFrom(ggplot2,"%+replace%")
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
importFrom(tidyr,unnest)
importFrom(tourr,basis_random)
importFrom(tourr,interpolate)
importFrom(tourr,proj_dist)
30 changes: 16 additions & 14 deletions R/calc-smoothness.R
Original file line number Diff line number Diff line change
@@ -1,44 +1,46 @@
#' Function to calculate smoothness
#' Function to calculate smoothness and squintability
#'
#' @param idx character, the name of projection pursuit index function, e.g.
#' "holes" (see the \pkg{tourr} package for index examples)
#' @param size numeric, the number of random basis to generate for calculating smoothness
#' @param n_basis numeric, the number of random basis to generate for calculating smoothness
#' @inheritParams tourr::basis_random
#' @param best a matrix, the theoretical best projection matrix, used to calculate
#' projection distance with the simulated random bases.
#' @param data matrix, the data to be projected
#' @inheritParams GpGp::fit_model
#' @param other_gp_params list, other parameters to be passed to \code{GpGp::fit_model}
calc_smoothness <- function(idx, data = sine1000, size = 300, n = 6, d = 2,
#' @rdname optim
#' @export
calc_smoothness <- function(idx, data = sine1000, n_basis = 300, n = 6, d = 2,
best = matrix(c(0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 1), nrow = 6),
start_parms = c(0.001, 0.5, 2, 2),
other_gp_params = list(NULL)
){

# sample basis
idx <- dplyr::sym(idx)
set.seed(123)
seed <- sample(1: 10000, size = size)
basis_df <- tibble::tibble(basis = lapply(1:size, function(i){
seed <- sample(1: 10000, size = n_basis)
basis_df <- tibble::tibble(basis = lapply(1:n_basis, function(i){
set.seed(seed[i]); tourr::basis_random(n = n, d = d)}))
dplyr::rowwise() |>
dplyr::mutate(proj_dist = tourr::proj_dist(best, basis),
index_val = get(idx)()(as.matrix(data) %*% basis))

gp_params <- list(
y = basis_df$index_val, locs = basis_df$proj_dist,
X = as.matrix(rep(1,nrow(basis_df))),
start_parms = start_parms,
covfun_name = "matern_isotropic",
other_gp_params
)

# construct gp
gp_params <- list(y = basis_df$index_val, locs = basis_df$proj_dist,
X = as.matrix(rep(1,nrow(basis_df))),
start_parms = start_parms, covfun_name = "matern_isotropic",
other_gp_params
)
fit <- do.call("GpGp::fit_model", gp_params)
cov_params <- tibble::as_tibble_row(fit$covparms, .name_repair = "unique")
colnames(cov_params) <- c("variance", "range", "smoothness", "nugget", "convergence")
cov_params <- cov_params |> dplyr::mutate(convergence = fit$conv, idx = as.character(idx))

list(basis = basis_df, gp_res = fit, cov_params = cov_params)
# return
list(basis_df = basis_df, gp_res = fit, measure = cov_params)
}


Expand Down
111 changes: 111 additions & 0 deletions R/calc-squintability.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#' @param proj_dist_threshold only for squintability, the threshold for projection
#' distance for the random basis to be considered in sampling
#' @param method only for squintability, the method to calculate squintability,
#' either through kernel smoothing ("ks") or non-linear least square ("nls")
#' @param step only for squintability, the step size for interpolation,
#' recommend between 0.01 and 0.1
#' @param bin_nobs_threshold numeric, only for squintability, the threshold
#' number of observations for
#' applying binning before fitting the kernel
#' @param bin_size only for squintability, the bin size for binning the data
#' before fitting the kernel
#' @rdname optim
#' @export
calc_squintability <- function(idx, data = sine1000,
method = c("ks", "nls"), n_basis, n = 6, d = 2,
proj_dist_threshold = 1.5, step = 0.02,
best = matrix(c(0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 1), nrow = 6),
bin_nobs_threshold = 5000, bin_size = 0.02
){

## sample basis
set.seed(123)
seed <- sample(1: 10000, size = 1000)
basis_lst <- list()
i <- 1
while (length(basis_lst) < n_basis){
set.seed(seed[i])
bb <- tourr::basis_random(n = n, d = d)
if (tourr::proj_dist(best, bb) > proj_dist_threshold){
bb_lst <- c(bb_lst, list(bb))
}
i <- i +1
}

## interpolate between the best and the random basis
## TODO: progress bar here
basis_df <- tibble::tibble(id = 1:n_basis) |>
dplyr::mutate(res = lapply(bb_lst, function(bb){
interp_bb_best(bb = bb, best = best, step = step)
})) |>
tidyr::unnest(res) |>
dplyr::rowwise() |>
dplyr::mutate(!!dplyr::sym(idx) := get(idx)()(data %*% basis)) |>
dplyr::ungroup()

res <- switch(method,
ks = fit_ks(basis_df, idx = idx, bin_nobs_threshold, bin_size),
nls = fit_nls(basis_df, idx = idx, bin_nobs_threshold, bin_size)
)

list(basis_df = basis_df, measure = res)

}

interp_bb_best <- function(bb, best, step = 0.02){

if (!identical(dim(bb), dim(best))){
cli::cli_abort("random basis and the best basis must have the same dimension")
} else{
n <- dim(bb)[1]
d <- dim(bb)[2]
}

t <- array(unlist(list(best, bb)), dim = c(n = n, d = d, 2))
class(t) <- c("history_array")
tt <- tourr::interpolate(t, step)
tt_mat <- lapply(1:length(tt), function(ll){
as.vector(tt[,,ll])|> matrix(nrow = n, ncol = d)})
dist <- as.vector(lapply(tt_mat, function(mat){proj_dist(best, mat)}))
tibble::tibble(basis = tt_mat, dist = dist)
}

fit_ks <- function(basis_df, idx, bin_nobs_threshold, bin_size){

if (nrow(basis_df) > bin_nobs_threshold){
cli::cli_abort("apply binning before fitting the kernel smoother with bin_size = {bin_size}")
basis_df <- basis_df |>
dplyr::mutate(dist_bin = ceiling(dist / bin_size) * bin_size)
} else{
basis_df <- basis_df |> dplyr::mutate(dist_bin = dist)
}

fit <- stats::ksmooth(basis_df$dist_bin, basis_df[[idx]])
ks_dt <- tibble::tibble(x = fit$x, y = fit$y) |>
dplyr::mutate(dy = c(NA, -diff(y) / diff(x)))
largest <- ks_dt |> dplyr::filter(dy == max(dy, na.rm = TRUE))
tibble::tibble(idx = idx, max_x = largest$x, max_dev = largest$dy,
squintability = max_dev * max_x)

}

fit_nls <- function(basis_df, idx, bin_nobs_threshold, bin_size){

if (nrow(basis_df) > bin_nobs_threshold){
cli::cli_abort("apply binning before fitting the kernel smoother with bin_size = {bin_size}")
basis_df <- basis_df |>
dplyr::mutate(dist_bin = ceiling(dist / bin_size) * bin_size,
dist_bin = dist_bin / pi * 180)
} else{
basis_df <- basis_df |> dplyr::mutate(dist_bin = dist / pi * 180)
}

model = stats::nls(idx ~ theta1/(1 + exp(-theta2 + theta3 * dist_bin)),
start = list(theta1 = 1, theta2 = 5, theta3 = 0.1))
theta_params <- stats::coef(model)
colnames(theta_params) <- paste0("theta", 1:length(theta_params))
tibble::tibble(idx = idx) |> dplyr::bind_cols(theta_params)
}

globalVariables(c("dist", "dist_bin", "dist", "y", "x", "dy", "max_dev", "max_x"))
4 changes: 3 additions & 1 deletion R/ferrn-package.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#' @keywords internal
#' @importFrom tourr basis_random proj_dist
#' @importFrom tourr basis_random proj_dist interpolate
#' @importFrom GpGp fit_model
#' @importFrom cli cli_abort
#' @importFrom tidyr unnest
"_PACKAGE"
3 changes: 3 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ reference:
contents:
- starts_with("boa")
- starts_with("holes")
- title: Calculate optimisation features
contents:
- starts_with("calc")
- title: Miscellaneous
desc: >
Other misc functions
Expand Down
4 changes: 2 additions & 2 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7da9d44

Please sign in to comment.