Skip to content

Commit

Permalink
Parallel bootstrap (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
FBartos authored Jan 13, 2023
1 parent e450186 commit 1708a14
Show file tree
Hide file tree
Showing 11 changed files with 272 additions and 24 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: zcurve
Title: An Implementation of Z-Curves
Version: 2.2.0
Version: 2.3.0
Authors@R: c(
person("František", "Bartoš", email = "f.bartos96@gmail.com", role = c("aut", "cre")),
person("Ulrich", "Schimmack", email = "ulrich.schimmack@utoronto.ca", role = c("aut")))
Expand All @@ -27,6 +27,7 @@ Imports:
Rdpack
LinkingTo: Rcpp
Suggests:
parallel,
spelling,
testthat,
vdiffr
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ export(significant_n)
export(summary.zcurve)
export(z_to_power)
export(zcurve)
export(zcurve.get_option)
export(zcurve.options)
export(zcurve_clustered)
export(zcurve_data)
importFrom(Rcpp,sourceCpp)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## version 2.3.0
- Implementation of parallel bootstrap.

## version 2.2.0
- Implementation of z-curve version that deals with clustered estimates.
- Fixing bootstrap of censored data -- now both the censored and uncensored data are bootstrapped.
Expand Down
17 changes: 14 additions & 3 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#' defaults to \code{"EM"}.
#' @param bootstrap the number of bootstraps for estimating CI. To skip
#' bootstrap specify \code{FALSE}.
#' @param parallel whether the bootstrap should be performed in parallel.
#' Defaults to \code{FALSE}. The implementation is not completely stable
#' and might cause a connection error.
#' @param control additional options for the fitting algorithm more details in
#' \link[=control_EM]{control EM} or \link[=control_density]{control density}.
#'
Expand Down Expand Up @@ -67,7 +70,7 @@
#' # see '?control_EM' and '?control_density' for more information about different
#' # z-curves specifications
#' @seealso [summary.zcurve()], [plot.zcurve()], [control_EM], [control_density]
zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", bootstrap = 1000, control = NULL){
zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", bootstrap = 1000, parallel = FALSE, control = NULL){

if(!method %in% c("EM", "density"))
stop("Wrong method, select a supported option")
Expand Down Expand Up @@ -253,9 +256,17 @@ zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", boot
if(bootstrap != FALSE){
# use apropriate algorithm
if(method == "EM"){
fit_boot <- .zcurve_EM_boot(z = object$data, lb = object$data_censoring$lb, ub = object$data_censoring$ub, control = control, fit = fit, bootstrap = bootstrap)
if(parallel){
fit_boot <- .zcurve_EM_boot.par(z = object$data, lb = object$data_censoring$lb, ub = object$data_censoring$ub, control = control, fit = fit, bootstrap = bootstrap)
}else{
fit_boot <- .zcurve_EM_boot(z = object$data, lb = object$data_censoring$lb, ub = object$data_censoring$ub, control = control, fit = fit, bootstrap = bootstrap)
}
}else if(method == "density"){
fit_boot <- .zcurve_density_boot(z = object$data, control = control, bootstrap = bootstrap)
if(parallel){
fit_boot <- .zcurve_density_boot.par(z = object$data, control = control, bootstrap = bootstrap)
}else{
fit_boot <- .zcurve_density_boot(z = object$data, control = control, bootstrap = bootstrap)
}
}
object$boot <- fit_boot
}
Expand Down
56 changes: 56 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' @title Options for the zcurve package
#'
#' @description A placeholder object and functions for the zcurve package.
#' (adapted from the runjags R package).
#'
#' @param name the name of the option to get the current value of - for a list of
#' available options, see details below.
#' @param ... named option(s) to change - for a list of available options, see
#' details below.
#'
#' @return The current value of all available zcurve options (after applying any
#' changes specified) is returned invisibly as a named list.
#'
#' @export zcurve.options
#' @export zcurve.get_option
#' @name zcurve_options
#' @aliases zcurve_options zcurve.options zcurve.get_option
NULL


#' @rdname zcurve_options
zcurve.options <- function(...){

opts <- list(...)

for(i in seq_along(opts)){

if(!names(opts)[i] %in% names(zcurve.private))
stop(paste("Unmatched or ambiguous option '", names(opts)[i], "'", sep=""))

assign(names(opts)[i], opts[[i]] , envir = zcurve.private)
}

return(invisible(zcurve.private$options))
}

#' @rdname zcurve_options
zcurve.get_option <- function(name){

if(length(name)!=1)
stop("Only 1 option can be retrieved at a time")

if(!name %in% names(zcurve.private))
stop(paste("Unmatched or ambiguous option '", name, "'", sep=""))

# Use eval as some defaults are put in using 'expression' to avoid evaluating at load time:
return(eval(zcurve.private[[name]]))
}


zcurve.private <- new.env()
# Use 'expression' for functions to avoid having to evaluate before the package is fully loaded:
assign("defaultoptions", list(envir = zcurve.private))

assign("options", zcurve.private$defaultoptions, envir = zcurve.private)
assign("max_cores", parallel::detectCores(logical = TRUE) - 1, envir = zcurve.private)
27 changes: 27 additions & 0 deletions R/zcurve_EM.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,33 @@
)

}
.zcurve_EM_boot.par <- function(z, lb, ub, control, fit, bootstrap){

cores <- zcurve.get_option("max_cores")
core_load <- split(1:bootstrap, rep(1:cores, length.out = bootstrap))
core_load <- sapply(core_load, length)
initial_seed <- sample(.Machine$integer.max, 1)

cl <- parallel::makePSOCKcluster(cores)
parallel::clusterEvalQ(cl, {library("zcurve")})
parallel::clusterExport(cl, c("z", "lb", "ub", "control", "fit", "bootstrap", "core_load", "initial_seed"), envir = environment())
fit_boot <- parallel::parLapplyLB(cl, 1:cores, function(i){
set.seed(initial_seed + i)
return(.zcurve_EM_boot(z, lb, ub, control, fit, core_load[i]))
})
parallel::stopCluster(cl)


return(
list(
"mu" = do.call(rbind, lapply(fit_boot, function(x) x$mu)),
"weights" = do.call(rbind, lapply(fit_boot, function(x) x$weights)),
"Q" = do.call(c, lapply(fit_boot, function(x) x$Q)),
"prop_high" = do.call(c, lapply(fit_boot, function(x) x$prop_high)),
"iter" = do.call(c, lapply(fit_boot, function(x) x$iter))
)
)
}

#' @name control_EM
#' @title Control settings for the zcurve EM algorithm
Expand Down
114 changes: 95 additions & 19 deletions R/zcurve_clustered.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#' Defaults to \code{"w"}.
#' @param bootstrap the number of bootstraps for estimating CI. To skip
#' bootstrap specify \code{FALSE}.
#' @param parallel whether the bootstrap should be performed in parallel.
#' Defaults to \code{FALSE}. The implementation is not completely stable
#' and might cause a connection error.
#' @param control additional options for the fitting algorithm more details in
#' \link[=control_EM]{control EM}.
#'
Expand All @@ -25,14 +28,14 @@
#'
#' @seealso [zcurve()], [summary.zcurve()], [plot.zcurve()], [control_EM], [control_density]
#' @export
zcurve_clustered <- function(data, method = "b", bootstrap = 1000, control = NULL){
zcurve_clustered <- function(data, method = "b", bootstrap = 1000, parallel = FALSE, control = NULL){

warning("Please note that the clustering adjustment is an experimental feature.", immediate. = TRUE)

if(!method %in% c("w", "b"))
stop("Wrong method, select a supported option.")
if(method == "b" && is.logical(bootstrap) && !bootstrap)
stop("The boostrap method requires bootstrap.")
stop("The nested boostrap method requires bootstrap.")

# set bootstrap
if(!is.numeric(bootstrap)){
Expand Down Expand Up @@ -115,7 +118,7 @@ zcurve_clustered <- function(data, method = "b", bootstrap = 1000, control = NUL

# use appropriate algorithm
if(method == "b"){
fit_b <- .zcurve_EM_b(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control, bootstrap = bootstrap)
fit_b <- .zcurve_EM_b(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control, bootstrap = bootstrap, parallel = parallel)
fit <- fit_b$fit
}else if(method == "w"){
fit <- .zcurve_EM_w(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control)
Expand All @@ -131,7 +134,11 @@ zcurve_clustered <- function(data, method = "b", bootstrap = 1000, control = NUL
if(method == "b"){
fit_boot <- fit_b$fit_boot
}else if(method == "w"){
fit_boot <- .zcurve_EM_w_boot(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control, fit = fit, bootstrap = bootstrap)
if(parallel){
fit_boot <- .zcurve_EM_w_boot.par(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control, fit = fit, bootstrap = bootstrap)
}else{
fit_boot <- .zcurve_EM_w_boot(z = z, z_id = z_id, lb = lb, ub = ub, b_id = b_id, control = control, fit = fit, bootstrap = bootstrap)
}
}
object$boot <- fit_boot
}
Expand All @@ -153,7 +160,7 @@ zcurve_clustered <- function(data, method = "b", bootstrap = 1000, control = NUL
return(object)
}

.zcurve_EM_b <- function(z, z_id, lb, ub, b_id, control, bootstrap){
.zcurve_EM_b <- function(z, z_id, lb, ub, b_id, control, bootstrap, parallel){

# get starting value z-curves
fit_start <- .zcurve_EMc_start_fast_RCpp(x = z,
Expand Down Expand Up @@ -191,23 +198,65 @@ zcurve_clustered <- function(data, method = "b", bootstrap = 1000, control = NUL
)


fit <- list()
for(i in 1:bootstrap){

if(parallel){

cores <- zcurve.get_option("max_cores")
core_load <- split(1:bootstrap, rep(1:cores, length.out = bootstrap))
core_load <- sapply(core_load, length)
initial_seed <- sample(.Machine$integer.max, 1)

boot_data <- .boot_id(data_index)
cl <- parallel::makePSOCKcluster(cores)
parallel::clusterEvalQ(cl, {library("zcurve")})
parallel::clusterExport(cl, c("fit_start", "control", "data_index", "core_load", "initial_seed"), envir = environment())
fit <- parallel::parLapplyLB(cl, 1:cores, function(i){

set.seed(initial_seed + i)

fit <- list()
for(i in 1:core_load[i]){

boot_data <- .boot_id(data_index)

fit[[i]] <- .zcurve_EMc_fit_fast_RCpp(x = boot_data$z[boot_data$type == 1],
lb = boot_data$lb[boot_data$type == 2],
ub = boot_data$ub[boot_data$type == 2],
mu = fit_start$mu[which.max(fit_start$Q),],
sigma = control$sigma,
theta = fit_start$weights[which.max(fit_start$Q),],
a = control$a,
b = control$b,
sig_level = control$sig_level,
max_iter = control$max_iter,
criterion = control$criterion)

}

return(fit)
})
parallel::stopCluster(cl)
fit <- do.call(c, fit)

fit[[i]] <- .zcurve_EMc_fit_fast_RCpp(x = boot_data$z[boot_data$type == 1],
lb = boot_data$lb[boot_data$type == 2],
ub = boot_data$ub[boot_data$type == 2],
mu = fit_start$mu[which.max(fit_start$Q),],
sigma = control$sigma,
theta = fit_start$weights[which.max(fit_start$Q),],
a = control$a,
b = control$b,
sig_level = control$sig_level,
max_iter = control$max_iter,
criterion = control$criterion)
}else{

fit <- list()
for(i in 1:bootstrap){

boot_data <- .boot_id(data_index)

fit[[i]] <- .zcurve_EMc_fit_fast_RCpp(x = boot_data$z[boot_data$type == 1],
lb = boot_data$lb[boot_data$type == 2],
ub = boot_data$ub[boot_data$type == 2],
mu = fit_start$mu[which.max(fit_start$Q),],
sigma = control$sigma,
theta = fit_start$weights[which.max(fit_start$Q),],
a = control$a,
b = control$b,
sig_level = control$sig_level,
max_iter = control$max_iter,
criterion = control$criterion)

}
}

fit_boot = list(
Expand Down Expand Up @@ -327,6 +376,33 @@ zcurve_clustered <- function(data, method = "b", bootstrap = 1000, control = NUL
)

}
.zcurve_EM_w_boot.par <- function(z, z_id, lb, ub, b_id, control, fit, bootstrap){

cores <- zcurve.get_option("max_cores")
core_load <- split(1:bootstrap, rep(1:cores, length.out = bootstrap))
core_load <- sapply(core_load, length)
initial_seed <- sample(.Machine$integer.max, 1)

cl <- parallel::makePSOCKcluster(cores)
parallel::clusterEvalQ(cl, {library("zcurve")})
parallel::clusterExport(cl, c("z", "z_id", "lb", "ub", "b_id", "control", "fit", "bootstrap", "core_load", "initial_seed"), envir = environment())
fit_boot <- parallel::parLapplyLB(cl, 1:cores, function(i){
set.seed(initial_seed + i)
return(.zcurve_EM_w_boot(z, z_id, lb, ub, b_id, control, fit, core_load[i]))
})
parallel::stopCluster(cl)


return(
list(
"mu" = do.call(rbind, lapply(fit_boot, function(x) x$mu)),
"weights" = do.call(rbind, lapply(fit_boot, function(x) x$weights)),
"Q" = do.call(c, lapply(fit_boot, function(x) x$Q)),
"prop_high" = do.call(c, lapply(fit_boot, function(x) x$prop_high)),
"iter" = do.call(c, lapply(fit_boot, function(x) x$iter))
)
)
}

.boot_id <- function(data){

Expand Down
30 changes: 30 additions & 0 deletions R/zcurve_density.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,37 @@

return(results)
}
.zcurve_density_boot.par <- function(z, control, bootstrap){

cores <- zcurve.get_option("max_cores")
core_load <- split(1:bootstrap, rep(1:cores, length.out = bootstrap))
core_load <- sapply(core_load, length)
initial_seed <- sample(.Machine$integer.max, 1)

cl <- parallel::makePSOCKcluster(cores)
parallel::clusterEvalQ(cl, {library("zcurve")})
parallel::clusterExport(cl, c("z", "control", "bootstrap", "core_load", "initial_seed"), envir = environment())
fit_boot <- parallel::parLapplyLB(cl, 1:cores, function(i){
set.seed(initial_seed + i)
return(.zcurve_density_boot(z, control, core_load[i]))
})
parallel::stopCluster(cl)


results <- list(
"mu" = do.call(rbind, lapply(fit_boot, function(x) x$mu)),
"weights" = do.call(rbind, lapply(fit_boot, function(x) x$weights)),
"prop_high" = do.call(c, lapply(fit_boot, function(x) x$prop_high)),
"objective" = do.call(c, lapply(fit_boot, function(x) x$objective)),
"iter" = do.call(c, lapply(fit_boot, function(x) x$iter))
)

if(control$version == 2){
results$FDR_max = do.call(c, lapply(fit_boot, function(x) x$FDR_max))
}

return(results)
}


# original z-curve1.0
Expand Down
5 changes: 5 additions & 0 deletions man/zcurve.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1708a14

Please sign in to comment.