diff --git a/DESCRIPTION b/DESCRIPTION index f04bc9d1..1b6cac16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,8 +22,7 @@ Imports: MASS, boot, stringr, - lmtest, - cli + lmtest Suggests: knitr, testthat (>= 2.0), diff --git a/R/maic_anchored.R b/R/maic_anchored.R index 6199a987..fefc0470 100644 --- a/R/maic_anchored.R +++ b/R/maic_anchored.R @@ -19,6 +19,8 @@ #' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or #' 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days #' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit} +#' @param boot_ci_type a string, one of `c("norm","basic", "stud", "perc", "bca")` to select the type of bootstrap +#' confidence interval. See [boot::boot.ci] for more details. #' #' @details For time-to-event analysis, it is required that input \code{ipd} and \code{pseudo_ipd} to have the following #' columns. This function is not sensitive to upper or lower case of letters in column names. @@ -47,7 +49,8 @@ maic_anchored <- function(weights_object, eff_measure = c("HR", "OR", "RR", "RD", "Diff"), # time to event specific args time_scale = "months", - km_conf_type = "log-log") { + km_conf_type = "log-log", + boot_ci_type = c("norm", "basic", "stud", "perc", "bca")) { # ==> Initial Setup ------------------------------------------ # create the hull for the output from this function res <- list( @@ -114,6 +117,7 @@ maic_anchored <- function(weights_object, } eff_measure <- match.arg(eff_measure, choices = c("HR"), several.ok = FALSE) } + boot_ci_type <- match.arg(boot_ci_type) # else { # for continuous effect measure # if (any(!c("USUBJID", "RESPONSE") %in% names(ipd))) { # stop("ipd should have 'USUBJID', 'RESPONSE' columns at minimum") @@ -151,7 +155,10 @@ maic_anchored <- function(weights_object, # ==> Inferential output ------------------------------------------ result <- if (endpoint_type == "tte") { - maic_anchored_tte(res, ipd, km_conf_type, pseudo_ipd, time_scale, weights_object, endpoint_name, trt_ipd, trt_agd) + maic_anchored_tte( + res, ipd, km_conf_type, pseudo_ipd, time_scale, weights_object, endpoint_name, trt_ipd, trt_agd, + boot_ci_type + ) } else { stop("Endpoint type ", endpoint_type, " currently unsupported.") } @@ -170,7 +177,8 @@ maic_anchored_tte <- function(res, weights_object, endpoint_name, trt_ipd, - trt_agd) { + trt_agd, + boot_ci_type) { # Analysis table (Cox model) before and after matching, incl Median Survival Time # derive km w and w/o weights kmobj_ipd <- survfit(Surv(TIME, EVENT) ~ ARM, ipd, conf.type = km_conf_type) @@ -208,20 +216,58 @@ maic_anchored_tte <- function(res, # get bootstrapped estimates if applicable res$inferential[["boot_est"]] <- NULL if (!is.null(weights_object$boot)) { - tmp_boot_obj <- weights_object$boot - k <- dim(tmp_boot_obj)[3] - tmp_boot_est <- sapply(seq_len(k), function(ii) { - boot_x <- tmp_boot_obj[, , ii] - boot_ipd_id <- weights_object$data$USUBJID[boot_x[, 1]] - boot_ipd <- ipd[match(boot_ipd_id, ipd$USUBJID), , drop = FALSE] - boot_ipd$weights <- boot_x[, 2] + keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) + boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] + + boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) + if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients") + boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] + + stat_fun <- function(data, index, w_obj) { + r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration + if (!is.na(r)) { + if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match") + boot_ipd <- data[w_obj$boot[, 1, r], ] + boot_ipd$weights <- w_obj$boot[, 2, r] + } + boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_ipd, weights = boot_ipd$weights, robust = TRUE) + boot_res_AC <- list(est = coef(boot_coxobj_dat_adj)[1], se = sqrt(vcov(boot_coxobj_dat_adj)[1, 1])) + boot_res_AB <- bucher(boot_res_AC, res_BC, conf_lv = 0.95) + c( + est_AB = boot_res_AB$est, + var_AB = boot_res_AB$se^2, + se_AB = boot_res_AB$se, + est_AC = boot_res_AC$est, + se_AC = boot_res_AC$se, + var_AC = boot_res_AC$se^2 + ) + } - # does not matter use robust se or not, point estimate will not change and calculation would be faster - boot_coxobj_ipd_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_ipd, weights = weights) - boot_AB_est <- summary(boot_coxobj_ipd_adj)$coef[1] - summary(coxobj_agd)$coef[1] - exp(boot_AB_est) - }) - res$inferential[["boot_est"]] <- tmp_boot_est + # Revert seed to how it was for weight bootstrap sampling + old_seed <- globalenv()$.Random.seed + on.exit(suspendInterrupts(set_random_seed(old_seed))) + set_random_seed(weights_object$boot_seed) + + R <- dim(weights_object$boot)[3] + boot_res <- boot(boot_ipd, stat_fun, R = R, w_obj = weights_object, strata = weights_object$boot_strata) + boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object) + + l_u_index <- switch(boot_ci_type, + "norm" = list(2, 3, "normal"), + "basic" = list(4, 5, "basic"), + "stud" = list(4, 5, "student"), + "perc" = list(4, 5, "percent"), + "bca" = list(4, 5, "bca") + ) + + res$inferential[["boot_est"]] <- boot_res + boot_res_AB <- list( + est = exp(boot_res$t0[1]), + se = NA, + ci_l = exp(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), + ci_u = exp(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), + pval = NA + ) } # make analysis report table @@ -239,10 +285,11 @@ maic_anchored_tte <- function(res, if (is.null(res$inferential[["boot_est"]])) { res$inferential[["report_overall_bootCI"]] <- NULL } else { - boot_res_AB <- res_AB - boot_logres_se <- sd(log(res$inferential[["boot_est"]]), na.rm = TRUE) - boot_res_AB$ci_l <- exp(log(boot_res_AB$est) + qnorm(0.025) * boot_logres_se) - boot_res_AB$ci_u <- exp(log(boot_res_AB$est) + qnorm(0.975) * boot_logres_se) + temp_boot_res <- boot_res_AB + temp_boot_res$ci_l <- boot_res_AB$ci_l + temp_boot_res$ci_u <- boot_res_AB$ci_u + class(temp_boot_res) <- class(res_AB) + res$inferential[["report_overall_bootCI"]] <- rbind( report_table_tte(coxobj_ipd, medSurv_ipd, tag = paste0("IPD/", endpoint_name)), report_table_tte(coxobj_ipd_adj, medSurv_ipd_adj, tag = paste0("weighted IPD/", endpoint_name)), @@ -250,7 +297,7 @@ maic_anchored_tte <- function(res, c( paste0("** adj.", trt_ipd, " vs ", trt_agd), rep("-", 4), - print(boot_res_AB, pval_digits = 3) + print(temp_boot_res, pval_digits = 3) ) ) } diff --git a/R/maic_unanchored.R b/R/maic_unanchored.R index 7e653fcb..28df962e 100644 --- a/R/maic_unanchored.R +++ b/R/maic_unanchored.R @@ -14,16 +14,14 @@ #' @param endpoint_type a string, one out of the following "binary", "tte" (time to event) #' @param eff_measure a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; #' "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used. -#' @param boot_ci_is_quantile a logical, specify if the 95% bootstrapped confidence interval should be derived by sample -#' quantile. Default FALSE, which the estimates assumes to follow asymptotic normal (only if `eff_measure` is "RD") or -#' log-normal with a variance that can be approximated by bootstrapped sample of the estimate. This default option may -#' be handy when the number of bootstrap iterations is not big. +#' @param boot_ci_type a string, one of `c("norm","basic", "stud", "perc", "bca")` to select the type of bootstrap +#' confidence interval. See [boot::boot.ci] for more details. #' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title #' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or #' 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days #' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit} -#' @param binary_robust_cov_type a string to pass to argument `type` of [sandwich::vcovHC], see possible options -#' in the documentation of that function. Default is `"HC3"` +#' @param binary_robust_cov_type a string to pass to argument `type` of [sandwich::vcovHC], see possible options in the +#' documentation of that function. Default is `"HC3"` #' #' @details For time-to-event analysis, it is required that input \code{ipd} and \code{pseudo_ipd} to have the following #' columns. This function is not sensitive to upper or lower case of letters in column names. @@ -49,7 +47,7 @@ maic_unanchored <- function(weights_object, endpoint_type = "tte", endpoint_name = "Time to Event Endpoint", eff_measure = c("HR", "OR", "RR", "RD"), - boot_ci_is_quantile = FALSE, + boot_ci_type = c("norm", "basic", "stud", "perc", "bca"), # time to event specific args time_scale = "months", km_conf_type = "log-log", @@ -126,6 +124,7 @@ maic_unanchored <- function(weights_object, } eff_measure <- match.arg(eff_measure, choices = c("HR"), several.ok = FALSE) } + boot_ci_type <- match.arg(boot_ci_type) # ==> IPD and AgD data preparation ------------------------------------------ # : subset ipd, retain only ipd from interested trts @@ -172,12 +171,12 @@ maic_unanchored <- function(weights_object, result <- if (endpoint_type == "tte") { maic_unanchored_tte( res, res_AB, dat, ipd, pseudo_ipd, km_conf_type, time_scale, - weights_object, endpoint_name, boot_ci_is_quantile, trt_ipd, trt_agd + weights_object, endpoint_name, boot_ci_type, trt_ipd, trt_agd ) } else if (endpoint_type == "binary") { maic_unanchored_binary( res, res_AB, dat, ipd, pseudo_ipd, binary_robust_cov_type, - weights_object, endpoint_name, eff_measure, boot_ci_is_quantile, trt_ipd, trt_agd + weights_object, endpoint_name, eff_measure, boot_ci_type, trt_ipd, trt_agd ) } else { stop("Endpoint type ", endpoint_type, " currently unsupported.") @@ -198,7 +197,7 @@ maic_unanchored_tte <- function(res, time_scale, weights_object, endpoint_name, - boot_ci_is_quantile, + boot_ci_type, trt_ipd, trt_agd) { # ~~~ Descriptive table before and after matching @@ -233,30 +232,52 @@ maic_unanchored_tte <- function(res, # : get bootstrapped estimates if applicable if (!is.null(weights_object$boot)) { - tmp_boot_obj <- weights_object$boot - k <- dim(tmp_boot_obj)[3] - - cli::cli_progress_bar("Going through bootstrapped weights", total = k, .envir = .GlobalEnv) - - tmp_boot_est <- sapply(1:k, function(ii) { - cli::cli_progress_update(.envir = .GlobalEnv) - - boot_x <- tmp_boot_obj[, , ii] - boot_ipd_id <- weights_object$data$USUBJID[boot_x[, 1]] - boot_ipd <- ipd[match(boot_ipd_id, ipd$USUBJID), , drop = FALSE] - boot_ipd$weights <- boot_x[, 2] - + keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) + boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] + + boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) + if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients") + boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] + + stat_fun <- function(data, index, w_obj, pseudo_ipd) { + boot_ipd <- data[index, ] + r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration + if (!is.na(r)) { + if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match") + boot_ipd$weights <- w_obj$boot[, 2, r] + } boot_dat <- rbind(boot_ipd, pseudo_ipd) boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) - boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_dat, weights = weights) - boot_AB_est <- summary(boot_coxobj_dat_adj)$coef[1] - exp(boot_AB_est) - }) + c(est = coef(boot_coxobj_dat_adj)[1], var = vcov(boot_coxobj_dat_adj)[1, 1]) + } - cli::cli_progress_done(.envir = .GlobalEnv) + # Revert seed to how it was for weight bootstrap sampling + old_seed <- globalenv()$.Random.seed + on.exit(suspendInterrupts(set_random_seed(old_seed))) + set_random_seed(weights_object$boot_seed) + R <- dim(weights_object$boot)[3] + + boot_res <- boot(boot_ipd, stat_fun, R = R, w_obj = weights_object, pseudo_ipd = pseudo_ipd) + boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, pseudo_ipd = pseudo_ipd) + + l_u_index <- switch(boot_ci_type, + "norm" = list(2, 3, "normal"), + "basic" = list(4, 5, "basic"), + "stud" = list(4, 5, "student"), + "perc" = list(4, 5, "percent"), + "bca" = list(4, 5, "bca") + ) - res$inferential[["boot_est"]] <- tmp_boot_est + res$inferential[["boot_est"]] <- boot_res + transform_estimate <- exp + boot_res_AB <- list( + est = transform_estimate(boot_res$t0[1]), + se = NA, + ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), + ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), + pval = NA + ) } else { res$inferential[["boot_est"]] <- NULL } @@ -270,27 +291,12 @@ maic_unanchored_tte <- function(res, if (is.null(res$inferential[["boot_est"]])) { res$inferential[["report_overall_bootCI"]] <- NULL } else { - boot_res_AB <- res_AB - boot_logres_se <- sd(log(res$inferential[["boot_est"]]), na.rm = TRUE) - if (boot_ci_is_quantile) { - boot_res_AB$ci_l <- quantile(res$inferential[["boot_est"]], p = 0.025) - boot_res_AB$ci_u <- quantile(res$inferential[["boot_est"]], p = 0.975) - } else { - boot_res_AB$ci_l <- exp(log(boot_res_AB$est) + qnorm(0.025) * boot_logres_se) - boot_res_AB$ci_u <- exp(log(boot_res_AB$est) + qnorm(0.975) * boot_logres_se) - } - tmp_report_table_tte <- report_table_tte(coxobj_dat_adj, - medSurv_dat_adj, - tag = paste0("After matching/", endpoint_name) - ) - tmp_report_table_tte$`HR[95% CI]`[1] <- paste0( - format(round(boot_res_AB$est, 2), nsmall = 2), "[", - format(round(boot_res_AB$ci_l, 2), nsmall = 2), ";", - format(round(boot_res_AB$ci_u, 2), nsmall = 2), "]" - ) + report_boot <- report_table_tte(coxobj_dat_adj, medSurv_dat_adj, tag = paste0("After matching/", endpoint_name)) + report_boot$`p-Value` <- NA + report_boot$`HR[95% CI]`[1] <- do.call(sprintf, args = c(list(fmt = "%.2f[%.2f;%.2f]"), boot_res_AB[1:3])) res$inferential[["report_overall_bootCI"]] <- rbind( report_table_tte(coxobj_dat, medSurv_dat, tag = paste0("Before matching/", endpoint_name)), - tmp_report_table_tte + report_boot ) } @@ -309,7 +315,7 @@ maic_unanchored_binary <- function(res, weights_object, endpoint_name, eff_measure, - boot_ci_is_quantile, + boot_ci_type, trt_ipd, trt_agd) { # ~~~ Analysis table @@ -319,6 +325,11 @@ maic_unanchored_binary <- function(res, "RR" = poisson(link = "log"), "OR" = binomial(link = "logit") ) + transform_estimate <- switch(eff_measure, + "RD" = function(x) x * 100, + "RR" = exp, + "OR" = exp + ) # : fit glm for binary outcome and robust estimate with weights binobj_dat <- glm(RESPONSE ~ ARM, dat, family = glm_link) @@ -330,57 +341,56 @@ maic_unanchored_binary <- function(res, res$inferential[["model_before"]] <- binobj_dat res$inferential[["model_after"]] <- binobj_dat_adj - mu <- bin_robust_coef[2, "Estimate"] - sig <- bin_robust_coef[2, "Std. Error"] - res_AB$ci_l <- bin_robust_ci[2, "2.5 %"] - res_AB$ci_u <- bin_robust_ci[2, "97.5 %"] + res_AB$est <- transform_estimate(bin_robust_coef[2, "Estimate"]) + res_AB$ci_l <- transform_estimate(bin_robust_ci[2, "2.5 %"]) + res_AB$ci_u <- transform_estimate(bin_robust_ci[2, "97.5 %"]) res_AB$pval <- bin_robust_coef[2, "Pr(>|z|)"] - if (eff_measure %in% c("RR", "OR")) { - res_AB$est <- exp(mu) - res_AB$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parameterization - res_AB$ci_l <- exp(res_AB$ci_l) - res_AB$ci_u <- exp(res_AB$ci_u) - } else if (eff_measure == "RD") { - res_AB$est <- mu * 100 - res_AB$se <- sig * 100 - res_AB$ci_l <- res_AB$ci_l * 100 - res_AB$ci_u <- res_AB$ci_u * 100 - } - # : get bootstrapped estimates if applicable if (!is.null(weights_object$boot)) { - tmp_boot_obj <- weights_object$boot - k <- dim(tmp_boot_obj)[3] - - cli::cli_progress_bar("Going through bootstrapped weights", total = k, .envir = .GlobalEnv) - - tmp_boot_est <- sapply(1:k, function(ii) { - cli::cli_progress_update(.envir = .GlobalEnv) - - boot_x <- tmp_boot_obj[, , ii] - boot_ipd_id <- weights_object$data$USUBJID[boot_x[, 1]] - boot_ipd <- ipd[match(boot_ipd_id, ipd$USUBJID), , drop = FALSE] - boot_ipd$weights <- boot_x[, 2] - + keep_rows <- setdiff(seq_len(nrow(weights_object$data)), weights_object$rows_with_missing) + boot_ipd_id <- weights_object$data[keep_rows, "USUBJID", drop = FALSE] + + boot_ipd <- merge(boot_ipd_id, ipd, by = "USUBJID", all.x = TRUE) + if (nrow(boot_ipd) != nrow(boot_ipd_id)) stop("ipd has multiple observations for some patients") + boot_ipd <- boot_ipd[match(boot_ipd$USUBJID, boot_ipd_id$USUBJID), ] + + stat_fun <- function(data, index, w_obj, pseudo_ipd) { + boot_ipd <- data[index, ] + r <- dynGet("r", ifnotfound = NA) # Get bootstrap iteration + if (!is.na(r)) { + if (!all(index == w_obj$boot[, 1, r])) stop("Bootstrap and weight indices don't match") + boot_ipd$weights <- w_obj$boot[, 2, r] + } boot_dat <- rbind(boot_ipd, pseudo_ipd) boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) - - # does not matter use robust se or not, point estimate will not change and calculation would be faster boot_binobj_dat_adj <- glm(RESPONSE ~ ARM, boot_dat, weights = weights, family = glm_link) - boot_bin_robust_cov <- sandwich::vcovHC(binobj_dat_adj, type = binary_robust_cov_type) - boot_bin_robust_coef <- lmtest::coeftest(boot_binobj_dat_adj, vcov. = boot_bin_robust_cov) - boot_AB_est <- boot_bin_robust_coef[2, "Estimate"] - if (eff_measure %in% c("RR", "OR")) { - boot_AB_est <- exp(boot_AB_est) - } else if (eff_measure == "RD") { - boot_AB_est <- boot_AB_est * 100 - } - }) + c(est = coef(boot_binobj_dat_adj)[2], var = vcov(boot_binobj_dat_adj)[2, 2]) + } - cli::cli_progress_done(.envir = .GlobalEnv) + # Revert seed to how it was for weight bootstrap sampling + old_seed <- globalenv()$.Random.seed + on.exit(suspendInterrupts(set_random_seed(old_seed))) + set_random_seed(weights_object$boot_seed) + R <- dim(weights_object$boot)[3] + boot_res <- boot(boot_ipd, stat_fun, R = R, w_obj = weights_object, pseudo_ipd = pseudo_ipd) + boot_ci <- boot.ci(boot_res, type = boot_ci_type, w_obj = weights_object, pseudo_ipd = pseudo_ipd) + + l_u_index <- switch(boot_ci_type, + "norm" = list(2, 3, "normal"), + "basic" = list(4, 5, "basic"), + "stud" = list(4, 5, "student"), + "perc" = list(4, 5, "percent"), + "bca" = list(4, 5, "bca") + ) - res$inferential[["boot_est"]] <- tmp_boot_est + res$inferential[["boot_est"]] <- boot_res + boot_res_AB <- list( + est = transform_estimate(boot_res$t0[1]), + ci_l = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[1]]]), + ci_u = transform_estimate(boot_ci[[l_u_index[[3]]]][l_u_index[[2]]]), + pval = NA + ) } else { res$inferential[["boot_est"]] <- NULL } @@ -399,42 +409,22 @@ maic_unanchored_binary <- function(res, eff_measure = eff_measure ) ) - if (is.null(res$inferential[["boot_est"]])) { res$inferential[["report_overall_bootCI"]] <- NULL } else { - boot_res_AB <- res_AB - boot_logres_se <- sd(log(res$inferential[["boot_est"]]), na.rm = TRUE) - if (boot_ci_is_quantile) { - boot_res_AB$ci_l <- quantile(res$inferential[["boot_est"]], p = 0.025) - boot_res_AB$ci_u <- quantile(res$inferential[["boot_est"]], p = 0.975) - } else { - boot_res_AB$ci_l <- exp(log(boot_res_AB$est) + qnorm(0.025) * boot_logres_se) - boot_res_AB$ci_u <- exp(log(boot_res_AB$est) + qnorm(0.975) * boot_logres_se) - } - tmp_report_table_binary <- report_table_binary( - binobj_dat_adj, - res_AB, - tag = paste0("After matching/", endpoint_name), - eff_measure = eff_measure - ) - tmp_report_table_binary[[paste0(eff_measure, "[95% CI]")]][1] <- paste0( - format(round(boot_res_AB$est, 2), nsmall = 2), - "[", - format(round(boot_res_AB$ci_l, 2), nsmall = 2), - ";", - format(round(boot_res_AB$ci_u, 2), nsmall = 2), - "]" - ) res$inferential[["report_overall_bootCI"]] <- rbind( report_table_binary( binobj_dat, tag = paste0("Before matching/", endpoint_name), eff_measure = eff_measure ), - tmp_report_table_binary + report_table_binary( + binobj_dat_adj, + boot_res_AB, + tag = paste0("After matching/", endpoint_name), + eff_measure = eff_measure + ) ) } - # output res } diff --git a/R/matching.R b/R/matching.R index 648b9320..fcf24f9d 100644 --- a/R/matching.R +++ b/R/matching.R @@ -95,6 +95,7 @@ estimate_weights <- function(data, EM <- data[, centered_colnames, drop = FALSE] ind <- apply(EM, 1, function(xx) any(is.na(xx))) nr_missing <- sum(ind) + rows_with_missing <- which(ind) EM <- as.matrix(EM[!ind, , drop = FALSE]) # estimate weights @@ -105,30 +106,27 @@ estimate_weights <- function(data, # bootstrapping outboot <- if (is.null(n_boot_iteration)) { + boot_seed <- NULL + boot_strata <- NULL NULL } else { # Make sure to leave '.Random.seed' as-is on exit - genv <- globalenv() - old_seed <- genv$.Random.seed - on.exit(suspendInterrupts({ - if (is.null(old_seed)) { - rm(".Random.seed", envir = genv, inherits = FALSE) - } else { - assign(".Random.seed", value = old_seed, envir = genv, inherits = FALSE) - } - })) + old_seed <- globalenv()$.Random.seed + on.exit(suspendInterrupts(set_random_seed(old_seed))) set.seed(set_seed_boot) + rowid_in_data <- which(!ind) - vapply( - seq_len(n_boot_iteration), - FUN.VALUE = matrix(0, nrow = nrow(EM), ncol = 2), - FUN = function(i) { - boot_rows <- sample(x = nrow(EM), size = nrow(EM), replace = TRUE) - boot_EM <- EM[boot_rows, , drop = FALSE] - boot_opt <- optimise_weights(matrix = boot_EM, par = alpha, method = method, ...) - cbind("rowid" = rowid_in_data[boot_rows], "weight" = boot_opt$wt[, 1]) - } - ) + arms <- factor(data$ARM[rowid_in_data]) + boot_statistic <- function(d, w) optimise_weights(d[w, ], par = alpha, method = method, ...)$wt[, 1] + boot_out <- boot(EM, statistic = boot_statistic, R = n_boot_iteration, strata = arms) + + boot_array <- array(dim = list(nrow(EM), 2, n_boot_iteration)) + dimnames(boot_array) <- list(sampled_patient = NULL, c("rowid", "weight"), bootstrap_iteration = NULL) + boot_array[, 1, ] <- t(boot.array(boot_out, TRUE)) + boot_array[, 2, ] <- t(boot_out$t) + boot_seed <- boot_out$seed + boot_strata <- boot_out$strata + boot_array } # append weights to data @@ -147,7 +145,10 @@ estimate_weights <- function(data, nr_missing = nr_missing, ess = sum(wt)^2 / sum(wt^2), opt = opt1$opt, - boot = outboot + boot = outboot, + boot_seed = boot_seed, + boot_strata = boot_strata, + rows_with_missing = rows_with_missing ) class(outdata) <- c("maicplus_estimate_weights", "list") diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 00000000..3c05aea7 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,21 @@ +# Restore the RNG back to a previous state using the global .Random.seed +set_random_seed <- function(old_seed) { + if (is.null(old_seed)) { + rm(".Random.seed", envir = globalenv(), inherits = FALSE) + } else { + assign(".Random.seed", value = old_seed, envir = globalenv(), inherits = FALSE) + } +} + +construct_boot_data <- function(weighted_data, i = 1) { + if (is.null(weighted_data$boot)) stop("Must contain bootstrap results from estimate_weights()") + i <- as.integer(i) + R <- dim(weighted_data$boot)[3] + if (i < 1 || i > R) stop("i must be integer between 1 and ", R) + + boot_data <- weighted_data$boot[, , i] + weighted_data$data <- weighted_data$data[boot_data[, 1], ] + weighted_data$data$weights <- boot_data[, 2] + weighted_data$data$scaled_weights <- boot_data[, 2] / sum(boot_data[, 2]) + weighted_data +} diff --git a/inst/examples/maic_anchored_ex.R b/inst/examples/maic_anchored_ex.R index 489298ea..2cabe7e1 100644 --- a/inst/examples/maic_anchored_ex.R +++ b/inst/examples/maic_anchored_ex.R @@ -10,6 +10,7 @@ adsl <- read.csv(system.file("extdata", "adsl.csv", adsl$USUBJID <- paste0("xx", adsl$USUBJID) adsl2 <- adsl adsl2$USUBJID <- sample(size = nrow(adsl2), paste0("yy", adsl2$USUBJID), replace = FALSE) +adsl2$ARM <- "C" adsl2 <- adsl2[order(adsl2$USUBJID), ] adsl <- rbind(adsl, adsl2) @@ -29,6 +30,7 @@ tmp <- simulate(fit_C, nsim = 1, seed = 1234, newdata = adtte2, censtime = max(a adtte2$TIME <- tmp$time_1 adtte2$EVENT <- tmp$event_1 adtte2$USUBJID <- paste0("yy", adtte2$USUBJID) +adtte2 <- adtte2[order(adtte2$USUBJID), ] adtte <- rbind(adtte, adtte2) ### AgD diff --git a/inst/examples/maic_unanchored_binary_ex.R b/inst/examples/maic_unanchored_binary_ex.R index e44b25f6..f6ff1646 100644 --- a/inst/examples/maic_unanchored_binary_ex.R +++ b/inst/examples/maic_unanchored_binary_ex.R @@ -8,6 +8,7 @@ weighted_data2 <- estimate_weights(data = centered_ipd_sat, centered_colnames = # binary IPD adrs_sat + # get dummy binary IPD pseudo_adrs <- get_pseudo_ipd_binary( binary_agd = data.frame( diff --git a/man/maic_anchored.Rd b/man/maic_anchored.Rd index abe14f69..d616248f 100644 --- a/man/maic_anchored.Rd +++ b/man/maic_anchored.Rd @@ -17,7 +17,8 @@ maic_anchored( endpoint_name = "Time to Event Endpoint", eff_measure = c("HR", "OR", "RR", "RD", "Diff"), time_scale = "months", - km_conf_type = "log-log" + km_conf_type = "log-log", + boot_ci_type = c("norm", "basic", "stud", "perc", "bca") ) } \arguments{ @@ -49,6 +50,9 @@ from contingency table (for binary endpoint)} 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days} \item{km_conf_type}{a string, pass to \code{conf.type} of \code{survfit}} + +\item{boot_ci_type}{a string, one of \code{c("norm","basic", "stud", "perc", "bca")} to select the type of bootstrap +confidence interval. See \link[boot:boot.ci]{boot::boot.ci} for more details.} } \value{ A list, contains 'descriptive' and 'inferential' diff --git a/man/maic_unanchored.Rd b/man/maic_unanchored.Rd index fbce19ae..0cc6fdb8 100644 --- a/man/maic_unanchored.Rd +++ b/man/maic_unanchored.Rd @@ -15,7 +15,7 @@ maic_unanchored( endpoint_type = "tte", endpoint_name = "Time to Event Endpoint", eff_measure = c("HR", "OR", "RR", "RD"), - boot_ci_is_quantile = FALSE, + boot_ci_type = c("norm", "basic", "stud", "perc", "bca"), time_scale = "months", km_conf_type = "log-log", binary_robust_cov_type = "HC3" @@ -44,18 +44,16 @@ from contingency table (for binary endpoint)} \item{eff_measure}{a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used.} -\item{boot_ci_is_quantile}{a logical, specify if the 95\% bootstrapped confidence interval should be derived by sample -quantile. Default FALSE, which the estimates assumes to follow asymptotic normal (only if \code{eff_measure} is "RD") or -log-normal with a variance that can be approximated by bootstrapped sample of the estimate. This default option may -be handy when the number of bootstrap iterations is not big.} +\item{boot_ci_type}{a string, one of \code{c("norm","basic", "stud", "perc", "bca")} to select the type of bootstrap +confidence interval. See \link[boot:boot.ci]{boot::boot.ci} for more details.} \item{time_scale}{a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days} \item{km_conf_type}{a string, pass to \code{conf.type} of \code{survfit}} -\item{binary_robust_cov_type}{a string to pass to argument \code{type} of \link[sandwich:vcovHC]{sandwich::vcovHC}, see possible options -in the documentation of that function. Default is \code{"HC3"}} +\item{binary_robust_cov_type}{a string to pass to argument \code{type} of \link[sandwich:vcovHC]{sandwich::vcovHC}, see possible options in the +documentation of that function. Default is \code{"HC3"}} } \value{ A list, contains 'descriptive' and 'inferential' diff --git a/tests/testthat/data/test_binary_unanchored_expected.RData b/tests/testthat/data/test_binary_unanchored_expected.RData index cabeb380..5412e747 100644 Binary files a/tests/testthat/data/test_binary_unanchored_expected.RData and b/tests/testthat/data/test_binary_unanchored_expected.RData differ diff --git a/tests/testthat/data/test_tte_unanchored_expected.RData b/tests/testthat/data/test_tte_unanchored_expected.RData index aebf1a65..662e39e4 100644 Binary files a/tests/testthat/data/test_tte_unanchored_expected.RData and b/tests/testthat/data/test_tte_unanchored_expected.RData differ diff --git a/tests/testthat/test-maic_anchored.R b/tests/testthat/test-maic_anchored.R index 1cfbfb1c..7aa2848e 100644 --- a/tests/testthat/test-maic_anchored.R +++ b/tests/testthat/test-maic_anchored.R @@ -1,4 +1,4 @@ -test_that("maic_unanchored works for TTE using robust SE", { +test_that("maic_anchored works for TTE using robust SE", { library(flexsurv) ### IPD set.seed(1234) @@ -97,22 +97,16 @@ test_that("maic_unanchored works for TTE using robust SE", { ) expect_equal( result$inferential$report_overall_robustCI$`median[95% CI]`, - c( - "7.6[6.3;10.3]", "1.8[1.6; 2.0]", "12.2[10.2; NA]", " 1.8[ 1.5;2.4]", - "2.7[2.3;3.3]", "1.9[1.7;2.0]", "-" - ) + c("7.6[6.3;10.3]", "1.8[1.6; 2.0]", "12.2[10.2; NA]", " 1.8[ 1.5;2.4]", "2.7[2.3;3.3]", "1.9[1.7;2.0]", "-") ) expect_equal( result$inferential$report_overall_robustCI$`HR[95% CI]`, - c( - "0.22[0.19;0.26]", "", "0.16[0.11;0.24]", "", "0.57[0.48;0.68]", - "", "0.29 [0.19; 0.44]" - ) + c("0.22[0.19;0.26]", "", "0.16[0.11;0.24]", "", "0.57[0.48;0.68]", "", "0.29 [0.19; 0.44]") ) }) -test_that("maic_unanchored works for TTE using bootstrap SE", { +test_that("maic_anchored works for TTE using bootstrap SE", { # anchored example using maic_anchored for tte library(flexsurv) ### IPD @@ -197,22 +191,24 @@ test_that("maic_unanchored works for TTE using bootstrap SE", { expect_equal( result$inferential$report_overall_bootCI$`median[95% CI]`, - c( - "7.6[6.3;10.3]", "1.8[1.6; 2.0]", "12.2[10.2; NA]", " 1.8[ 1.5;2.4]", - "2.7[2.3;3.3]", "1.9[1.7;2.0]", "-" - ) + c("7.6[6.3;10.3]", "1.8[1.6; 2.0]", "12.2[10.2; NA]", " 1.8[ 1.5;2.4]", "2.7[2.3;3.3]", "1.9[1.7;2.0]", "-") ) expect_equal( result$inferential$report_overall_bootCI$`HR[95% CI]`, - c( - "0.22[0.19;0.26]", "", "0.16[0.11;0.24]", "", "0.57[0.48;0.68]", - "", "0.29 [0.22; 0.38]" - ) + c("0.22[0.19;0.26]", "", "0.16[0.11;0.24]", "", "0.57[0.48;0.68]", "", "0.29 [0.26; 0.44]") ) - expect_length(result$inferential$boot_est, 5) - expect_equal( - quantile(result$inferential$boot_est, p = c(0.025, 0.5, 0.975)), - c(`2.5%` = 0.224505423383486, `50%` = 0.255147246438007, `97.5%` = 0.313359331451625) + t_matrix_expected <- matrix( + c( + c(-1.24294307251997, -1.33792890159168, -1.57070441400606, -1.52456404275395, -1.40942511608976), + c(0.0471386881163452, 0.0413517501082777, 0.0410548744406873, 0.0390747454633873, 0.04529038893612), + c(0.217114458561251, 0.203351297286931, 0.202620024777136, 0.197673330177309, 0.212815386981581), + c(-1.80190829720242, -1.89689412627413, -2.12966963868851, -2.0835292674364, -1.96839034077221), + c(0.197156067436005, 0.181888913677451, 0.181070984012272, 0.175518011252044, 0.192411579034645), + c(0.0388705149268306, 0.0330835769187631, 0.0327867012511726, 0.0308065722738727, 0.0370222157466054) + ), + byrow = FALSE, + ncol = 6 ) + expect_equal(result$inferential$boot_est$t, t_matrix_expected) }) diff --git a/tests/testthat/test-maic_unanchored.R b/tests/testthat/test-maic_unanchored.R index c628e492..470b3b0f 100644 --- a/tests/testthat/test-maic_unanchored.R +++ b/tests/testthat/test-maic_unanchored.R @@ -9,7 +9,10 @@ test_that("test binary case", { centered_colnames <- paste0(centered_colnames, "_CENTERED") weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames) - weighted_data2 <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 20) + weighted_data2 <- estimate_weights( + data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 20, + set_seed_boot = 1234 + ) # get dummy binary IPD adrs <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE)) @@ -66,10 +69,18 @@ test_that("test binary case", { } load(test_path("data", "test_binary_unanchored_expected.RData")) + + # Compare robust outputs expect_equal(testout$descriptive, expectout$descriptive) expect_equal(testout$inferential, expectout$inferential) + expect_equal(testout$inferential$model_before, expectout$inferential$model_before) + expect_equal(testout$inferential$model_after, expectout$inferential$model_after) + + # Compare bootstrap outputs expect_equal(testout2$descriptive, expectout2$descriptive) - expect_equal(testout2$inferential, expectout2$inferential) + expect_equal(testout2$inferential$boot_est["t"], expectout2$inferential$boot_est["t"]) + expect_equal(testout2$inferential$boot_est["seed"], expectout2$inferential$boot_est["seed"]) + expect_equal(testout2$inferential$report_overall_bootCI, expectout2$inferential$report_overall_bootCI) }) @@ -164,8 +175,15 @@ test_that("test time to event case", { } load(test_path("data", "test_tte_unanchored_expected.RData")) + # Compare robust outputs expect_equal(testout$descriptive, expectout$descriptive) expect_equal(testout$inferential, expectout$inferential) + expect_equal(testout$inferential$model_before, expectout$inferential$model_before) + expect_equal(testout$inferential$model_after, expectout$inferential$model_after) + + # Compare bootstrap outputs expect_equal(testout2$descriptive, expectout2$descriptive) - expect_equal(testout2$inferential, expectout2$inferential) + expect_equal(testout2$inferential$boot_est["t"], expectout2$inferential$boot_est["t"]) + expect_equal(testout2$inferential$boot_est["seed"], expectout2$inferential$boot_est["seed"]) + expect_equal(testout2$inferential$report_overall_bootCI, expectout2$inferential$report_overall_bootCI) }) diff --git a/tests/testthat/test-matching.R b/tests/testthat/test-matching.R index 75cb8086..b87c387d 100644 --- a/tests/testthat/test-matching.R +++ b/tests/testthat/test-matching.R @@ -86,11 +86,11 @@ test_that("estimate_weights works as expected with bootstrapping", { expected_matrix <- structure( c( - c(411, 324, 61, 71, 361), - c(0.0001657791105167, 0.382238891368213, 1.41902681600543e-10, 5.04595364860806e-08, 0.434732317134287) + c(411, 71, 321, 150, 440), + c(8.71745219703408e-05, 1.93529165335151e-08, 0.886392116942025, 1.42604077815808e-13, 0.802986096194834) ), dim = c(5L, 2L), - dimnames = list(c("411", "324", "61", "71", "361"), c("rowid", "weight")) + dimnames = list("sampled_patient" = NULL, c("rowid", "weight")) ) expect_equal(result$boot[1:5, , 1], expected_matrix) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R new file mode 100644 index 00000000..73cb1cb2 --- /dev/null +++ b/tests/testthat/test-utils.R @@ -0,0 +1,12 @@ +test_that("set_random_seed works", { + original_seed <- globalenv()$.Random.seed + set.seed(123) + seed_123 <- globalenv()$.Random.seed + sample(10) + # Back to a user specified state + set_random_seed(seed_123) + expect_equal(seed_123, globalenv()$.Random.seed) + # Back to the original state + set_random_seed(original_seed) + expect_equal(original_seed, globalenv()$.Random.seed) +})