diff --git a/DESCRIPTION b/DESCRIPTION index 4410b250..d5787793 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,13 +21,14 @@ Imports: DescTools, MASS, boot, - stringr + stringr, + lmtest, + cli Suggests: knitr, testthat (>= 2.0), ggplot2, rmarkdown, - clubSandwich, dplyr, sandwich, survminer, diff --git a/NAMESPACE b/NAMESPACE index 05863175..081263e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(dummize_ipd) export(estimate_weights) export(find_SE_from_CI) export(generate_survival_data) +export(get_pseudo_ipd_binary) export(get_time_conversion) export(kmplot) export(kmplot2) @@ -25,7 +26,8 @@ export(ph_diagplot_schoenfeld) export(plot_weights_base) export(plot_weights_ggplot) export(process_agd) -export(report_table) +export(report_table_binary) +export(report_table_tte) export(set_time_conversion) export(survfit_makeup) import(DescTools) @@ -38,6 +40,8 @@ import(stringr) import(survival) importFrom(grDevices,col2rgb) importFrom(grDevices,rgb) +importFrom(lmtest,coefci) +importFrom(lmtest,coeftest) importFrom(survival,Surv) importFrom(survival,survfit) importFrom(utils,stack) diff --git a/R/binary-helper.R b/R/binary-helper.R new file mode 100644 index 00000000..be762e61 --- /dev/null +++ b/R/binary-helper.R @@ -0,0 +1,86 @@ +#' Create pseudo IPD given aggregated binary data +#' +#' @param binary_agd a data.frame that take different formats depending on \code{format} +#' @param format a string, "stacked" or "unstacked" +#' +#' @return a data.frame of pseudo binary IPD, with columns USUBJID, ARM, RESPONSE +#' @example inst/examples/get_pseudo_ipd_binary_ex.R +#' @export + +get_pseudo_ipd_binary <- function(binary_agd, format = c("stacked", "unstacked")) { + # pre check + if (format == "stacked") { + if (!is.data.frame(binary_agd)) { + stop("stacked binary_agd should be data.frame with columns 'ARM', 'RESPONSE', 'COUNT'") + } + names(binary_agd) <- toupper(names(binary_agd)) + if (!all(c("ARM", "RESPONSE", "COUNT") %in% names(binary_agd))) { + stop("stacked binary_agd should be data.frame with columns 'ARM', 'Response', 'Count'") + } + if (!is.logical(binary_agd$RESPONSE) && !all(toupper(binary_agd$RESPONSE) %in% c("YES", "NO"))) { + stop("'RESPONSE' column in stacked binary_agd should be either logical vector or character vector of 'Yes'/'No'") + } + if (nrow(binary_agd) %% 2 != 0) { + stop("nrow(binary_agd) is not even number, you may miss to provide 1 level of binary response to certain arm") + } + } else if (format == "unstacked") { + if (!(is.data.frame(binary_agd) || is.matrix(binary_agd))) { + stop("unstacked binary_agd should be either a 1x2 or 2x2 data frame or matrix") + } + if (ncol(binary_agd) != 2 || !nrow(binary_agd) %in% c(1, 2)) { + stop("unstacked binary_agd should be either a 1x2 or 2x2 data frame or matrix") + } + bin_res <- toupper(colnames(binary_agd)) + bin_res <- sort(bin_res) + if (!(identical(bin_res, c("FALSE", "TRUE")) || identical(bin_res, c("NO", "YES")))) { + stop("column names of unstacked binary_agd should be either TRUE/FALSE or Yes/No") + } + } + + # pre process binary_agd, depending on format + use_binary_agd <- switch(format, + "stacked" = { + names(binary_agd) <- toupper(names(binary_agd)) + if (!is.logical(binary_agd$RESPONSE)) { + binary_agd$RESPONSE <- toupper(binary_agd$RESPONSE) + binary_agd$RESPONSE <- binary_agd$RESPONSE == "YES" + } + binary_agd + }, + "unstacked" = { + trt_names <- rownames(binary_agd) + bin_res <- toupper(colnames(binary_agd)) + if ("YES" %in% bin_res) { + bin_res <- ifelse(bin_res == "YES", "TRUE", "FALSE") + colnames(binary_agd) <- bin_res + } + tmpout <- utils::stack(binary_agd) + tmpout <- cbind(ARM = rep(trt_names, each = 2), tmpout) + names(tmpout) <- c("ARM", "COUNT", "RESPONSE") + rownames(tmpout) <- NULL + tmpout$RESPONSE <- as.logical(tmpout$RESPONSE) + tmpout + } + ) + + # create pseudo binary IPD + use_binary_agd$ARM <- factor(use_binary_agd$ARM, levels = unique(use_binary_agd$ARM)) + n_per_arm <- tapply(use_binary_agd$COUNT, use_binary_agd$ARM, sum) + n_yes_per_arm <- use_binary_agd$COUNT[use_binary_agd$RESPONSE] # use_binary_agd is already ordered as per factor ARM + + tmpipd <- data.frame( + USUBJID = NA, + ARM = unlist( + mapply(rep, x = levels(use_binary_agd$ARM), each = n_per_arm, SIMPLIFY = FALSE, USE.NAMES = FALSE) + ), + RESPONSE = unlist( + lapply(seq_along(n_per_arm), function(ii) { + c(rep(TRUE, n_yes_per_arm[ii]), rep(FALSE, n_per_arm[ii] - n_yes_per_arm[ii])) + }) + ) + ) + tmpipd$USUBJID <- paste0("pseudo_binary_subj_", seq_len(nrow(tmpipd))) + + # output + tmpipd +} diff --git a/R/maic_anchored.R b/R/maic_anchored.R index b0956f50..6199a987 100644 --- a/R/maic_anchored.R +++ b/R/maic_anchored.R @@ -7,7 +7,7 @@ #' @param ipd a data frame that meet format requirements in 'Details', individual patient data (IPD) of internal trial #' @param pseudo_ipd a data frame, pseudo IPD from digitized KM curve of external trial (for time-to-event endpoint) or #' from contingency table (for binary endpoint) -#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD) +#' @param trt_ipd a string, name of the interested investigation arm in internal trial \code{ipd} (internal IPD) #' @param trt_agd a string, name of the interested investigation arm in external trial \code{pseudo_ipd} (pseudo IPD) #' @param trt_common a string, name of the common comparator in internal and external trial #' @param trt_var_ipd a string, column name in \code{ipd} that contains the treatment assignment @@ -226,9 +226,9 @@ maic_anchored_tte <- function(res, # make analysis report table res$inferential[["report_overall_robustCI"]] <- rbind( - report_table(coxobj_ipd, medSurv_ipd, tag = paste0("IPD/", endpoint_name)), - report_table(coxobj_ipd_adj, medSurv_ipd_adj, tag = paste0("weighted IPD/", endpoint_name)), - report_table(coxobj_agd, medSurv_agd, tag = paste0("Agd/", endpoint_name)), + report_table_tte(coxobj_ipd, medSurv_ipd, tag = paste0("IPD/", endpoint_name)), + report_table_tte(coxobj_ipd_adj, medSurv_ipd_adj, tag = paste0("weighted IPD/", endpoint_name)), + report_table_tte(coxobj_agd, medSurv_agd, tag = paste0("Agd/", endpoint_name)), c( paste0("** adj.", trt_ipd, " vs ", trt_agd), rep("-", 4), @@ -244,9 +244,9 @@ maic_anchored_tte <- function(res, boot_res_AB$ci_l <- exp(log(boot_res_AB$est) + qnorm(0.025) * boot_logres_se) boot_res_AB$ci_u <- exp(log(boot_res_AB$est) + qnorm(0.975) * boot_logres_se) res$inferential[["report_overall_bootCI"]] <- rbind( - report_table(coxobj_ipd, medSurv_ipd, tag = paste0("IPD/", endpoint_name)), - report_table(coxobj_ipd_adj, medSurv_ipd_adj, tag = paste0("weighted IPD/", endpoint_name)), - report_table(coxobj_agd, medSurv_agd, tag = paste0("Agd/", endpoint_name)), + report_table_tte(coxobj_ipd, medSurv_ipd, tag = paste0("IPD/", endpoint_name)), + report_table_tte(coxobj_ipd_adj, medSurv_ipd_adj, tag = paste0("weighted IPD/", endpoint_name)), + report_table_tte(coxobj_agd, medSurv_agd, tag = paste0("Agd/", endpoint_name)), c( paste0("** adj.", trt_ipd, " vs ", trt_agd), rep("-", 4), diff --git a/R/maic_unanchored.R b/R/maic_unanchored.R index 0f8cd437..7e653fcb 100644 --- a/R/maic_unanchored.R +++ b/R/maic_unanchored.R @@ -12,13 +12,18 @@ #' @param trt_var_ipd a string, column name in \code{ipd} that contains the treatment assignment #' @param trt_var_agd a string, column name in \code{ipd} that contains the treatment assignment #' @param endpoint_type a string, one out of the following "binary", "tte" (time to event) -#' @param eff_measure a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) -#' for a binary endpoint; "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, -#' otherwise "HR" is used. +#' @param eff_measure a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; +#' "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used. +#' @param boot_ci_is_quantile a logical, specify if the 95% bootstrapped confidence interval should be derived by sample +#' quantile. Default FALSE, which the estimates assumes to follow asymptotic normal (only if `eff_measure` is "RD") or +#' log-normal with a variance that can be approximated by bootstrapped sample of the estimate. This default option may +#' be handy when the number of bootstrap iterations is not big. #' @param endpoint_name a string, name of time to event endpoint, to be show in the last line of title #' @param time_scale a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or #' 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days #' @param km_conf_type a string, pass to \code{conf.type} of \code{survfit} +#' @param binary_robust_cov_type a string to pass to argument `type` of [sandwich::vcovHC], see possible options +#' in the documentation of that function. Default is `"HC3"` #' #' @details For time-to-event analysis, it is required that input \code{ipd} and \code{pseudo_ipd} to have the following #' columns. This function is not sensitive to upper or lower case of letters in column names. @@ -43,11 +48,29 @@ maic_unanchored <- function(weights_object, trt_var_agd = "ARM", endpoint_type = "tte", endpoint_name = "Time to Event Endpoint", - eff_measure = c("HR", "OR", "RR", "RD", "Diff"), + eff_measure = c("HR", "OR", "RR", "RD"), + boot_ci_is_quantile = FALSE, # time to event specific args time_scale = "months", - km_conf_type = "log-log") { - # ==> Setup and Precheck ------------------------------------------ + km_conf_type = "log-log", + # binary specific args + binary_robust_cov_type = "HC3") { + # ==> Initial Setup ------------------------------------------ + # ~~~ Create the hull for the output from this function + res <- list( + descriptive = list(), + inferential = list() + ) + + res_AB <- list( + est = NA, + se = NA, + ci_l = NA, + ci_u = NA, + pval = NA + ) + + # ~~~ Initial colname process and precheck on effect measure names(ipd) <- toupper(names(ipd)) names(pseudo_ipd) <- toupper(names(pseudo_ipd)) trt_var_ipd <- toupper(trt_var_ipd) @@ -56,7 +79,7 @@ maic_unanchored <- function(weights_object, if (length(eff_measure) > 1) eff_measure <- NULL if (is.null(eff_measure)) eff_measure <- list(binary = "OR", tte = "HR")[[endpoint_type]] - # setup ARM column and precheck + # ~~~ Setup ARM column and make related pre-checks if (!trt_var_ipd %in% names(ipd)) stop("cannot find arm indicator column trt_var_ipd in ipd") if (!trt_var_agd %in% names(pseudo_ipd)) stop("cannot find arm indicator column trt_var_agd in pseudo_ipd") if (trt_var_ipd != "ARM") ipd$ARM <- ipd[[trt_var_ipd]] @@ -66,7 +89,7 @@ maic_unanchored <- function(weights_object, if (!trt_ipd %in% ipd$ARM) stop("trt_ipd does not exist in ipd$ARM") if (!trt_agd %in% pseudo_ipd$ARM) stop("trt_agd does not exist in pseudo_ipd$ARM") - # more pre-checks + # ~~~ More pre-checks endpoint_type <- match.arg(endpoint_type, c("binary", "tte")) if (!"maicplus_estimate_weights" %in% class(weights_object)) { stop("weights_object should be an object returned by estimate_weights") @@ -88,6 +111,11 @@ maic_unanchored <- function(weights_object, if (any(!c("USUBJID", "RESPONSE") %in% names(ipd))) stop("ipd should have 'USUBJID', 'RESPONSE' columns at minimum") eff_measure <- match.arg(eff_measure, choices = c("OR", "RD", "RR"), several.ok = FALSE) + + binary_robust_cov_type <- match.arg( + binary_robust_cov_type, + choices = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5") + ) } else if (endpoint_type == "tte") { # for time to event effect measure if (!all(c("USUBJID", "TIME", "EVENT", trt_var_ipd) %in% names(ipd))) { @@ -98,28 +126,23 @@ maic_unanchored <- function(weights_object, } eff_measure <- match.arg(eff_measure, choices = c("HR"), several.ok = FALSE) } - # else { # for continuous effect measure - # - # if (any(!c("USUBJID", "RESPONSE") %in% names(ipd))) { - # stop("ipd should have 'USUBJID', 'RESPONSE' columns at minimum") - # } - # eff_measure <- match.arg(eff_measure, choices = c("Diff"), several.ok = FALSE) - # } - - # create the hull for the output from this function - res <- list( - descriptive = list(), - inferential = list() - ) - # prepare ipd and agd data for analysis, part 1/2 + # ==> IPD and AgD data preparation ------------------------------------------ + # : subset ipd, retain only ipd from interested trts ipd <- ipd[ipd$ARM == trt_ipd, , drop = TRUE] pseudo_ipd <- pseudo_ipd[pseudo_ipd$ARM == trt_agd, , drop = TRUE] + + # : assign weights to real and pseudo ipd ipd$weights <- weights_object$data$weights[match(weights_object$data$USUBJID, ipd$USUBJID)] pseudo_ipd$weights <- 1 + + # : necessary formatting for pseudo ipd if (!"USUBJID" %in% names(pseudo_ipd)) pseudo_ipd$USUBJID <- paste0("ID", seq_len(nrow(pseudo_ipd))) + if ("RESPONSE" %in% names(pseudo_ipd) && is.logical(pseudo_ipd$RESPONSE)) { + pseudo_ipd$RESPONSE <- as.numeric(pseudo_ipd$RESPONSE) + } - # give warning when individual pts in IPD has no weights + # : give warning when individual pts in IPD has no weights if (any(is.na(ipd$weights))) { ipd <- ipd[!is.na(ipd$weights), , drop = FALSE] warning( @@ -131,7 +154,7 @@ maic_unanchored <- function(weights_object, if (nrow(ipd) == 0) stop("there is no pts with weight in IPD!!") } - # prepare ipd and agd data for analysis, part 2/2 + # : retain necessary columns if (endpoint_type == "tte") { retain_cols <- c("USUBJID", "ARM", "TIME", "EVENT", "weights") } else { @@ -139,99 +162,279 @@ maic_unanchored <- function(weights_object, } ipd <- ipd[, retain_cols, drop = FALSE] pseudo_ipd <- pseudo_ipd[, retain_cols, drop = FALSE] + + # : merge real and pseudo ipds dat <- rbind(ipd, pseudo_ipd) dat$ARM <- factor(dat$ARM, levels = c(trt_agd, trt_ipd)) # ==> Inferential output ------------------------------------------ - if (endpoint_type == "tte") { - # Analysis table (Cox model) before and after matching, incl Median Survival Time - # derive km w and w/o weights - kmobj_dat <- survfit(Surv(TIME, EVENT) ~ ARM, dat, conf.type = km_conf_type) - kmobj_dat_adj <- survfit(Surv(TIME, EVENT) ~ ARM, dat, weights = dat$weights, conf.type = km_conf_type) - res$descriptive[["survfit_before"]] <- survfit_makeup(kmobj_dat) - res$descriptive[["survfit_after"]] <- survfit_makeup(kmobj_dat_adj) - # derive median survival time - medSurv_dat <- medSurv_makeup(kmobj_dat, legend = "Before matching", time_scale = time_scale) - medSurv_dat_adj <- medSurv_makeup(kmobj_dat_adj, legend = "After matching", time_scale = time_scale) - medSurv_out <- rbind(medSurv_dat, medSurv_dat_adj) - - res$inferential[["report_median_surv"]] <- medSurv_out - - # fit PH Cox regression model - coxobj_dat <- coxph(Surv(TIME, EVENT) ~ ARM, dat, robust = TRUE) - coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, dat, weights = weights, robust = TRUE) - - res$inferential[["coxph_before"]] <- coxobj_dat - res$inferential[["coxph_after"]] <- coxobj_dat_adj - - # derive ipd exp arm vs agd exp arm via bucher - res_AB <- list( - est = NA, - se = NA, - ci_l = NA, - ci_u = NA, - pval = NA + + result <- if (endpoint_type == "tte") { + maic_unanchored_tte( + res, res_AB, dat, ipd, pseudo_ipd, km_conf_type, time_scale, + weights_object, endpoint_name, boot_ci_is_quantile, trt_ipd, trt_agd ) - res_AB$est <- summary(coxobj_dat_adj)$conf.int[1] - mu <- summary(coxobj_dat_adj)$coef[1] - sig <- summary(coxobj_dat_adj)$coef[4] - res_AB$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parameterization - res_AB$ci_l <- summary(coxobj_dat_adj)$conf.int[3] - res_AB$ci_u <- summary(coxobj_dat_adj)$conf.int[4] - res_AB$pval <- summary(coxobj_dat_adj)$coef[6] - - # get bootstrapped estimates if applicable - if (!is.null(weights_object$boot)) { - tmp_boot_obj <- weights_object$boot - k <- dim(tmp_boot_obj)[3] - tmp_boot_est <- sapply(1:k, function(ii) { - boot_x <- tmp_boot_obj[, , ii] - boot_ipd_id <- weights_object$data$USUBJID[boot_x[, 1]] - boot_ipd <- ipd[match(boot_ipd_id, ipd$USUBJID), , drop = FALSE] - boot_ipd$weights <- boot_x[, 2] - - boot_dat <- rbind(boot_ipd, pseudo_ipd) - boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) - - # does not matter use robust se or not, point estimate will not change and calculation would be faster - boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_dat, weights = weights) - boot_AB_est <- summary(boot_coxobj_dat_adj)$coef[1] - exp(boot_AB_est) - }) - res$inferential[["boot_est"]] <- tmp_boot_est + } else if (endpoint_type == "binary") { + maic_unanchored_binary( + res, res_AB, dat, ipd, pseudo_ipd, binary_robust_cov_type, + weights_object, endpoint_name, eff_measure, boot_ci_is_quantile, trt_ipd, trt_agd + ) + } else { + stop("Endpoint type ", endpoint_type, " currently unsupported.") + } + + # output + result +} + +# MAIC inference functions for TTE outcome type ------------ + +maic_unanchored_tte <- function(res, + res_AB, + dat, + ipd, + pseudo_ipd, + km_conf_type, + time_scale, + weights_object, + endpoint_name, + boot_ci_is_quantile, + trt_ipd, + trt_agd) { + # ~~~ Descriptive table before and after matching + # : derive km w and w/o weights + kmobj_dat <- survfit(Surv(TIME, EVENT) ~ ARM, dat, conf.type = km_conf_type) + kmobj_dat_adj <- survfit(Surv(TIME, EVENT) ~ ARM, dat, weights = dat$weights, conf.type = km_conf_type) + res$descriptive[["survfit_before"]] <- survfit_makeup(kmobj_dat) + res$descriptive[["survfit_after"]] <- survfit_makeup(kmobj_dat_adj) + # : derive median survival time + medSurv_dat <- medSurv_makeup(kmobj_dat, legend = "Before matching", time_scale = time_scale) + medSurv_dat_adj <- medSurv_makeup(kmobj_dat_adj, legend = "After matching", time_scale = time_scale) + medSurv_out <- rbind(medSurv_dat, medSurv_dat_adj) + + res$inferential[["report_median_surv"]] <- medSurv_out + + # ~~~ Analysis table (Cox model) before and after matching + # : fit PH Cox regression model + coxobj_dat <- coxph(Surv(TIME, EVENT) ~ ARM, dat) + coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, dat, weights = weights, robust = TRUE) + + res$inferential[["coxph_before"]] <- coxobj_dat + res$inferential[["coxph_after"]] <- coxobj_dat_adj + + # : derive ipd exp arm vs agd exp arm + res_AB$est <- summary(coxobj_dat_adj)$conf.int[1] + mu <- summary(coxobj_dat_adj)$coef[1] + sig <- summary(coxobj_dat_adj)$coef[4] + res_AB$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parametrization + res_AB$ci_l <- summary(coxobj_dat_adj)$conf.int[3] + res_AB$ci_u <- summary(coxobj_dat_adj)$conf.int[4] + res_AB$pval <- summary(coxobj_dat_adj)$coef[6] + + # : get bootstrapped estimates if applicable + if (!is.null(weights_object$boot)) { + tmp_boot_obj <- weights_object$boot + k <- dim(tmp_boot_obj)[3] + + cli::cli_progress_bar("Going through bootstrapped weights", total = k, .envir = .GlobalEnv) + + tmp_boot_est <- sapply(1:k, function(ii) { + cli::cli_progress_update(.envir = .GlobalEnv) + + boot_x <- tmp_boot_obj[, , ii] + boot_ipd_id <- weights_object$data$USUBJID[boot_x[, 1]] + boot_ipd <- ipd[match(boot_ipd_id, ipd$USUBJID), , drop = FALSE] + boot_ipd$weights <- boot_x[, 2] + + boot_dat <- rbind(boot_ipd, pseudo_ipd) + boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) + + boot_coxobj_dat_adj <- coxph(Surv(TIME, EVENT) ~ ARM, boot_dat, weights = weights) + boot_AB_est <- summary(boot_coxobj_dat_adj)$coef[1] + exp(boot_AB_est) + }) + + cli::cli_progress_done(.envir = .GlobalEnv) + + res$inferential[["boot_est"]] <- tmp_boot_est + } else { + res$inferential[["boot_est"]] <- NULL + } + + # : make analysis report table + res$inferential[["report_overall_robustCI"]] <- rbind( + report_table_tte(coxobj_dat, medSurv_dat, tag = paste0("Before matching/", endpoint_name)), + report_table_tte(coxobj_dat_adj, medSurv_dat_adj, tag = paste0("After matching/", endpoint_name)) + ) + + if (is.null(res$inferential[["boot_est"]])) { + res$inferential[["report_overall_bootCI"]] <- NULL + } else { + boot_res_AB <- res_AB + boot_logres_se <- sd(log(res$inferential[["boot_est"]]), na.rm = TRUE) + if (boot_ci_is_quantile) { + boot_res_AB$ci_l <- quantile(res$inferential[["boot_est"]], p = 0.025) + boot_res_AB$ci_u <- quantile(res$inferential[["boot_est"]], p = 0.975) } else { - res$inferential[["boot_est"]] <- NULL + boot_res_AB$ci_l <- exp(log(boot_res_AB$est) + qnorm(0.025) * boot_logres_se) + boot_res_AB$ci_u <- exp(log(boot_res_AB$est) + qnorm(0.975) * boot_logres_se) } + tmp_report_table_tte <- report_table_tte(coxobj_dat_adj, + medSurv_dat_adj, + tag = paste0("After matching/", endpoint_name) + ) + tmp_report_table_tte$`HR[95% CI]`[1] <- paste0( + format(round(boot_res_AB$est, 2), nsmall = 2), "[", + format(round(boot_res_AB$ci_l, 2), nsmall = 2), ";", + format(round(boot_res_AB$ci_u, 2), nsmall = 2), "]" + ) + res$inferential[["report_overall_bootCI"]] <- rbind( + report_table_tte(coxobj_dat, medSurv_dat, tag = paste0("Before matching/", endpoint_name)), + tmp_report_table_tte + ) + } - # make analysis report table - res$inferential[["report_overall_robustCI"]] <- rbind( - report_table(coxobj_dat, medSurv_dat, tag = paste0("Before matching/", endpoint_name)), - report_table(coxobj_dat_adj, medSurv_dat_adj, tag = paste0("After matching/", endpoint_name)) + # output + res +} + +# MAIC inference functions for Binary outcome type ------------ + +maic_unanchored_binary <- function(res, + res_AB, + dat, + ipd, + pseudo_ipd, + binary_robust_cov_type, + weights_object, + endpoint_name, + eff_measure, + boot_ci_is_quantile, + trt_ipd, + trt_agd) { + # ~~~ Analysis table + # : set up proper link + glm_link <- switch(eff_measure, + "RD" = poisson(link = "identity"), + "RR" = poisson(link = "log"), + "OR" = binomial(link = "logit") + ) + + # : fit glm for binary outcome and robust estimate with weights + binobj_dat <- glm(RESPONSE ~ ARM, dat, family = glm_link) + binobj_dat_adj <- glm(RESPONSE ~ ARM, dat, weights = weights, family = glm_link) + bin_robust_cov <- sandwich::vcovHC(binobj_dat_adj, type = binary_robust_cov_type) + bin_robust_coef <- lmtest::coeftest(binobj_dat_adj, vcov. = bin_robust_cov) + bin_robust_ci <- lmtest::coefci(binobj_dat_adj, vcov. = bin_robust_cov) + + res$inferential[["model_before"]] <- binobj_dat + res$inferential[["model_after"]] <- binobj_dat_adj + + mu <- bin_robust_coef[2, "Estimate"] + sig <- bin_robust_coef[2, "Std. Error"] + res_AB$ci_l <- bin_robust_ci[2, "2.5 %"] + res_AB$ci_u <- bin_robust_ci[2, "97.5 %"] + res_AB$pval <- bin_robust_coef[2, "Pr(>|z|)"] + + if (eff_measure %in% c("RR", "OR")) { + res_AB$est <- exp(mu) + res_AB$se <- sqrt((exp(sig^2) - 1) * exp(2 * mu + sig^2)) # log normal parameterization + res_AB$ci_l <- exp(res_AB$ci_l) + res_AB$ci_u <- exp(res_AB$ci_u) + } else if (eff_measure == "RD") { + res_AB$est <- mu * 100 + res_AB$se <- sig * 100 + res_AB$ci_l <- res_AB$ci_l * 100 + res_AB$ci_u <- res_AB$ci_u * 100 + } + + # : get bootstrapped estimates if applicable + if (!is.null(weights_object$boot)) { + tmp_boot_obj <- weights_object$boot + k <- dim(tmp_boot_obj)[3] + + cli::cli_progress_bar("Going through bootstrapped weights", total = k, .envir = .GlobalEnv) + + tmp_boot_est <- sapply(1:k, function(ii) { + cli::cli_progress_update(.envir = .GlobalEnv) + + boot_x <- tmp_boot_obj[, , ii] + boot_ipd_id <- weights_object$data$USUBJID[boot_x[, 1]] + boot_ipd <- ipd[match(boot_ipd_id, ipd$USUBJID), , drop = FALSE] + boot_ipd$weights <- boot_x[, 2] + + boot_dat <- rbind(boot_ipd, pseudo_ipd) + boot_dat$ARM <- factor(boot_dat$ARM, levels = c(trt_agd, trt_ipd)) + + # does not matter use robust se or not, point estimate will not change and calculation would be faster + boot_binobj_dat_adj <- glm(RESPONSE ~ ARM, boot_dat, weights = weights, family = glm_link) + boot_bin_robust_cov <- sandwich::vcovHC(binobj_dat_adj, type = binary_robust_cov_type) + boot_bin_robust_coef <- lmtest::coeftest(boot_binobj_dat_adj, vcov. = boot_bin_robust_cov) + boot_AB_est <- boot_bin_robust_coef[2, "Estimate"] + if (eff_measure %in% c("RR", "OR")) { + boot_AB_est <- exp(boot_AB_est) + } else if (eff_measure == "RD") { + boot_AB_est <- boot_AB_est * 100 + } + }) + + cli::cli_progress_done(.envir = .GlobalEnv) + + res$inferential[["boot_est"]] <- tmp_boot_est + } else { + res$inferential[["boot_est"]] <- NULL + } + + # : make analysis report table + res$inferential[["report_overall_robustCI"]] <- rbind( + report_table_binary( + binobj_dat, + tag = paste0("Before matching/", endpoint_name), + eff_measure = eff_measure + ), + report_table_binary( + binobj_dat_adj, + res_AB, + tag = paste0("After matching/", endpoint_name), + eff_measure = eff_measure ) + ) - if (is.null(res$inferential[["boot_est"]])) { - res$inferential[["report_overall_bootCI"]] <- NULL + if (is.null(res$inferential[["boot_est"]])) { + res$inferential[["report_overall_bootCI"]] <- NULL + } else { + boot_res_AB <- res_AB + boot_logres_se <- sd(log(res$inferential[["boot_est"]]), na.rm = TRUE) + if (boot_ci_is_quantile) { + boot_res_AB$ci_l <- quantile(res$inferential[["boot_est"]], p = 0.025) + boot_res_AB$ci_u <- quantile(res$inferential[["boot_est"]], p = 0.975) } else { - boot_res_AB <- res_AB - boot_logres_se <- sd(log(res$inferential[["boot_est"]]), na.rm = TRUE) boot_res_AB$ci_l <- exp(log(boot_res_AB$est) + qnorm(0.025) * boot_logres_se) boot_res_AB$ci_u <- exp(log(boot_res_AB$est) + qnorm(0.975) * boot_logres_se) - # boot_res_AB$ci_l <- quantile(res$inferential[["boot_est"]],p=0.025) - # boot_res_AB$ci_u <- quantile(res$inferential[["boot_est"]],p=0.975) - - tmp_report_table <- report_table(coxobj_dat_adj, medSurv_dat_adj, tag = paste0("After matching/", endpoint_name)) - tmp_report_table$`HR[95% CI]`[1] <- paste0( - format(round(boot_res_AB$est, 2), nsmall = 2), "[", - format(round(boot_res_AB$ci_l, 2), nsmall = 2), ";", - format(round(boot_res_AB$ci_u, 2), nsmall = 2), "]" - ) - res$inferential[["report_overall_bootCI"]] <- rbind( - report_table(coxobj_dat, medSurv_dat, tag = paste0("Before matching/", endpoint_name)), - tmp_report_table - ) } + tmp_report_table_binary <- report_table_binary( + binobj_dat_adj, + res_AB, + tag = paste0("After matching/", endpoint_name), + eff_measure = eff_measure + ) + tmp_report_table_binary[[paste0(eff_measure, "[95% CI]")]][1] <- paste0( + format(round(boot_res_AB$est, 2), nsmall = 2), + "[", + format(round(boot_res_AB$ci_l, 2), nsmall = 2), + ";", + format(round(boot_res_AB$ci_u, 2), nsmall = 2), + "]" + ) + res$inferential[["report_overall_bootCI"]] <- rbind( + report_table_binary( + binobj_dat, + tag = paste0("Before matching/", endpoint_name), + eff_measure = eff_measure + ), + tmp_report_table_binary + ) } - # output res } diff --git a/R/maicplus-package.R b/R/maicplus-package.R index daf98db2..47cf8d2e 100644 --- a/R/maicplus-package.R +++ b/R/maicplus-package.R @@ -17,4 +17,5 @@ NULL #' @import stringr #' @import lubridate #' @importFrom utils stack +#' @importFrom lmtest coeftest coefci NULL diff --git a/R/reporting.R b/R/reporting.R index bd3ca623..466d482e 100644 --- a/R/reporting.R +++ b/R/reporting.R @@ -1,5 +1,4 @@ #' helper function: sort out a nice report table to summarize survival analysis results -#' for `maic_tte_unanchor` #' #' @param coxobj returned object from \code{\link[survival]{coxph}} #' @param medSurvobj returned object from \code{\link{medSurv_makeup}} @@ -10,29 +9,104 @@ #' #' @export -report_table <- function(coxobj, medSurvobj, tag = NULL) { +report_table_tte <- function(coxobj, medSurvobj, tag = NULL) { + # descriptive part + N <- medSurvobj$n.max + N.EVNT <- medSurvobj$events + N.EVNT.PERC <- round(N.EVNT * 100 / N, 1) + N <- round(N, 1) # not necessary integer + N.EVNT <- round(N.EVNT, 1) # not necessary integer + meds_report <- format(round(medSurvobj[, c("median", "0.95LCL", "0.95UCL")], 1), nsmall = 1) + meds_report <- apply(meds_report, 1, function(xx) paste0(xx[1], "[", xx[2], ";", xx[3], "]")) + + # inferential part hr_res <- format(round(summary(coxobj)$conf.int[-2], 2), nsmall = 2) hr_res <- paste0(hr_res[1], "[", hr_res[2], ";", hr_res[3], "]") hr_pval <- format(round(summary(coxobj)$waldtest[3], 3), nsmall = 3) if (hr_pval == "0.000") hr_pval <- "<0.001" - meds_report <- format(round(medSurvobj[, c("median", "0.95LCL", "0.95UCL")], 1), nsmall = 1) - meds_report <- apply(meds_report, 1, function(xx) paste0(xx[1], "[", xx[2], ";", xx[3], "]")) + # assemble the table + desc_res <- cbind( + medSurvobj[, "treatment", drop = FALSE], + data.frame( + "N" = N, + "n.events(%)" = paste0(N.EVNT, "(", format(N.EVNT.PERC, nsmall = 1), ")"), + "median[95% CI]" = meds_report, + check.names = FALSE + ) + ) + desc_res <- cbind(desc_res[c(2, 1), ], "HR[95% CI]" = c(hr_res, ""), "p-Value" = c(hr_pval, "")) + names(desc_res)[3] <- "n.events(%)" # cbind changes column name + + # add first tag column if applicable + if (!is.null(tag)) { + desc_res <- cbind(data.frame(Matching = rep(tag, nrow(desc_res))), desc_res) + desc_res$Matching[duplicated(desc_res$Matching)] <- "" + } + + # output + desc_res +} + +#' helper function: sort out a nice report table to summarize binary analysis results +#' +#' @param binobj object from glm() +#' @param weighted_result object res_AB +#' @param eff_measure a string, binary effect measure, could be "OR", "RR", "RD" +#' @param tag a string, by default NULL, if specified, an extra 1st column is created in the output +#' +#' @return a data frame with sample size, incidence rate, estimate of binary effect measure with +#' 95% CI and Wald test of hazard ratio +#' +#' @export + +report_table_binary <- function(binobj, weighted_result = NULL, eff_measure = c("OR", "RD", "RR"), tag = NULL) { + weighted <- ifelse(is.null(weighted_result), FALSE, TRUE) + # descriptive part + ARM <- levels(binobj$data$ARM) + if (!weighted) { + N <- tapply(binobj$data$USUBJID, binobj$data$ARM, length) + N.EVNT <- tapply(binobj$data$RESPONSE, binobj$data$ARM, sum) + } else { + N <- tapply(binobj$data$weights, binobj$data$ARM, length) + N.EVNT <- tapply(binobj$data$weights * binobj$data$RESPONSE, binobj$data$ARM, sum) + } + N.EVNT.PERC <- round(N.EVNT * 100 / N, 1) + N.EVNT <- round(N.EVNT, 1) - desc_res <- cbind(medSurvobj[, "treatment", drop = FALSE], - data.frame(N = round(medSurvobj$n.max, 1)), - "n.events(%)" = paste0( - round(medSurvobj$events, 1), "(", - format(round(medSurvobj$events * 100 / medSurvobj$n.max, 1), nsmall = 1), ")" - ), - "median[95% CI]" = meds_report + # inferential part + if (!weighted) { + bin_res_est <- coef(binobj)[2] + bin_res_ci <- confint(binobj, parm = 2, test = "LRT", trace = FALSE) + bin_res <- c(bin_res_est, bin_res_ci) + if (eff_measure != "RD") bin_res <- exp(bin_res) + bin_res <- round(bin_res, 2) |> format(nsmall = 2) + bin_res <- paste0(bin_res[1], "[", bin_res[2], ";", bin_res[3], "]") + bin_pval <- round(summary(binobj)$coefficients[2, 4], 3) |> format(nsmall = 3) + } else { + bin_res <- round(unlist(weighted_result[c("est", "ci_l", "ci_u")]), 2) |> format(nsmall = 2) + bin_res <- paste0(bin_res[1], "[", bin_res[2], ";", bin_res[3], "]") + bin_pval <- round(weighted_result$pval, 3) |> format(nsmall = 3) + } + if (bin_pval == "0.000") bin_pval <- "<0.001" + + # assemble the table + desc_res <- data.frame( + "treatment" = ARM, + "N" = N, + "n.events(%)" = paste0(N.EVNT, "(", format(N.EVNT.PERC, nsmall = 1), ")") ) - desc_res <- cbind(desc_res[c(2, 1), ], "HR[95% CI]" = c(hr_res, ""), "p-Value" = c(hr_pval, "")) + desc_res <- cbind(desc_res[c(2, 1), ], "xx[95% CI]" = c(bin_res, ""), "p-Value" = c(bin_pval, "")) + names(desc_res)[ncol(desc_res) - 1] <- paste0(eff_measure, "[95% CI]") + names(desc_res)[3] <- "n.events(%)" # cbind changes column name + # add first tag column if applicable if (!is.null(tag)) { desc_res <- cbind(data.frame(Matching = rep(tag, nrow(desc_res))), desc_res) desc_res$Matching[duplicated(desc_res$Matching)] <- "" } + + # output desc_res } diff --git a/inst/WORDLIST b/inst/WORDLIST index cac9494b..c919a6e2 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -3,7 +3,6 @@ ADaM AgD Azocar Bucher -Bucher's Comparator DSU ECOG @@ -29,7 +28,6 @@ agd agg al binarize -ci comparator confounders csv @@ -38,6 +36,7 @@ dummized et frac ggplot +glm hta initializer ipd @@ -52,3 +51,5 @@ signorovitch tte unanchored unscaled +unstacked +vcovHC diff --git a/inst/examples/get_pseudo_ipd_binary_ex.R b/inst/examples/get_pseudo_ipd_binary_ex.R new file mode 100644 index 00000000..1c192c69 --- /dev/null +++ b/inst/examples/get_pseudo_ipd_binary_ex.R @@ -0,0 +1,17 @@ +# example of unstacked +testdat <- data.frame(Yes = 280, No = 120) +rownames(testdat) <- "B" +get_pseudo_ipd_binary( + binary_agd = testdat, + format = "unstacked" +) + +# example of stacked +get_pseudo_ipd_binary( + binary_agd = data.frame( + ARM = rep("B", 2), + RESPONSE = c("YES", "NO"), + COUNT = c(280, 120) + ), + format = "stacked" +) diff --git a/inst/examples/maic_unanchored_binary_ex.R b/inst/examples/maic_unanchored_binary_ex.R new file mode 100644 index 00000000..31bcd7e0 --- /dev/null +++ b/inst/examples/maic_unanchored_binary_ex.R @@ -0,0 +1,56 @@ +# load in prognostic IPD data and AgD +load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE)) +load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) +ipd_centered <- center_ipd(ipd = ipd, agd = agd) + +# estimate weights +centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN") +centered_colnames <- paste0(centered_colnames, "_CENTERED") + +weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames) +weighted_data2 <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 500) + +# get dummy binary IPD +adrs <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE)) +adrs$RESPONSE <- adrs$AVAL + +pseudo_adrs <- get_pseudo_ipd_binary( + binary_agd = data.frame( + ARM = rep("B", 2), + RESPONSE = c("YES", "NO"), + COUNT = c(280, 120) + ), + format = "stacked" +) + +# unanchored binary MAIC, with CI based on sandwich estimator +maic_unanchored( + weights_object = weighted_data, + ipd = adrs, + pseudo_ipd = pseudo_adrs, + trt_ipd = "A", + trt_agd = "B", + trt_var_ipd = "ARM", + trt_var_agd = "ARM", + endpoint_type = "binary", + endpoint_name = "Binary Endpoint", + eff_measure = "RR", + # binary specific args + binary_robust_cov_type = "HC3" +) + +# unanchored binary MAIC, with bootstrapped CI +maic_unanchored( + weights_object = weighted_data2, + ipd = adrs, + pseudo_ipd = pseudo_adrs, + trt_ipd = "A", + trt_agd = "B", + trt_var_ipd = "ARM", + trt_var_agd = "ARM", + endpoint_type = "binary", + endpoint_name = "Binary Endpoint", + eff_measure = "RR", + # binary specific args + binary_robust_cov_type = "HC3" +) diff --git a/inst/examples/maic_unanchored_ex.R b/inst/examples/maic_unanchored_ex.R index 62427623..0a700386 100644 --- a/inst/examples/maic_unanchored_ex.R +++ b/inst/examples/maic_unanchored_ex.R @@ -1,7 +1,6 @@ # anchored example using maic_anchored for tte library(flexsurv) -### IPD -set.seed(1234) + # Read in relevant ADaM data and rename variables of interest adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", @@ -46,7 +45,7 @@ match_res_boot <- estimate_weights( centered_colnames = grep("_CENTERED$", names(use_adsl)), start_val = 0, method = "BFGS", - n_boot_iteration = 1000, + n_boot_iteration = 500, set_seed_boot = 1234 ) diff --git a/man/get_pseudo_ipd_binary.Rd b/man/get_pseudo_ipd_binary.Rd new file mode 100644 index 00000000..8f2d1721 --- /dev/null +++ b/man/get_pseudo_ipd_binary.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/binary-helper.R +\name{get_pseudo_ipd_binary} +\alias{get_pseudo_ipd_binary} +\title{Create pseudo IPD given aggregated binary data} +\usage{ +get_pseudo_ipd_binary(binary_agd, format = c("stacked", "unstacked")) +} +\arguments{ +\item{binary_agd}{a data.frame that take different formats depending on \code{format}} + +\item{format}{a string, "stacked" or "unstacked"} +} +\value{ +a data.frame of pseudo binary IPD, with columns USUBJID, ARM, RESPONSE +} +\description{ +Create pseudo IPD given aggregated binary data +} +\examples{ +# example of unstacked +testdat <- data.frame(Yes = 280, No = 120) +rownames(testdat) <- "B" +get_pseudo_ipd_binary( + binary_agd = testdat, + format = "unstacked" +) + +# example of stacked +get_pseudo_ipd_binary( + binary_agd = data.frame( + ARM = rep("B", 2), + RESPONSE = c("YES", "NO"), + COUNT = c(280, 120) + ), + format = "stacked" +) +} diff --git a/man/maic_anchored.Rd b/man/maic_anchored.Rd index 8ba2f286..abe14f69 100644 --- a/man/maic_anchored.Rd +++ b/man/maic_anchored.Rd @@ -28,7 +28,7 @@ maic_anchored( \item{pseudo_ipd}{a data frame, pseudo IPD from digitized KM curve of external trial (for time-to-event endpoint) or from contingency table (for binary endpoint)} -\item{trt_ipd}{a string, name of the interested investigation arm in internal trial \code{dat_igd} (real IPD)} +\item{trt_ipd}{a string, name of the interested investigation arm in internal trial \code{ipd} (internal IPD)} \item{trt_agd}{a string, name of the interested investigation arm in external trial \code{pseudo_ipd} (pseudo IPD)} diff --git a/man/maic_unanchored.Rd b/man/maic_unanchored.Rd index 1bc8eb08..fbce19ae 100644 --- a/man/maic_unanchored.Rd +++ b/man/maic_unanchored.Rd @@ -14,9 +14,11 @@ maic_unanchored( trt_var_agd = "ARM", endpoint_type = "tte", endpoint_name = "Time to Event Endpoint", - eff_measure = c("HR", "OR", "RR", "RD", "Diff"), + eff_measure = c("HR", "OR", "RR", "RD"), + boot_ci_is_quantile = FALSE, time_scale = "months", - km_conf_type = "log-log" + km_conf_type = "log-log", + binary_robust_cov_type = "HC3" ) } \arguments{ @@ -39,14 +41,21 @@ from contingency table (for binary endpoint)} \item{endpoint_name}{a string, name of time to event endpoint, to be show in the last line of title} -\item{eff_measure}{a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) -for a binary endpoint; "HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, -otherwise "HR" is used.} +\item{eff_measure}{a string, "RD" (risk difference), "OR" (odds ratio), "RR" (relative risk) for a binary endpoint; +"HR" for a time-to-event endpoint. By default is \code{NULL}, "OR" is used for binary case, otherwise "HR" is used.} + +\item{boot_ci_is_quantile}{a logical, specify if the 95\% bootstrapped confidence interval should be derived by sample +quantile. Default FALSE, which the estimates assumes to follow asymptotic normal (only if \code{eff_measure} is "RD") or +log-normal with a variance that can be approximated by bootstrapped sample of the estimate. This default option may +be handy when the number of bootstrap iterations is not big.} \item{time_scale}{a string, time unit of median survival time, taking a value of 'years', 'months', 'weeks' or 'days'. NOTE: it is assumed that values in TIME column of \code{ipd} and \code{pseudo_ipd} is in the unit of days} \item{km_conf_type}{a string, pass to \code{conf.type} of \code{survfit}} + +\item{binary_robust_cov_type}{a string to pass to argument \code{type} of \link[sandwich:vcovHC]{sandwich::vcovHC}, see possible options +in the documentation of that function. Default is \code{"HC3"}} } \value{ A list, contains 'descriptive' and 'inferential' diff --git a/man/report_table_binary.Rd b/man/report_table_binary.Rd new file mode 100644 index 00000000..1e6dd2dd --- /dev/null +++ b/man/report_table_binary.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reporting.R +\name{report_table_binary} +\alias{report_table_binary} +\title{helper function: sort out a nice report table to summarize binary analysis results} +\usage{ +report_table_binary( + binobj, + weighted_result = NULL, + eff_measure = c("OR", "RD", "RR"), + tag = NULL +) +} +\arguments{ +\item{binobj}{object from glm()} + +\item{weighted_result}{object res_AB} + +\item{eff_measure}{a string, binary effect measure, could be "OR", "RR", "RD"} + +\item{tag}{a string, by default NULL, if specified, an extra 1st column is created in the output} +} +\value{ +a data frame with sample size, incidence rate, estimate of binary effect measure with +95\% CI and Wald test of hazard ratio +} +\description{ +helper function: sort out a nice report table to summarize binary analysis results +} diff --git a/man/report_table.Rd b/man/report_table_tte.Rd similarity index 79% rename from man/report_table.Rd rename to man/report_table_tte.Rd index 548cac66..b543d761 100644 --- a/man/report_table.Rd +++ b/man/report_table_tte.Rd @@ -1,11 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/reporting.R -\name{report_table} -\alias{report_table} -\title{helper function: sort out a nice report table to summarize survival analysis results -for \code{maic_tte_unanchor}} +\name{report_table_tte} +\alias{report_table_tte} +\title{helper function: sort out a nice report table to summarize survival analysis results} \usage{ -report_table(coxobj, medSurvobj, tag = NULL) +report_table_tte(coxobj, medSurvobj, tag = NULL) } \arguments{ \item{coxobj}{returned object from \code{\link[survival]{coxph}}} @@ -20,5 +19,4 @@ a data frame with sample size, incidence rate, median survival time with 95\% CI } \description{ helper function: sort out a nice report table to summarize survival analysis results -for \code{maic_tte_unanchor} } diff --git a/tests/testthat/data/test_binary_unanchored_expected.RData b/tests/testthat/data/test_binary_unanchored_expected.RData new file mode 100644 index 00000000..cabeb380 Binary files /dev/null and b/tests/testthat/data/test_binary_unanchored_expected.RData differ diff --git a/tests/testthat/data/test_tte_unanchored_expected.RData b/tests/testthat/data/test_tte_unanchored_expected.RData new file mode 100644 index 00000000..aebf1a65 Binary files /dev/null and b/tests/testthat/data/test_tte_unanchored_expected.RData differ diff --git a/tests/testthat/test-get_pseudo_ipd_binary.R b/tests/testthat/test-get_pseudo_ipd_binary.R new file mode 100644 index 00000000..e016976b --- /dev/null +++ b/tests/testthat/test-get_pseudo_ipd_binary.R @@ -0,0 +1,36 @@ +test_that("generate pseudo binary IPD given unstacked table as input", { + # example of unstacked + testdat <- data.frame(Yes = 280, No = 120) + rownames(testdat) <- "B" + testout <- get_pseudo_ipd_binary( + binary_agd = testdat, + format = "unstacked" + ) + expectout <- data.frame( + USUBJID = paste0("pseudo_binary_subj_", 1:400), + ARM = rep("B", 400), + RESPONSE = c(rep(TRUE, 280), rep(FALSE, 120)) + ) + + expect_equal(testout, expectout) +}) + +test_that("generate pseudo binary IPD given stacked table as input", { + # example of stacked + testout <- get_pseudo_ipd_binary( + binary_agd = data.frame( + ARM = rep("B", 2), + RESPONSE = c("YES", "NO"), + COUNT = c(280, 120) + ), + format = "stacked" + ) + + expectout <- data.frame( + USUBJID = paste0("pseudo_binary_subj_", 1:400), + ARM = rep("B", 400), + RESPONSE = c(rep(TRUE, 280), rep(FALSE, 120)) + ) + + expect_equal(testout, expectout) +}) diff --git a/tests/testthat/test-maic_unanchored.R b/tests/testthat/test-maic_unanchored.R new file mode 100644 index 00000000..c628e492 --- /dev/null +++ b/tests/testthat/test-maic_unanchored.R @@ -0,0 +1,171 @@ +test_that("test binary case", { + # load in prognostic IPD data and AgD + load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE)) + load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) + ipd_centered <- center_ipd(ipd = ipd, agd = agd) + + # estimate weights + centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN") + centered_colnames <- paste0(centered_colnames, "_CENTERED") + + weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames) + weighted_data2 <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 20) + + # get dummy binary IPD + adrs <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE)) + adrs$RESPONSE <- adrs$AVAL + + pseudo_adrs <- get_pseudo_ipd_binary( + binary_agd = data.frame( + ARM = rep("B", 2), + RESPONSE = c("YES", "NO"), + COUNT = c(280, 120) + ), + format = "stacked" + ) + + # unanchored binary MAIC, with CI based on sandwich estimator + testout <- + maic_unanchored( + weights_object = weighted_data, + ipd = adrs, + pseudo_ipd = pseudo_adrs, + trt_ipd = "A", + trt_agd = "B", + trt_var_ipd = "ARM", + trt_var_agd = "ARM", + endpoint_type = "binary", + endpoint_name = "Binary Endpoint", + eff_measure = "RR", + # binary specific args + binary_robust_cov_type = "HC3" + ) + + # unanchored binary MAIC, with bootstrapped CI + testout2 <- + maic_unanchored( + weights_object = weighted_data2, + ipd = adrs, + pseudo_ipd = pseudo_adrs, + trt_ipd = "A", + trt_agd = "B", + trt_var_ipd = "ARM", + trt_var_agd = "ARM", + endpoint_type = "binary", + endpoint_name = "Binary Endpoint", + eff_measure = "RR", + # binary specific args + binary_robust_cov_type = "HC3" + ) + + if (FALSE) { + # Manual snapshot of results + expectout <- testout + expectout2 <- testout2 + save(list = c("expectout", "expectout2"), file = test_path("data", "test_binary_unanchored_expected.RData")) + } + + load(test_path("data", "test_binary_unanchored_expected.RData")) + expect_equal(testout$descriptive, expectout$descriptive) + expect_equal(testout$inferential, expectout$inferential) + expect_equal(testout2$descriptive, expectout2$descriptive) + expect_equal(testout2$inferential, expectout2$inferential) +}) + + +test_that("test time to event case", { + # anchored example using maic_anchored for tte + # library(flexsurv) + + # Read in relevant ADaM data and rename variables of interest + adsl <- read.csv(system.file("extdata", "adsl.csv", + package = "maicplus", + mustWork = TRUE + )) + adtte <- read.csv(system.file("extdata", "adtte.csv", + package = "maicplus", + mustWork = TRUE + )) + adtte$TIME <- adtte$AVAL + adtte$EVENT <- adtte$EVNT + adtte <- adtte[adtte$ARM == "A", , drop = FALSE] + adsl <- adsl[adsl$USUBJID %in% adtte$USUBJID, , drop = FALSE] + + ### AgD + # Baseline aggregate data for the comparator population + target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv", + package = "maicplus", mustWork = TRUE + )) + # for time-to-event endpoints, pseudo IPD from digitalized KM + pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv", + package = "maicplus", + mustWork = TRUE + )) + pseudo_ipd$ARM <- "B" + + #### prepare data + target_pop <- process_agd(target_pop) + adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) + use_adsl <- center_ipd(ipd = adsl, agd = target_pop) + + #### derive weights + match_res <- estimate_weights( + data = use_adsl, + centered_colnames = grep("_CENTERED$", names(use_adsl)), + start_val = 0, + method = "BFGS" + ) + + match_res_boot <- estimate_weights( + data = use_adsl, + centered_colnames = grep("_CENTERED$", names(use_adsl)), + start_val = 0, + method = "BFGS", + n_boot_iteration = 500, + set_seed_boot = 1234 + ) + + # inferential result + testout <- maic_unanchored( + weights_object = match_res, + ipd = adtte, + pseudo_ipd = pseudo_ipd, + trt_var_ipd = "ARM", + trt_var_agd = "ARM", + trt_ipd = "A", + trt_agd = "B", + endpoint_name = "Overall Survival", + endpoint_type = "tte", + eff_measure = "HR", + time_scale = "month", + km_conf_type = "log-log" + ) + + testout2 <- maic_unanchored( + weights_object = match_res_boot, + ipd = adtte, + pseudo_ipd = pseudo_ipd, + trt_var_ipd = "ARM", + trt_var_agd = "ARM", + trt_ipd = "A", + trt_agd = "B", + endpoint_name = "Overall Survival", + endpoint_type = "tte", + eff_measure = "HR", + time_scale = "month", + km_conf_type = "log-log" + ) + + if (FALSE) { + # Manual snapshot of results + expectout <- testout + expectout2 <- testout2 + save(list = c("expectout", "expectout2"), file = test_path("data", "test_tte_unanchored_expected.RData")) + } + + load(test_path("data", "test_tte_unanchored_expected.RData")) + expect_equal(testout$descriptive, expectout$descriptive) + expect_equal(testout$inferential, expectout$inferential) + expect_equal(testout2$descriptive, expectout2$descriptive) + expect_equal(testout2$inferential, expectout2$inferential) +}) diff --git a/vignettes/using_maicplus.Rmd b/vignettes/using_maicplus.Rmd index bfda6671..9c619627 100644 --- a/vignettes/using_maicplus.Rmd +++ b/vignettes/using_maicplus.Rmd @@ -25,7 +25,7 @@ The methods described in this document are based on those originally described b described in the National Institute for Health and Care Excellence (NICE) Decision Support Unit (DSU) Technical Support Document (TSD) 18. [signorovitch2010; phillippo2016a] -MAIC methods are often required when: +MAIC methods are often required when: * There is no common comparator treatment to link a clinical trial of a new intervention to clinical trials of other treatments in a given disease area. For example if the only study of a new intervention is a single arm trial with no @@ -43,7 +43,7 @@ patients. Treatment effect modifiers are those variables that influence the rela to another. For example patients with a better performance status may experience a larger treatment benefit than those with a worse performance status. Under this assumption, every prognostic variable and every treatment effect modifier that is imbalanced between the two studies must be available. This assumption is generally considered very difficult to -meet. [phillippo2016a] There are several ways of identifying prognostic variables/treatment effect modifiers to be +meet. [phillippo2016a] There are several ways of identifying prognostic variables/treatment effect modifiers to be used in the MAIC analyses, some of which include: * Clinical expertise (when available to a project) @@ -55,7 +55,7 @@ treatment effect # Theory behind MAIC -We will briefly go over the theory behind MAIC. For detailed information, see Signorovitch et al. 2010. +We will briefly go over the theory behind MAIC. For detailed information, see Signorovitch et al. 2010. Let us define $t_i$ to be the treatment patient $i$ received. We assume $t_i=0$ if the patient received intervention (IPD) and $t_i=1$ if the patient received comparator treatment. The causal effect of treatment $T=0$ vs $T=1$ on the @@ -92,7 +92,7 @@ consistent estimate of the causal effect of intervention vs comparator treatment \] We could transform transform IPD by subtracting the aggregate data means (this is why centering is needed when -preprocessing). +preprocessing). \[ 0=\sum_{i=1}^{n}x_{i}exp(x_{i}^{T}\hat{\beta}) @@ -111,7 +111,7 @@ Q''(\beta)=\sum_{i=1}^{n}x_ix_i^Texp(x_{i}^{T}\hat{\beta}) \] Since $Q''(\beta)$ is positive-definite for all $\beta$, $Q(\beta)$ is convex and any finite solution from -the equation is unique and corresponds to the global minimum of $Q(\beta)$. Thus, we can use optimization +the equation is unique and corresponds to the global minimum of $Q(\beta)$. Thus, we can use optimization methods to calculate $\beta$. # Example scenario @@ -134,7 +134,7 @@ Additional suggested packages for this vignette: ```{r} library(dplyr) # this is used for data merging/cleaning. Package itself does not depend on dplyr -library(clubSandwich) # For robust standard error in logistic regression +# library(clubSandwich) # For robust standard error in logistic regression library(sandwich) library(survival) @@ -233,9 +233,9 @@ head(ipd_centered) ### How to handle standard deviation aggregate summary As described by Phillippo et al. 2016, balancing on both mean and standard deviation for continuous variables (where -possible) may be considered in some cases. If a standard deviation is provided in the comparator population, -preprocessing is done so that in the target population, $E(X^2)$ is calculated using the variance formula -$Var(X)=E(X^{2})-E(X)^{2}$. This $E(X^2)$ in the target population is matched with the IPD level data, which is why +possible) may be considered in some cases. If a standard deviation is provided in the comparator population, +preprocessing is done so that in the target population, $E(X^2)$ is calculated using the variance formula +$Var(X)=E(X^{2})-E(X)^{2}$. This $E(X^2)$ in the target population is matched with the IPD level data, which is why $X^{2}$ was calculated during the preprocessing stage of IPD. ### How to handle median aggregate summary @@ -265,7 +265,7 @@ plot(weighted_data) plot(weighted_data, ggplot = TRUE) ``` -Another check after the weights are calculated is to look at how the weighted covariates match with the aggregate data +Another check after the weights are calculated is to look at how the weighted covariates match with the aggregate data summary. ```{r} @@ -276,7 +276,7 @@ outdata # Time to event analysis -We first need to combine internal IPD data with pseudo comparator IPD. To obtain pseudo comparator IPD, we would +We first need to combine internal IPD data with pseudo comparator IPD. To obtain pseudo comparator IPD, we would digitize Kaplan Meier curves from the comparator study. ```{r} @@ -335,8 +335,8 @@ There is also a `"ggplot"` option for Kaplan-Meier curves using `survminer` R pa We can then fit a cox regression model using the combined dataset. For the weight adjusted cox regression, we fit the model with robust standard errors. -Along with the hazard ratios, we can also find median survival time using `medSurv_makeup` function. Then, -`report_table` function nicely combines the information together and create a result table. +Along with the hazard ratios, we can also find median survival time using `medSurv_makeup` function. Then, +`report_table_tte` function nicely combines the information together and create a result table. ```{r} # Fit a Cox model with/without weights to estimate the HR @@ -352,8 +352,8 @@ medSurv_out <- rbind(medSurv, medSurv_adj) medSurv_out rbind( - report_table(unweighted_cox, medSurv, tag = paste0("Before/", "Overall survival")), - report_table(weighted_cox, medSurv_adj, tag = paste0("After/", "Overall survival")) + report_table_tte(unweighted_cox, medSurv, tag = paste0("Before/", "Overall survival")), + report_table_tte(weighted_cox, medSurv_adj, tag = paste0("After/", "Overall survival")) ) ``` @@ -482,13 +482,14 @@ names(OR_CI_wtd) <- c("OR", "OR_low_CI", "OR_upp_CI") OR_CI_wtd # Robust standard error -vmod <- clubSandwich::vcovCR(weighted_OR, cluster = seq_len(dim(combined_data_binary)[1]), type = "CR2") -coef_res <- clubSandwich::conf_int(weighted_OR, vmod, coef = 2) - -OR_CI_robust <- exp(with(coef_res, c(beta, CI_L, CI_U))) -names(OR_CI_robust) <- c("Estimate", "Lower 95% CI", "Upper 95% CI") -OR_CI_robust - +if (requireNamespace("clubSandwich")) { + vmod <- clubSandwich::vcovCR(weighted_OR, cluster = seq_len(dim(combined_data_binary)[1]), type = "CR2") + coef_res <- clubSandwich::conf_int(weighted_OR, vmod, coef = 2) + + OR_CI_robust <- exp(with(coef_res, c(beta, CI_L, CI_U))) + names(OR_CI_robust) <- c("Estimate", "Lower 95% CI", "Upper 95% CI") + OR_CI_robust +} # Using sandwich package V.sw <- sandwich::vcovHC(weighted_OR) # white's estimator SD <- sqrt(V.sw[2, 2])