diff --git a/R/bootstrapping.R b/R/bootstrapping.R new file mode 100644 index 00000000..84d041f4 --- /dev/null +++ b/R/bootstrapping.R @@ -0,0 +1,56 @@ +#' Bootstrapping for MAIC weighted hazard ratios +#' +#' @param intervention_data A data frame containing individual patient data from the internal IPD study including columns: +#' treatment, time, status, centered baseline characteristics. +#' @param centered_colnames A character vector giving the names of the covariates to use +#' in matching. These names must match the column names in intervention_data. +#' @param i Index used to select a sample within \code{\link{boot}}. +#' @param model A model formula in the form 'Surv(Time, Event==1) ~ ARM'. +#' Variable names need to match the corresponding columns in intervention_data. +#' @param comparator_data A data frame containing pseudo individual patient data +#' from the comparator study needed to derive the relative treatment effect +#' including columns: time, event, weight=1, scaled_weight=1. +#' The outcome variables names must match intervention_data. +#' @param min_weight A numeric value that defines the minimum weight allowed. +#' This value (default 0.0001) will replace weights estimated at 0 in a sample. +#' @param trt_ext A character giving the name of the comparator treatment to be used as reference +#' +#' @details This function is intended to be used in conjunction with the +#' \code{\link{boot}} function to return the statistic to be +#' bootstrapped. In this case by performing MAIC weighting using +#' {\link{estimate_weights}} and returning a weighted hazard ratio (HR) from a +#' Cox proportional hazards model. This is used as the 'statistic' argument in +#' the boot function. +#' +#' @return The HR as a numeric value. +#' @seealso \code{\link{estimate_weights}}, \code{\link{boot}} +#' @example inst/examples/MAIC_example_analysis.R +#' @export + + + +bootstrap_HR <- function(intervention_data, centered_colnames, i, model, comparator_data, min_weight = 0.0001, trt_ext) { + # create a visible binding for R CMD check + wt <- NULL + + # Samples the data + bootstrap_data <- intervention_data[i, ] + + # Estimates weights + perform_wt <- estimate_weights(data = bootstrap_data, centered_colnames = centered_colnames) + + + # Give comparator data weights of 1 + comparator_data_wts <- comparator_data %>% dplyr::mutate(weights = 1, scaled_weights = 1) + + # Add the comparator data + combined_data <- dplyr::bind_rows(perform_wt$data, comparator_data_wts) + combined_data$treatment <- stats::relevel(as.factor(combined_data$treatment), ref = trt_ext) + + # set weights that are below min_weight to min_weight to avoid issues with 0 values + combined_data$wt <- ifelse(combined_data$weights < min_weight, min_weight, combined_data$weights) + + # survival data stat + cox_model <- survival::coxph(model, data = combined_data, weights = wt) + HR <- exp(cox_model$coefficients) +} diff --git a/R/maic_unanchored_tte.R b/R/maic_unanchored_tte.R index 86583ff1..d9c7df08 100644 --- a/R/maic_unanchored_tte.R +++ b/R/maic_unanchored_tte.R @@ -2,6 +2,8 @@ #' #' @param useWt a numeric vector of individual MAIC weights, length should the same as \code{nrow(dat)} #' @param dat a data frame that meet format requirements in 'Details', individual patient data (IPD) of internal trial +#' @param dat_centered a data frame containing individual patient data from the internal IPD study including columns: +#' treatment, time, status, centered baseline characteristics. #' @param dat_ext a data frame, pseudo IPD from digitized KM curve of external trial #' @param trt a character string, name of the interested treatment in internal trial (real IPD) #' @param trt_ext character string, name of the interested comparator in external trial used to subset \code{dat_ext} (pseudo IPD) @@ -22,7 +24,7 @@ #' @export #' #' @examples -maic_tte_unanchor <- function(useWt, dat, dat_ext, trt, trt_ext, +maic_tte_unanchor <- function(useWt, dat, dat_centered, dat_ext, trt, trt_ext, time_scale = "month", endpoint_name = "OS", transform = "log") { timeUnit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1) @@ -32,6 +34,17 @@ maic_tte_unanchor <- function(useWt, dat, dat_ext, trt, trt_ext, res <- list() + # Bootstrap the confidence interval of the weighted HR + + HR_bootstraps <- boot( + data = dat_centered, # intervention data + statistic = bootstrap_HR, # bootstrap the HR (defined in the maicplus package) + R = 1000, # number of bootstrap samples + comparator_data = dat_ext, # comparator pseudo data + centered_colnames = grep("_CENTERED$", names(dat_centered)), # matching variables + model = Surv(time, status == 1) ~ treatment # model to fit + ) + # set up IPD 'dat' with maic weights dat$weight <- useWt dat <- dat[, c("treatment", "time", "status", "weight")] @@ -129,6 +142,30 @@ maic_tte_unanchor <- function(useWt, dat, dat_ext, trt, trt_ext, ) res[["plot_resid_after"]] <- grDevices::recordPlot() + + + # ==> Report 4: Bootstrapping + + ## Bootstrapping diagnostics + # Summarize bootstrap estimates in a histogram + # Vertical lines indicate the median and upper and lower CIs + hist(HR_bootstraps$t, main = "", xlab = "Boostrapped HR") + abline(v = quantile(HR_bootstraps$t, probs = c(0.025, 0.5, 0.975)), lty = 2) + + # Median of the bootstrap samples + HR_median <- median(HR_bootstraps$t) + + # Bootstrap CI - Percentile CI + boot_ci_HR <- boot.ci(boot.out = HR_bootstraps, index = 1, type = "perc") + + # Bootstrap CI - BCa CI + boot_ci_HR_BCA <- boot.ci(boot.out = HR_bootstraps, index = 1, type = "bca") + + res[["boot_hist"]] <- grDevices::recordPlot() + res[["HR_median"]] <- HR_median + res[["boot_ci_HR"]] <- boot_ci_HR + res[["boot_ci_HR_BCA"]] <- boot_ci_HR_BCA + # output res } diff --git a/man/bootstrap_HR.Rd b/man/bootstrap_HR.Rd new file mode 100644 index 00000000..f4102e7a --- /dev/null +++ b/man/bootstrap_HR.Rd @@ -0,0 +1,332 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bootstrapping.R +\name{bootstrap_HR} +\alias{bootstrap_HR} +\title{Bootstrapping for MAIC weighted hazard ratios} +\usage{ +bootstrap_HR( + intervention_data, + centered_colnames, + i, + model, + comparator_data, + min_weight = 1e-04, + trt_ext +) +} +\arguments{ +\item{intervention_data}{A data frame containing individual patient data from the internal IPD study including columns: +treatment, time, status, centered baseline characteristics.} + +\item{centered_colnames}{A character vector giving the names of the covariates to use +in matching. These names must match the column names in intervention_data.} + +\item{i}{Index used to select a sample within \code{\link{boot}}.} + +\item{model}{A model formula in the form 'Surv(Time, Event==1) ~ ARM'. +Variable names need to match the corresponding columns in intervention_data.} + +\item{comparator_data}{A data frame containing pseudo individual patient data +from the comparator study needed to derive the relative treatment effect +including columns: time, event, weight=1, scaled_weight=1. +The outcome variables names must match intervention_data.} + +\item{min_weight}{A numeric value that defines the minimum weight allowed. +This value (default 0.0001) will replace weights estimated at 0 in a sample.} + +\item{trt_ext}{A character giving the name of the comparator treatment to be used as reference} +} +\value{ +The HR as a numeric value. +} +\description{ +Bootstrapping for MAIC weighted hazard ratios +} +\details{ +This function is intended to be used in conjunction with the +\code{\link{boot}} function to return the statistic to be +bootstrapped. In this case by performing MAIC weighting using +{\link{estimate_weights}} and returning a weighted hazard ratio (HR) from a +Cox proportional hazards model. This is used as the 'statistic' argument in +the boot function. +} +\examples{ + +# This example code combines weighted individual patient data for 'intervention' +# and pseudo individual patient data for 'comparator' and performs analyses for +# two endpoints: overall survival (a time to event outcome) and objective +# response (a binary outcome) + +library(dplyr) +library(boot) +library(survival) +library(MAIC) +library(ggplot2) +library(survminer) +library(flextable) +library(officer) + +# load intervention data with weights saved in est_weights +data(est_weights, package = "MAIC") + +# Combine data ----------------------------------------------------------------- + +# Combine the the comparator pseudo data with the analysis data, outputted from +# the estimate_weights function + +# Read in digitised pseudo survival data for the comparator +comparator_surv <- read.csv(system.file("extdata", "psuedo_IPD.csv", + package = "MAIC", mustWork = TRUE)) \%>\% + rename(Time=Time, Event=Event) + + +# Simulate response data based on the known proportion of responders +comparator_n <- nrow(comparator_surv) # total number of patients in the comparator data +comparator_prop_events <- 0.4 # proportion of responders +# Calculate number with event +# Use round() to ensure we end up with a whole number of people +# number without an event = Total N - number with event to ensure we keep the +# same number of patients +n_with_event <- round(comparator_n*comparator_prop_events, digits = 0) +comparator_binary <- data.frame("response"= c(rep(1, n_with_event), rep(0, comparator_n - n_with_event))) + + +# Join survival and response comparator data +# Note the rows do not represent observations from a particular patient +# i.e. there is no relationship between the survival time and response status +# for a given row since this is simulated data +# In a real data set this relationship would be present +comparator_input <- cbind(comparator_surv, comparator_binary) \%>\% + mutate(wt=1, wt_rs=1, ARM="Comparator") # All patients have weight = 1 +head(comparator_input) + +# Join comparator data with the intervention data (naming of variables should be +# consistent between both datasets) +# Set factor levels to ensure "Comparator" is the reference treatment +combined_data <- bind_rows(est_weights$analysis_data, comparator_input) +combined_data$ARM <- relevel(as.factor(combined_data$ARM), ref="Comparator") + + +#### Estimating the relative effect -------------------------------------------- + +### Example for survival data -------------------------------------------------- + +## Kaplan-Meier plot + +# Unweighted intervention data +KM_int <- survfit(formula = Surv(Time, Event==1) ~ 1 , + data = est_weights$analysis_data, + type="kaplan-meier") + +# Weighted intervention data +KM_int_wtd <- survfit(formula = Surv(Time, Event==1) ~ 1 , + data = est_weights$analysis_data, + weights = wt, + type="kaplan-meier") + +# Comparator data +KM_comp <- survfit(formula = Surv(Time, Event==1) ~ 1 , + data = comparator_input, + type="kaplan-meier") + +# Combine the survfit objects ready for ggsurvplot +KM_list <- list(Intervention = KM_int, + Intervention_weighted = KM_int_wtd, + Comparator = KM_comp) + +#Produce the Kaplan-Meier plot +KM_plot <- ggsurvplot(KM_list, + combine = TRUE, + risk.table=TRUE, # numbers at risk displayed on the plot + break.x.by=50, + xlab="Time (days)", + censor=FALSE, + legend.title = "Treatment", + title = "Kaplan-Meier plot of overall survival", + legend.labs=c("Intervention", + "Intervention weighted", + "Comparator"), + font.legend = list(size = 10)) + + guides(colour=guide_legend(nrow = 2)) + + +## Estimating the hazard ratio (HR) + +# Fit a Cox model without weights to estimate the unweighted HR +unweighted_cox <- coxph(Surv(Time, Event==1) ~ ARM, data = combined_data) + +HR_CI_cox <- summary(unweighted_cox)$conf.int \%>\% + as.data.frame() \%>\% + transmute("HR" = `exp(coef)`, + "HR_low_CI" = `lower .95`, + "HR_upp_CI" = `upper .95`) + +# Fit a Cox model with weights to estimate the weighted HR +weighted_cox <- coxph(Surv(Time, Event==1) ~ ARM, + data = combined_data, + weights = wt) + +HR_CI_cox_wtd <- summary(weighted_cox)$conf.int \%>\% + as.data.frame() \%>\% + transmute("HR" = `exp(coef)`, + "HR_low_CI" = `lower .95`, + "HR_upp_CI" = `upper .95`) + +## Bootstrap the confidence interval of the weighted HR + +HR_bootstraps <- boot(data = est_weights$analysis_data, # intervention data + statistic = bootstrap_HR, # bootstrap the HR (defined in the MAIC package) + R=1000, # number of bootstrap samples + comparator_data = comparator_input, # comparator pseudo data + matching = est_weights$matching_vars, # matching variables + model = Surv(Time, Event==1) ~ ARM # model to fit + ) + +## Bootstrapping diagnostics +# Summarize bootstrap estimates in a histogram +# Vertical lines indicate the median and upper and lower CIs +hist(HR_bootstraps$t, main = "", xlab = "Boostrapped HR") +abline(v= quantile(HR_bootstraps$t, probs = c(0.025, 0.5, 0.975)), lty=2) + +# Median of the bootstrap samples +HR_median <- median(HR_bootstraps$t) + +# Bootstrap CI - Percentile CI +boot_ci_HR <- boot.ci(boot.out = HR_bootstraps, index=1, type="perc") + +# Bootstrap CI - BCa CI +boot_ci_HR_BCA <- boot.ci(boot.out = HR_bootstraps, index=1, type="bca") + +## Summary + +# Produce a summary of HRs and CIs +HR_summ <- rbind(HR_CI_cox, HR_CI_cox_wtd) \%>\% # Unweighted and weighted HRs and CIs from Cox models + mutate(Method = c("HR (95\% CI) from unadjusted Cox model", + "HR (95\% CI) from weighted Cox model")) \%>\% + + # Median bootstrapped HR and 95\% percentile CI + rbind(data.frame("HR" = HR_median, + "HR_low_CI" = boot_ci_HR$percent[4], + "HR_upp_CI" = boot_ci_HR$percent[5], + "Method"="Bootstrap median HR (95\% percentile CI)")) \%>\% + + # Median bootstrapped HR and 95\% bias-corrected and accelerated bootstrap CI + rbind(data.frame("HR" = HR_median, + "HR_low_CI" = boot_ci_HR_BCA$bca[4], + "HR_upp_CI" = boot_ci_HR_BCA$bca[5], + "Method"="Bootstrap median HR (95\% BCa CI)")) \%>\% + #apply rounding for numeric columns + mutate_if(.predicate = is.numeric, sprintf, fmt = "\%.3f") \%>\% + #format for output + transmute(Method, HR_95_CI = paste0(HR, " (", HR_low_CI, " to ", HR_upp_CI, ")")) + +# Summarize the results in a table suitable for word/ powerpoint +HR_table <- HR_summ \%>\% + regulartable() \%>\% #make it a flextable object + set_header_labels(Method = "Method", HR_95_CI = "Hazard ratio (95\% CI)") \%>\% + font(font = 'Arial', part = 'all') \%>\% + fontsize(size = 14, part = 'all') \%>\% + bold(part = 'header') \%>\% + align(align = 'center', part = 'all') \%>\% + align(j = 1, align = 'left', part = 'all') \%>\% + border_outer(border = fp_border()) \%>\% + border_inner_h(border = fp_border()) \%>\% + border_inner_v(border = fp_border()) \%>\% + autofit(add_w = 0.2, add_h = 2) + + +### Example for response data -------------------------------------------------- + +## Estimating the odds ratio (OR) + +# Fit a logistic regression model without weights to estimate the unweighted OR +unweighted_OR <- glm(formula = response~ARM, + family = binomial(link="logit"), + data = combined_data) + +# Log odds ratio +log_OR_CI_logit <- cbind(coef(unweighted_OR), confint.default(unweighted_OR, level = 0.95))[2,] + +# Exponentiate to get Odds ratio +OR_CI_logit <- exp(log_OR_CI_logit) +#tidy up naming +names(OR_CI_logit) <- c("OR", "OR_low_CI", "OR_upp_CI") + +# Fit a logistic regression model with weights to estimate the weighted OR +weighted_OR <- suppressWarnings(glm(formula = response~ARM, + family = binomial(link="logit"), + data = combined_data, + weight = wt)) + +# Weighted log odds ratio +log_OR_CI_logit_wtd <- cbind(coef(weighted_OR), confint.default(weighted_OR, level = 0.95))[2,] + +# Exponentiate to get weighted odds ratio +OR_CI_logit_wtd <- exp(log_OR_CI_logit_wtd) +#tidy up naming +names(OR_CI_logit_wtd) <- c("OR", "OR_low_CI", "OR_upp_CI") + +## Bootstrap the confidence interval of the weighted OR +OR_bootstraps <- boot(data = est_weights$analysis_data, # intervention data + statistic = bootstrap_OR, # bootstrap the OR + R = 1000, # number of bootstrap samples + comparator_data = comparator_input, # comparator pseudo data + matching = est_weights$matching_vars, # matching variables + model = 'response ~ ARM' # model to fit + ) + +## Bootstrapping diagnostics +# Summarize bootstrap estimates in a histogram +# Vertical lines indicate the median and upper and lower CIs +hist(OR_bootstraps$t, main = "", xlab = "Boostrapped OR") +abline(v= quantile(OR_bootstraps$t, probs = c(0.025,0.5,0.975)), lty=2) + +# Median of the bootstrap samples +OR_median <- median(OR_bootstraps$t) + +# Bootstrap CI - Percentile CI +boot_ci_OR <- boot.ci(boot.out = OR_bootstraps, index=1, type="perc") + +# Bootstrap CI - BCa CI +boot_ci_OR_BCA <- boot.ci(boot.out = OR_bootstraps, index=1, type="bca") + +## Summary +# Produce a summary of ORs and CIs +OR_summ <- rbind(OR_CI_logit, OR_CI_logit_wtd) \%>\% # Unweighted and weighted ORs and CIs + as.data.frame() \%>\% + mutate(Method = c("OR (95\% CI) from unadjusted logistic regression model", + "OR (95\% CI) from weighted logistic regression model")) \%>\% + + # Median bootstrapped HR and 95\% percentile CI + rbind(data.frame("OR" = OR_median, + "OR_low_CI" = boot_ci_OR$percent[4], + "OR_upp_CI" = boot_ci_OR$percent[5], + "Method"="Bootstrap median HR (95\% percentile CI)")) \%>\% + + # Median bootstrapped HR and 95\% bias-corrected and accelerated bootstrap CI + rbind(data.frame("OR" = OR_median, + "OR_low_CI" = boot_ci_OR_BCA$bca[4], + "OR_upp_CI" = boot_ci_OR_BCA$bca[5], + "Method"="Bootstrap median HR (95\% BCa CI)")) \%>\% + #apply rounding for numeric columns + mutate_if(.predicate = is.numeric, sprintf, fmt = "\%.3f") \%>\% + #format for output + transmute(Method, OR_95_CI = paste0(OR, " (", OR_low_CI, " to ", OR_upp_CI, ")")) + +# turns the results to a table suitable for word/ powerpoint +OR_table <- OR_summ \%>\% + regulartable() \%>\% #make it a flextable object + set_header_labels(Method = "Method", OR_95_CI = "Odds ratio (95\% CI)") \%>\% + font(font = 'Arial', part = 'all') \%>\% + fontsize(size = 14, part = 'all') \%>\% + bold(part = 'header') \%>\% + align(align = 'center', part = 'all') \%>\% + align(j = 1, align = 'left', part = 'all') \%>\% + border_outer(border = fp_border()) \%>\% + border_inner_h(border = fp_border()) \%>\% + border_inner_v(border = fp_border()) \%>\% + autofit(add_w = 0.2) +} +\seealso{ +\code{\link{estimate_weights}}, \code{\link{boot}} +} diff --git a/man/maic_tte_unanchor.Rd b/man/maic_tte_unanchor.Rd index 1cdd650a..486eaf08 100644 --- a/man/maic_tte_unanchor.Rd +++ b/man/maic_tte_unanchor.Rd @@ -7,6 +7,7 @@ maic_tte_unanchor( useWt, dat, + dat_centered, dat_ext, trt, trt_ext, @@ -20,6 +21,9 @@ maic_tte_unanchor( \item{dat}{a data frame that meet format requirements in 'Details', individual patient data (IPD) of internal trial} +\item{dat_centered}{a data frame containing individual patient data from the internal IPD study including columns: +treatment, time, status, centered baseline characteristics.} + \item{dat_ext}{a data frame, pseudo IPD from digitized KM curve of external trial} \item{trt}{a character string, name of the interested treatment in internal trial (real IPD)}