update

hta-pharma · Aug 18, 2023 · 776ebd4 · 776ebd4
1 parent 410fdba
commit 776ebd4
Show file tree

Hide file tree

Showing 15 changed files with 173 additions and 146 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -5,9 +5,9 @@ export(bucher)
 export(center_ipd)
 export(check_weights)
 export(dummize_ipd)
-export(ess_footnote_text)
 export(estimate_weights)
 export(generate_survival_data)
+export(km_plot)
 export(log_cum_haz_plot)
 export(maic_tte_unanchor)
 export(medSurv_makeup)

diff --git a/R/matching.R b/R/matching.R
@@ -1,10 +1,12 @@
 # Functions for matching step: estimation of individual weights
 
+# functions to be exported ---------------------------------------
+
 #' Derive individual weights in the matching step of MAIC
 #'
-#' This function takes individual patient data (IPD) with centered covariates
-#' (effect modifiers and/or prognostic variables) as input and generates
-#' weights for each individual in IPD trial to match the covariates in aggregate data.
+#' Assuming data is properly processed, this function takes individual patient data (IPD) with centered covariates
+#' (effect modifiers and/or prognostic variables) as input, and generates weights for each individual in IPD trial
+#' to match the covariates in aggregate data.
 #'
 #' @param data a numeric matrix, centered covariates of IPD, no missing value in any cell is allowed
 #' @param centered_colnames a character or numeric vector (column indicators) of centered covariates
@@ -24,6 +26,14 @@
 #' \item{ess}{effective sample size, square of sum divided by sum of squares}
 #' \item{opt}{R object returned by \code{base::optim()}, for assess convergence and other details}
 #' }
+#'
+#' @examples
+#' load(system.file("extdata", "ipd_centered.rda", package = "maicplus", mustWork = TRUE))
+#'
+#' centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
+#' centered_colnames <- paste0(centered_colnames, "_CENTERED")
+#' match_res <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
+#'
 #' @export
 
 estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, method = "BFGS", ...) {
@@ -112,8 +122,15 @@ estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, meth
 #' @param vline_col a string, color for the vertical line in the histogram
 #' @param main_title a character string, main title of the plot
 #'
-#' @return a plot of unscaled or scaled weights
 #' @examples
+#' load(system.file("extdata", "match_res.rda", package = "maicplus", mustWork = TRUE))
+#' wt <- match_res$data$weights
+#' wt_scaled <- match_res$data$scaled_weights
+#' par(mfrow = c(1, 2))
+#' plot_weights(wt, bin_col = "orange", vline_col = "darkblue")
+#' plot_weights(wt_scaled, main_title = "Scaled Individual Weights")
+#'
+#' @return a plot of unscaled or scaled weights
 #' @export
 
 plot_weights <- function(wt, bin_col = "#6ECEB2", vline_col = "#688CE8", main_title = "Unscaled Individual Weights") {
@@ -152,66 +169,25 @@ plot_weights <- function(wt, bin_col = "#6ECEB2", vline_col = "#688CE8", main_ti
 #' This function checks to see if the optimization is done properly by checking the covariate averages
 #' before and after adjustment.
 #'
-#' @param weighted_data object returned after calculating weights using \code{\link{estimate_weights}}
+#' @param match_res object returned after calculating weights using \code{\link{estimate_weights}}
 #' @param processed_agd a data frame, object returned after using \code{\link{process_agd}} or
 #' aggregated data following the same naming convention
+#' @param mean_digits number of digits for rounding mean columns in the output
+#' @param prop_digits number of digits for rounding proportion columns in the output
+#' @param sd_digits number of digits for rounding mean columns in the output
+#'
+#' @examples
+#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
+#' load(system.file("extdata", "match_res.rda", package = "maicplus", mustWork = TRUE))
+#' outdata <- check_weights(match_res, processed_agd = agd)
+#' print(outdata)
 #'
 #' @import DescTools
 #'
 #' @return data.frame of weighted and unweighted covariate averages of the IPD, and average of aggregate data
 #' @export
-#'
-#' @examples
-#' adsl <- read.csv(system.file("extdata", "adsl.csv",
-#' package = "maicplus",
-#' mustWork = TRUE
-#' ))
-#' adrs <- read.csv(system.file("extdata", "adrs.csv",
-#' package = "maicplus",
-#' mustWork = TRUE
-#' ))
-#' adtte <- read.csv(system.file("extdata", "adtte.csv",
-#' package = "maicplus",
-#' mustWork = TRUE
-#' ))
-#'
-#' ### AgD
-#' # Baseline aggregate data for the comparator population
-#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
-#' package = "maicplus", mustWork = TRUE
-#' ))
-#' # target_pop2 <- read.csv(system.file("extdata", "aggregate_data_example_2.csv",
-#' # package = "maicplus", mustWork = TRUE))
-#' # target_pop3 <- read.csv(system.file("extdata", "aggregate_data_example_3.csv",
-#' # package = "maicplus", mustWork = TRUE))
-#'
-#' # for time-to-event endpoints, pseudo IPD from digitalized KM
-#' pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
-#' package = "maicplus",
-#' mustWork = TRUE
-#' ))
-#'
-#' #### prepare data ----------------------------------------------------------
-#' target_pop <- process_agd(target_pop)
-#' # target_pop2 <- process_agd(target_pop2) # demo of process_agd in different scenarios
-#' # target_pop3 <- process_agd(target_pop3) # demo of process_agd in different scenarios
-#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
-#' use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
-#'
-#' match_res <- estimate_weights(
-#' data = use_adsl,
-#' centered_colnames = grep("_CENTERED$", names(use_adsl)),
-#' start_val = 0,
-#' method = "BFGS"
-#' )
-#'
-#' check <- check_weights(
-#' weighted_data = match_res,
-#' processed_agd = target_pop
-#' )
-#'
-#' print(check)
-#'
+
+
 check_weights <- function(weighted_data, processed_agd) {
  ipd_with_weights <- weighted_data$data
  match_cov <- weighted_data$centered_colnames
@@ -292,9 +268,9 @@ check_weights <- function(weighted_data, processed_agd) {
 #' @param sd_digits number of digits for rounding mean columns in the output
 #' @param digits minimal number of significant digits, see [print.default].
 #' @param ... further arguments to [print.data.frame]
-#'
 #' @describeIn check_weights Print method for check_weights objects
 #' @export
+
 print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd_digits = 3, digits = getOption("digits"), ...) {
  round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
  round_digits[is.na(round_digits)] <- digits
@@ -313,15 +289,12 @@ print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd
  }
 }
 
-#' Prints Note on Expected Sample Size Reduction
+#' Note on Expected Sample Size Reduction
 #'
 #' @param width Number of characters to break string into new lines (`\n`).
 #'
 #' @return A character string
-#' @export
-#'
-#' @examples
-#' ess_footnote_text(width = 80)
+
 ess_footnote_text <- function(width = 0.9 * getOption("width")) {
  text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals
 (https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common

diff --git a/R/process_data.R b/R/process_data.R
@@ -113,6 +113,10 @@ process_agd <- function(raw_agd) {
 #' @param dummize_cols vector of column names to binarize
 #' @param dummize_ref_level vector of reference level of the variables to binarize
 #'
+#' @examples
+#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
+#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+#'
 #' @return ipd with dummized columns
 #' @export
 
@@ -144,6 +148,13 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
 #' In this case, SEX_MALE should also be available in the aggregate data.
 #' @param agd pre-processed aggregate data which contain STUDY, ARM, and N. Variable names should be followed
 #' by legal suffixes (i.e. MEAN, MEDIAN, SD, or PROP). Note that COUNT suffix is no longer accepted.
+#' @examples
+#' ipd <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
+#' ipd <- dummize_ipd(ipd, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv", package = "maicplus", mustWork = TRUE))
+#' agd <- process_agd(target_pop)
+#'
+#' ipd_centered <- center_ipd(ipd = ipd, agd = agd)
 #'
 #' @return centered ipd using aggregate level data averages
 #' @export

diff --git a/R/survival-helper.R b/R/survival-helper.R
@@ -1,13 +1,19 @@
 #' helper function: makeup to get median survival time from a `survival::survfit` object
 #'
-#' Extract and display median survival time with confidence interval
+#' extract and display median survival time with confidence interval
 #'
 #' @param km_fit returned object from \code{survival::survfit}
 #' @param legend a character string, name used in 'type' column in returned data frame
 #' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
 #'
+#' @examples
+#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
+#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
+#' medSurv <- medSurv_makeup(kmobj, legend = "before matching", time_scale = "day")
+#'
 #' @return a data frame with a index column 'type', median survival time and confidence interval
 #' @export
+
 medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
  time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)
 
@@ -29,11 +35,16 @@ medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
 }
 
 
-
-#' Helper function to select a set of variables used for Kaplan-Meier plot
+#' Helper function to select set of variables used for Kaplan-Meier plot
 #'
 #' @param km_fit returned object from \code{survival::survfit}
-#' @return a list of data frames of variables from \code{survival::survfit}. Data frames are divided by treatment.
+#'
+#' @examples
+#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
+#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
+#' survobj <- survfit_makeup(kmobj)
+#'
+#' @return a list of data frames of variables from survfit. Data frame is divided by treatment.
 #' @export
 
 survfit_makeup <- function(km_fit) {
@@ -57,13 +68,21 @@ survfit_makeup <- function(km_fit) {
 #'
 #' @param km_fit_before returned object from \code{survival::survfit} before adjustment
 #' @param km_fit_after returned object from \code{survival::survfit} after adjustment
-#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
-#' @param trt  a character string, name of the interested treatment in internal trial (real IPD)
-#' @param trt_ext character string, name of the interested comparator in external trial used to
-#' subset \code{dat_ext} (pseudo IPD)
-#' @param endpoint_name a character string, name of the endpoint
+#' @param time_scale time unit of median survival time, taking a value of 'year', 'month', 'week' or 'day'
+#' @param trt internal trial treatment
+#' @param trt_ext external trial treatment
+#' @param endpoint_name name of the endpoint
+#' @param line_col color of the line curves with the order of external, internal unadjusted, and internal adjusted
 #'
-#' @return a KM plot
+#' @return a Kaplan-Meier plot
+#' @examples
+#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
+#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
+#' kmobj_adj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, weights = combined_data_tte$weight, conf.type = "log-log")
+#' par(cex.main = 0.85)
+#' km_plot(kmobj, kmobj_adj, time_scale = "month", trt = "A", trt_ext = "B", endpoint_name = "OS")
+#' @export
+
 km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext, endpoint_name = "") {
  time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)
 
@@ -152,15 +171,20 @@ km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext
 #'
 #' a diagnosis plot for proportional hazard assumption, versus log-time (default) or time
 #'
-#' @param clldat object returned from \code{\link{survfit_makeup}}
+#' @param km_fit returned object from \code{survival::survfit}
 #' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
 #' @param log_time logical, TRUE (default) or FALSE
 #' @param endpoint_name a character string, name of the endpoint
 #' @param subtitle a character string, subtitle of the plot
 #' @param exclude_censor logical, should censored data point be plotted
+#' @examples
+#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
+#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
+#' log_cum_haz_plot(kmobj, time_scale = "month", log_time = TRUE, endpoint_name = "OS", subtitle = "(Before Matching)")
 #'
 #' @return a plot
 #' @export
+
 log_cum_haz_plot <- function(clldat,
  time_scale,
  log_time = TRUE,
@@ -222,6 +246,10 @@ log_cum_haz_plot <- function(clldat,
 #' @param log_time logical, TRUE (default) or FALSE
 #' @param endpoint_name a character string, name of the endpoint
 #' @param subtitle a character string, subtitle of the plot
+#' @examples
+#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
+#' unweighted_cox <- coxph(Surv(TIME, EVENT == 1) ~ ARM, data = combined_data_tte)
+#' resid_plot(unweighted_cox, time_scale = "month", log_time = TRUE, endpoint_name = "OS", subtitle = "(Before Matching)")
 #'
 #' @return a plot
 #' @export

diff --git a/man/center_ipd.Rd b/man/center_ipd.Rd
diff --git a/man/check_weights.Rd b/man/check_weights.Rd
diff --git a/man/dummize_ipd.Rd b/man/dummize_ipd.Rd