hta-pharma · gravesti · Sep 1, 2023 · Aug 18, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -21,4 +21,5 @@ images
 ^maicplus.*\.tar\.gz$
 ^maicplus.*\.tgz$
 ^inst/dev/MAIC/MAIC-main/.Rbuildignore
+^data-raw$
 ^design$
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,13 +3,12 @@
 S3method(plot,maicplus_estimate_weights)
 S3method(print,maicplus_check_weights)
 export(bucher)
-export(calculate_weights_legend)
 export(center_ipd)
 export(check_weights)
 export(dummize_ipd)
-export(ess_footnote_text)
 export(estimate_weights)
 export(generate_survival_data)
+export(km_plot)
 export(log_cum_haz_plot)
 export(maic_tte_unanchor)
 export(medSurv_makeup)

diff --git a/R/matching.R b/R/matching.R
@@ -1,10 +1,12 @@
 # Functions for matching step: estimation of individual weights
 
+# functions to be exported ---------------------------------------
+
 #' Derive individual weights in the matching step of MAIC
 #'
-#' This function takes individual patient data (IPD) with centered covariates
-#' (effect modifiers and/or prognostic variables) as input and generates
-#' weights for each individual in IPD trial to match the covariates in aggregate data.
+#' Assuming data is properly processed, this function takes individual patient data (IPD) with centered covariates
+#' (effect modifiers and/or prognostic variables) as input, and generates weights for each individual in IPD trial
+#' to match the covariates in aggregate data.
 #'
 #' @param data a numeric matrix, centered covariates of IPD, no missing value in any cell is allowed
 #' @param centered_colnames a character or numeric vector (column indicators) of centered covariates
@@ -24,6 +26,8 @@
 #'   \item{ess}{effective sample size, square of sum divided by sum of squares}
 #'   \item{opt}{R object returned by \code{base::optim()}, for assess convergence and other details}
 #' }
+#'
+#' @examples
 #' @export
 
 estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, method = "BFGS", ...) {
@@ -51,16 +55,16 @@

  # prepare data for optimization
  if (is.null(centered_colnames)) centered_colnames <- seq_len(ncol(data))
  EM <- data[, centered_colnames, drop = FALSE]
  ind <- apply(EM, 1, function(xx) any(is.na(xx)))
  nr_missing <- sum(ind)
  EM <- as.matrix(EM[!ind, , drop = FALSE])

  # objective and gradient functions
  objfn <- function(alpha, X) {
    sum(exp(X %*% alpha))
  }
  gradfn <- function(alpha, X) {
    colSums(sweep(X, 1, exp(X %*% alpha), "*"))
  }

@@ -99,19 +103,23 @@
 }
 
 
-#' Plot MAIC weights in a histogram with key statistics in legend
+#' Calculate Statistics for Weight Plot Legend
 #'
 #' Calculates ESS reduction and median weights which is used to create legend for weights plot
 #'
 #' @param weighted_data object returned after calculating weights using [estimate_weights]
 #'
 #' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count
 #' @examples
+#' \dontrun{
 #' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
 #' calculate_weights_legend(weighted_data)
-#' @export
-
+#' }
+#' @keywords internal
 calculate_weights_legend <- function(weighted_data) {
+  if (!inherits(weighted_data, "maicplus_estimate_weights")) {
+    stop("weighted_data must be class `maicplus_estimate_weights` generated by estimate_weights()")
+  }
   ess <- weighted_data$ess
   wt <- weighted_data$data$weights
   wt_scaled <- weighted_data$data$scaled_weights
@@ -231,7 +239,10 @@
     ) +
     ggplot2::theme_bw() +
     ggplot2::facet_wrap(~ind, nrow = 1) +
-    ggplot2::geom_text(data = legend_data, ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3) +
+    ggplot2::geom_text(
+      data = legend_data,
+      ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3
+    ) +
     ggplot2::theme(
       axis.title = ggplot2::element_text(size = 12),
       axis.text = ggplot2::element_text(size = 12)
@@ -294,62 +305,13 @@
 #' @param processed_agd a data frame, object returned after using \code{\link{process_agd}} or
 #' aggregated data following the same naming convention
 #'
+#' @examples
 #' @import DescTools
 #'
 #' @return data.frame of weighted and unweighted covariate averages of the IPD, and average of aggregate data
 #' @export
-#'
-#' @examples
-#' adsl <- read.csv(system.file("extdata", "adsl.csv",
-#'   package = "maicplus",
-#'   mustWork = TRUE
-#' ))
-#' adrs <- read.csv(system.file("extdata", "adrs.csv",
-#'   package = "maicplus",
-#'   mustWork = TRUE
-#' ))
-#' adtte <- read.csv(system.file("extdata", "adtte.csv",
-#'   package = "maicplus",
-#'   mustWork = TRUE
-#' ))
-#'
-#' ### AgD
-#' # Baseline aggregate data for the comparator population
-#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
-#'   package = "maicplus", mustWork = TRUE
-#' ))
-#' # target_pop2 <- read.csv(system.file("extdata", "aggregate_data_example_2.csv",
-#' #                                     package = "maicplus", mustWork = TRUE))
-#' # target_pop3 <- read.csv(system.file("extdata", "aggregate_data_example_3.csv",
-#' #                                     package = "maicplus", mustWork = TRUE))
-#'
-#' # for time-to-event endpoints, pseudo IPD from digitalized KM
-#' pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
-#'   package = "maicplus",
-#'   mustWork = TRUE
-#' ))
-#'
-#' #### prepare data ----------------------------------------------------------
-#' target_pop <- process_agd(target_pop)
-#' # target_pop2 <- process_agd(target_pop2) # demo of process_agd in different scenarios
-#' # target_pop3 <- process_agd(target_pop3) # demo of process_agd in different scenarios
-#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
-#' use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
-#'
-#' match_res <- estimate_weights(
-#'   data = use_adsl,
-#'   centered_colnames = grep("_CENTERED$", names(use_adsl)),
-#'   start_val = 0,
-#'   method = "BFGS"
-#' )
-#'
-#' check <- check_weights(
-#'   weighted_data = match_res,
-#'   processed_agd = target_pop
-#' )
-#'
-#' print(check)
-#'
+
+
 check_weights <- function(weighted_data, processed_agd) {
   ipd_with_weights <- weighted_data$data
   match_cov <- weighted_data$centered_colnames
@@ -368,7 +330,8 @@
     sum_centered_IPD_with_weights = as.vector(num_check)
   )
   attr(outdata, "footer") <- list()
-  ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE) # find item that was matched by mean
+  # find item that was matched by mean
+  ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE)
   ind_mean <- sapply(ind_mean, function(ii) any(grepl("_MEAN$", ii)))
   outdata$match_stat <- ifelse(grepl("_MEDIAN$", outdata$covariate), "Median",
     ifelse(grepl("_SQUARED$", outdata$covariate), "SD",
@@ -380,11 +343,14 @@
   outdata$external_trial <- unlist(processed_agd[paste(outdata$covariate, toupper(outdata$match_stat), sep = "_")])
 
   # fill in stat from unweighted and weighted IPD
-  for (ii in 1:nrow(outdata)) {
+  for (ii in seq_len(nrow(outdata))) {
     covname <- outdata$covariate[ii]
     if (outdata$match_stat[ii] %in% c("Mean", "Prop")) {
       outdata$internal_trial[ii] <- mean(ipd_with_weights[[covname]], na.rm = TRUE)
-      outdata$internal_trial_after_weighted[ii] <- weighted.mean(ipd_with_weights[[covname]], w = ipd_with_weights$weights, na.rm = TRUE)
+      outdata$internal_trial_after_weighted[ii] <- weighted.mean(
+        ipd_with_weights[[covname]],
+        w = ipd_with_weights$weights, na.rm = TRUE
+      )
     } else if (outdata$match_stat[ii] == "Median") {
       outdata$internal_trial[ii] <- quantile(ipd_with_weights[[covname]],
         probs = 0.5,
@@ -430,10 +396,14 @@
 #' @param sd_digits number of digits for rounding mean columns in the output
 #' @param digits minimal number of significant digits, see [print.default].
 #' @param ... further arguments to [print.data.frame]
-#'
 #' @describeIn check_weights Print method for check_weights objects
 #' @export
-print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd_digits = 3, digits = getOption("digits"), ...) {
+
+print.maicplus_check_weights <- function(x,
+                                         mean_digits = 2,
+                                         prop_digits = 2,
+                                         sd_digits = 3,
+                                         digits = getOption("digits"), ...) {
   round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
   round_digits[is.na(round_digits)] <- digits
 
@@ -451,15 +421,12 @@
   }
 }
 
-#' Prints Note on Expected Sample Size Reduction
+#' Note on Expected Sample Size Reduction
 #'
 #' @param width Number of characters to break string into new lines (`\n`).
 #'
 #' @return A character string
-#' @export
-#'
-#' @examples
-#' ess_footnote_text(width = 80)
+#' @keywords internal
 ess_footnote_text <- function(width = 0.9 * getOption("width")) {
   text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals
 (https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common

diff --git a/R/process_data.R b/R/process_data.R
@@ -113,6 +113,10 @@
 #' @param dummize_cols vector of column names to binarize
 #' @param dummize_ref_level vector of reference level of the variables to binarize
 #'
+#' @examples
+#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
+#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+#'
 #' @return ipd with dummized columns
 #' @export
 
@@ -140,10 +144,20 @@
 #'
 #' @param ipd IPD variable names should match the aggregate data names without the suffix.
 #' This would involve either changing the aggregate data name or the ipd name.
-#' For instance, if we binarize SEX variable with MALE as a reference using [dummize_ipd], function names the new variable as SEX_MALE.
+#' For instance, if we binarize SEX variable with MALE as a reference using [dummize_ipd],
+#' function names the new variable as SEX_MALE.
 #' In this case, SEX_MALE should also be available in the aggregate data.
 #' @param agd pre-processed aggregate data which contain STUDY, ARM, and N. Variable names should be followed
 #' by legal suffixes (i.e. MEAN, MEDIAN, SD, or PROP). Note that COUNT suffix is no longer accepted.
+#' @examples
+#' ipd <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
+#' ipd <- dummize_ipd(ipd, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
+#' target_pop <- read.csv(
+#'   system.file("extdata", "aggregate_data_example_1.csv", package = "maicplus", mustWork = TRUE)
+#' )
+#' agd <- process_agd(target_pop)
+#'
+#' ipd_centered <- center_ipd(ipd = ipd, agd = agd)
 #'
 #' @return centered ipd using aggregate level data averages
 #' @export
@@ -200,13 +214,13 @@

  if (nrow(use_agd) < 2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ")

  rowId <- nrow(use_agd) + 1
  use_agd[rowId, ] <- NA
  use_agd$STUDY[rowId] <- use_agd$STUDY[1]
  use_agd$ARM[rowId] <- "total"

  # complete N and count
  NN <- use_agd[["N"]][rowId] <- sum(use_agd[["N"]], na.rm = TRUE)
  nn <- use_agd[["N"]][-rowId]
  for (i in grep("_COUNT$", names(use_agd), value = TRUE)) {
    use_agd[[i]][rowId] <- sum(use_agd[[i]][-rowId], na.rm = TRUE)

diff --git a/R/survival-helper.R b/R/survival-helper.R
@@ -1,14 +1,16 @@
-#' helper function: makeup to get median survival time from a `survival::survfit` object
+#' Helper function: makeup to get median survival time from a `survival::survfit` object
 #'
 #' Extract and display median survival time with confidence interval
 #'
 #' @param km_fit returned object from \code{survival::survfit}
 #' @param legend a character string, name used in 'type' column in returned data frame
 #' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
 #'
+#' @examples
 #' @return a data frame with a index column 'type', median survival time and confidence interval
 #' @export
+
 medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
   time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)
 
  if (!time_scale %in% names(time_unit)) stop("time_scale has to be 'year', 'month', 'week' or 'day'")
@@ -29,11 +31,12 @@
 }
 
 
-
-#' Helper function to select a set of variables used for Kaplan-Meier plot
+#' Helper function to select set of variables used for Kaplan-Meier plot
 #'
 #' @param km_fit returned object from \code{survival::survfit}
-#' @return a list of data frames of variables from \code{survival::survfit}. Data frames are divided by treatment.
+#'
+#' @examples
+#' @return a list of data frames of variables from survfit. Data frame is divided by treatment.
 #' @export
 
 survfit_makeup <- function(km_fit) {
@@ -57,14 +60,18 @@
 #'
 #' @param km_fit_before returned object from \code{survival::survfit} before adjustment
 #' @param km_fit_after returned object from \code{survival::survfit} after adjustment
-#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
-#' @param trt  a character string, name of the interested treatment in internal trial (real IPD)
-#' @param trt_ext character string, name of the interested comparator in external trial used to
-#' subset \code{dat_ext} (pseudo IPD)
-#' @param endpoint_name a character string, name of the endpoint
+#' @param time_scale time unit of median survival time, taking a value of 'year', 'month', 'week' or 'day'
+#' @param trt internal trial treatment
+#' @param trt_ext external trial treatment
+#' @param endpoint_name name of the endpoint
+#' @param line_col color of the line curves with the order of external, internal unadjusted, and internal adjusted
 #'
-#' @return a KM plot
-km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext, endpoint_name = "") {
+#' @return a Kaplan-Meier plot
+#' @examples
+#' @export
+
+km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext, endpoint_name = "",
+                    line_col = c("#5450E4", "#00857C", "#6ECEB2")) {
   time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)
 
   if (!time_scale %in% names(time_unit)) stop("time_scale has to be 'year', 'month', 'week' or 'day'")
@@ -97,48 +104,48 @@
   # add km lines from external trial
   lines(
     y = pd_be[[trt_ext]]$surv,
-    x = (pd_be[[trt_ext]]$time / time_unit[[time_scale]]), col = "#5450E4",
+    x = (pd_be[[trt_ext]]$time / time_unit[[time_scale]]), col = line_col[1],
     type = "s"
   )
   tmpid <- pd_be[[trt_ext]]$censor == 1
   points(
     y = pd_be[[trt_ext]]$surv[tmpid],
     x = (pd_be[[trt_ext]]$time[tmpid] / time_unit[[time_scale]]),
-    col = "#5450E4", pch = 3, cex = 0.7
+    col = line_col[1], pch = 3, cex = 0.7
   )
 
   # add km lines from internal trial before adjustment
   lines(
     y = pd_be[[trt]]$surv,
-    x = (pd_be[[trt]]$time / time_unit[[time_scale]]), col = "#00857C",
+    x = (pd_be[[trt]]$time / time_unit[[time_scale]]), col = line_col[2],
     type = "s"
   )
   tmpid <- pd_be[[trt]]$censor == 1
   points(
     y = pd_be[[trt]]$surv[tmpid],
     x = (pd_be[[trt]]$time[tmpid] / time_unit[[time_scale]]),
-    col = "#00857C", pch = 3, cex = 0.7
+    col = line_col[2], pch = 3, cex = 0.7
   )
 
   # add km lines from internal trial after adjustment
   if (!is.null(km_fit_after)) {
     lines(
       y = pd_af[[trt]]$surv,
-      x = (pd_af[[trt]]$time / time_unit[[time_scale]]), col = "#6ECEB2", lty = 2,
+      x = (pd_af[[trt]]$time / time_unit[[time_scale]]), col = line_col[3], lty = 2,
       type = "s"
     )
     tmpid <- pd_af[[trt]]$censor == 1
     points(
       y = pd_af[[trt]]$surv[tmpid],
       x = (pd_af[[trt]]$time[tmpid] / time_unit[[time_scale]]),
-      col = "#6ECEB2", pch = 3, cex = 0.7
+      col = line_col[3], pch = 3, cex = 0.7
     )
   }
 
   use_leg <- 1:ifelse(is.null(km_fit_after), 2, 3)
   # add legend
   legend("topright",
-    bty = "n", lty = c(1, 1, 2)[use_leg], cex = 0.8, col = c("#5450E4", "#00857C", "#6ECEB2")[use_leg],
+    bty = "n", lty = c(1, 1, 2)[use_leg], cex = 0.8, col = line_col[use_leg],
     legend = c(
       paste0("Comparator: ", trt_ext),
       paste0("Treatment: ", trt),
@@ -152,16 +159,17 @@
 #'
 #' a diagnosis plot for proportional hazard assumption, versus log-time (default) or time
 #'
-#' @param clldat object returned from \code{\link{survfit_makeup}}
+#' @param km_fit returned object from \code{survival::survfit}
 #' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
 #' @param log_time logical, TRUE (default) or FALSE
 #' @param endpoint_name a character string, name of the endpoint
 #' @param subtitle a character string, subtitle of the plot
 #' @param exclude_censor logical, should censored data point be plotted
-#'
+#' @examples
 #' @return a plot
 #' @export
-log_cum_haz_plot <- function(clldat,
+
+log_cum_haz_plot <- function(km_fit,
                              time_scale,
                              log_time = TRUE,
                              endpoint_name = "",
@@ -170,12 +178,14 @@
   time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)
   if (!time_scale %in% names(time_unit)) stop("time_scale has to be 'year', 'month', 'week' or 'day'")
 
+  clldat <- survfit_makeup(km_fit)
+
   if (exclude_censor) {
     clldat <- lapply(clldat, function(xxt) xxt[xxt$censor == 0, , drop = FALSE])
   }

  all.times <- do.call(rbind, clldat)$time / time_unit[[time_scale]]
  if (log_time) all.times <- log(all.times)
  t_range <- range(all.times)
  y_range <- range(log(do.call(rbind, clldat)$cumhaz))

@@ -222,7 +232,7 @@
 #' @param log_time logical, TRUE (default) or FALSE
 #' @param endpoint_name a character string, name of the endpoint
 #' @param subtitle a character string, subtitle of the plot
-#'
+#' @examples
 #' @return a plot
 #' @export