hta-pharma · gravesti · Aug 21, 2023 · Aug 15, 2023 · Aug 15, 2023 · Aug 15, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -18,7 +18,8 @@ Imports:
     survival,
     DescTools,
     MASS,
-    boot
+    boot,
+    stringr
 Suggests:
     knitr,
     testthat (>= 2.0),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(plot,maicplus_estimate_weights)
 S3method(print,maicplus_check_weights)
 export(bucher)
+export(calculate_weights_legend)
 export(center_ipd)
 export(check_weights)
 export(dummize_ipd)
@@ -11,7 +13,8 @@ export(generate_survival_data)
 export(log_cum_haz_plot)
 export(maic_tte_unanchor)
 export(medSurv_makeup)
-export(plot_weights)
+export(plot_weights_base)
+export(plot_weights_ggplot)
 export(process_agd)
 export(resid_plot)
 export(survfit_makeup)
@@ -20,4 +23,6 @@ import(MASS)
 import(boot)
 import(graphics)
 import(stats)
+import(stringr)
 import(survival)
+importFrom(utils,stack)
diff --git a/R/maicplus-package.R b/R/maicplus-package.R
@@ -13,4 +13,6 @@ NULL
 #' @import DescTools
 #' @import MASS
 #' @import boot
+#' @import stringr
+#' @importFrom utils stack
 NULL
diff --git a/R/matching.R b/R/matching.R
@@ -51,16 +51,16 @@

  # prepare data for optimization
  if (is.null(centered_colnames)) centered_colnames <- seq_len(ncol(data))
  EM <- data[, centered_colnames, drop = FALSE]
  ind <- apply(EM, 1, function(xx) any(is.na(xx)))
  nr_missing <- sum(ind)
  EM <- as.matrix(EM[!ind, , drop = FALSE])

  # objective and gradient functions
  objfn <- function(alpha, X) {
    sum(exp(X %*% alpha))
  }
  gradfn <- function(alpha, X) {
    colSums(sweep(X, 1, exp(X %*% alpha), "*"))
  }

@@ -86,56 +86,92 @@
   if (is.numeric(centered_colnames)) centered_colnames <- names(data)[centered_colnames]
 
   # Output
-  list(
+  outdata <- list(
     data = data,
     centered_colnames = centered_colnames,
     nr_missing = nr_missing,
     ess = sum(wt)^2 / sum(wt^2),
     opt = opt1
   )
+
+  class(outdata) <- c("maicplus_estimate_weights", "list")
+  outdata
 }
 
 
 #' Plot MAIC weights in a histogram with key statistics in legend
 #'
-#' Generates a plot given the individuals weights with key summary in top right legend that includes
-#' median weight, effective sample size (ESS), and reduction percentage (what percent ESS is reduced from the
-#' original sample size).
-#' There are two options of weights provided in \code{\link{estimate_weights}}: unscaled or scaled.
-#' Scaled weights are relative to the original unit weights of each individual.
-#' In other words, a scaled weight greater than 1 means that an individual carries more weight in the
-#' re-weighted population than the original data and a scaled weight less than 1 means that an individual carries
-#' less weight in the re-weighted population than the original data.
-#'
-#' @param wt a numeric vector of individual MAIC weights (derived using \code{\link{estimate_weights}})
-#' @param bin_col a string, color for the bins of histogram
-#' @param vline_col a string, color for the vertical line in the histogram
-#' @param main_title a character string, main title of the plot
+#' Calculates ESS reduction and median weights which is used to create legend for weights plot
 #'
-#' @return a plot of unscaled or scaled weights
+#' @param weighted_data object returned after calculating weights using [estimate_weights]
+#'
+#' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count
 #' @examples
+#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
+#' calculate_weights_legend(weighted_data)
 #' @export
 
-plot_weights <- function(wt, bin_col = "#6ECEB2", vline_col = "#688CE8", main_title = "Unscaled Individual Weights") {
+calculate_weights_legend <- function(weighted_data) {
+  ess <- weighted_data$ess
+  wt <- weighted_data$data$weights
+  wt_scaled <- weighted_data$data$scaled_weights
+
   # calculate sample size and exclude NA from wt
   nr_na <- sum(is.na(wt))
   n <- length(wt) - nr_na
   wt <- na.omit(wt)
+  wt_scaled <- na.omit(wt_scaled)
+
+  # calculate ess reduction and median weights
+  ess_reduction <- (1 - (ess / n)) * 100
+  wt_median <- median(wt)
+  wt_scaled_median <- median(wt_scaled)
+
+  list(
+    ess = round(ess, 2),
+    ess_reduction = round(ess_reduction, 2),
+    wt_median = round(wt_median, 4),
+    wt_scaled_median = round(wt_scaled_median, 4),
+    nr_na = nr_na
+  )
+}
+
+#' Plot MAIC weights in a histogram with key statistics in legend
+#'
+#' Generates a base R histogram of weights. Default is to plot either unscaled or scaled weights and not both.
+#'
+#' @param weighted_data object returned after calculating weights using [estimate_weights]
+#' @param bin_col a string, color for the bins of histogram
+#' @param vline_col a string, color for the vertical line in the histogram
+#' @param main_title title of the plot
+#' @param scaled_weights an indicator for using scaled weights instead of regular weights
+#'
+#' @return a plot of unscaled or scaled weights
+#' @export
 
-  # calculate effective sample size (ESS) and reduction of original sample
-  ess <- (sum(wt)^2) / sum(wt^2)
-  ess_reduct <- round((1 - (ess / n)) * 100, 2)
+plot_weights_base <- function(weighted_data,
+                              bin_col, vline_col, main_title,
+                              scaled_weights) {
+  weights_stat <- calculate_weights_legend(weighted_data)
+
+  if (scaled_weights) {
+    wt <- weighted_data$data$scaled_weights
+    median_wt <- weights_stat$wt_scaled_median
+  } else {
+    wt <- weighted_data$data$weights
+    median_wt <- weights_stat$wt_median
+  }
 
   # prepare legend
   plot_legend <- c(
-    paste0("Median = ", round(median(wt), 4)),
-    paste0("ESS = ", round(ess, 2)),
-    paste0("Reduction% = ", ess_reduct)
+    paste0("Median = ", median_wt),
+    paste0("ESS = ", weights_stat$ess),
+    paste0("Reduction% = ", weights_stat$ess_reduct)
   )
   plot_lty <- c(2, NA, NA)
 
-  if (nr_na > 0) {
-    plot_legend <- c(plot_legend, paste0("#Missing Weights = ", nr_na))
+  if (weights_stat$nr_na > 0) {
+    plot_legend <- c(plot_legend, paste0("#Missing Weights = ", weights_stat$nr_na))
     plot_lty <- c(plot_lty, NA)
   }
 
@@ -147,6 +183,108 @@
   legend("topright", bty = "n", lty = plot_lty, cex = 0.8, legend = plot_legend)
 }
 
+#' Plot MAIC weights in a histogram with key statistics in legend using ggplot
+#'
+#' Generates a ggplot histogram of weights. Default is to plot both unscaled and scaled weights on a same graph.
+#'
+#' @param weighted_data object returned after calculating weights using [estimate_weights]
+#' @param bin_col a string, color for the bins of histogram
+#' @param vline_col a string, color for the vertical line in the histogram
+#' @param main_title Name of scaled weights plot and unscaled weights plot, respectively.
+#' @param bins number of bin parameter to use
+#'
+#' @return a plot of unscaled and scaled weights
+#' @export
+
+plot_weights_ggplot <- function(weighted_data, bin_col, vline_col,
+                                main_title,
+                                bins) {
+  # check if ggplot2 package is installed
+  if (!requireNamespace("ggplot2", quietly = TRUE)) {
+    stop("ggplot2 package is needed to run this function")
+  }
+
+  weights_stat <- calculate_weights_legend(weighted_data)
+
+  # prepare dataset to use in ggplot
+  wt_data0 <- weighted_data$data[, c("weights", "scaled_weights")]
+  colnames(wt_data0) <- main_title
+  wt_data <- stack(wt_data0)
+  wt_data$median <- ifelse(wt_data$ind == main_title[1],
+    weights_stat$wt_median, weights_stat$wt_scaled_median
+  )
+
+  # create legend data
+  lab <- with(weights_stat, {
+    lab <- c(paste0("Median = ", wt_median), paste0("Median = ", wt_scaled_median))
+    lab <- paste0(lab, "\nESS = ", ess, "\nReduction% = ", ess_reduction)
+    if (nr_na > 0) lab <- paste0(lab, "\n#Missing Weights = ", nr_na)
+    lab
+  })
+  legend_data <- data.frame(ind = main_title, lab = lab)
+
+  hist_plot <- ggplot2::ggplot(wt_data) +
+    ggplot2::geom_histogram(ggplot2::aes_string(x = "values"), bins = bins, color = bin_col, fill = bin_col) +
+    ggplot2::geom_vline(ggplot2::aes_string(xintercept = "median"),
+      color = vline_col,
+      linetype = "dashed"
+    ) +
+    ggplot2::theme_bw() +
+    ggplot2::facet_wrap(~ind, nrow = 1) +
+    ggplot2::geom_text(data = legend_data, ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3) +
+    ggplot2::theme(
+      axis.title = ggplot2::element_text(size = 12),
+      axis.text = ggplot2::element_text(size = 12)
+    ) +
+    ggplot2::ylab("Frequency") +
+    ggplot2::xlab("Weight")
+
+  return(hist_plot)
+}
+
+
+#' Plot method for Estimate Weights objects
+#'
+#' The plot function displays individuals weights with key summary in top right legend that includes
+#' median weight, effective sample size (ESS), and reduction percentage (what percent ESS is reduced from the
+#' original sample size). There are two options of plotting: base R plot and ggplot. The default
+#' for base R plot is to plot unscaled and scaled separately. The default
+#' for ggplot is to plot unscaled and scaled weights on a same plot.
+#'
+#' @param x object from [estimate_weights]
+#' @param ggplot indicator to print base weights plot or ggplot weights plot
+#' @param bin_col a string, color for the bins of histogram
+#' @param vline_col a string, color for the vertical line in the histogram
+#' @param main_title title of the plot. For ggplot, name of scaled weights plot and unscaled weights plot, respectively.
+#' @param scaled_weights (base plot only) an indicator for using scaled weights instead of regular weights
+#' @param bins (ggplot only) number of bin parameter to use
+#'
+#' @examples
+#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
+#' plot(weighted_data)
+#'
+#' library(ggplot2)
+#' plot(weighted_data, ggplot = TRUE)
+#' @describeIn estimate_weights Plot method for estimate_weights objects
+#' @export
+
+plot.maicplus_estimate_weights <- function(x, ggplot = FALSE,
+                                           bin_col = "#6ECEB2", vline_col = "#688CE8",
+                                           main_title = NULL,
+                                           scaled_weights = TRUE,
+                                           bins = 50, ...) {
+  if (ggplot) {
+    if (is.null(main_title)) main_title <- c("Scaled Individual Weights", "Unscaled Individual Weights")
+    plot_weights_ggplot(x, bin_col, vline_col, main_title, bins)
+  } else {
+    if (is.null(main_title)) {
+      main_title <- ifelse(scaled_weights, "Scaled Individual Weights", "Unscaled Individual Weights")
+    }
+    plot_weights_base(x, bin_col, vline_col, main_title, scaled_weights)
+  }
+}
+
+
 #' Check to see if weights are optimized correctly
 #'
 #' This function checks to see if the optimization is done properly by checking the covariate averages
@@ -230,7 +368,7 @@
    sum_centered_IPD_with_weights = as.vector(num_check)
  )
  attr(outdata, "footer") <- list()
  ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE) # find item that was matched by mean
  ind_mean <- sapply(ind_mean, function(ii) any(grepl("_MEAN$", ii)))
  outdata$match_stat <- ifelse(grepl("_MEDIAN$", outdata$covariate), "Median",
    ifelse(grepl("_SQUARED$", outdata$covariate), "SD",
@@ -242,11 +380,11 @@
  outdata$external_trial <- unlist(processed_agd[paste(outdata$covariate, toupper(outdata$match_stat), sep = "_")])

  # fill in stat from unweighted and weighted IPD
  for (ii in 1:nrow(outdata)) {
    covname <- outdata$covariate[ii]
    if (outdata$match_stat[ii] %in% c("Mean", "Prop")) {
      outdata$internal_trial[ii] <- mean(ipd_with_weights[[covname]], na.rm = TRUE)
      outdata$internal_trial_after_weighted[ii] <- weighted.mean(ipd_with_weights[[covname]], w = ipd_with_weights$weights, na.rm = TRUE)
    } else if (outdata$match_stat[ii] == "Median") {
      outdata$internal_trial[ii] <- quantile(ipd_with_weights[[covname]],
        probs = 0.5,
@@ -295,7 +433,7 @@
 #'
 #' @describeIn check_weights Print method for check_weights objects
 #' @export
 print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd_digits = 3, digits = getOption("digits"), ...) {
  round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
  round_digits[is.na(round_digits)] <- digits

@@ -325,6 +463,6 @@
 ess_footnote_text <- function(width = 0.9 * getOption("width")) {
   text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals
 (https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common
-and it may be considered sub optimal."
+and it may be considered suboptimal."
   paste0(strwrap(text, width = width), collapse = "\n")
 }
diff --git a/inst/extdata/weighted_data.rda b/inst/extdata/weighted_data.rda
diff --git a/man/calculate_weights_legend.Rd b/man/calculate_weights_legend.Rd
diff --git a/man/estimate_weights.Rd b/man/estimate_weights.Rd