Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rev #36

Merged
merged 17 commits into from
Aug 21, 2023
Merged

rev #36

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Imports:
survival,
DescTools,
MASS,
boot
boot,
stringr
Suggests:
knitr,
testthat (>= 2.0),
Expand Down
7 changes: 6 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(plot,maicplus_estimate_weights)
S3method(print,maicplus_check_weights)
export(bucher)
export(calculate_weights_legend)
export(center_ipd)
export(check_weights)
export(dummize_ipd)
Expand All @@ -11,7 +13,8 @@ export(generate_survival_data)
export(log_cum_haz_plot)
export(maic_tte_unanchor)
export(medSurv_makeup)
export(plot_weights)
export(plot_weights_base)
export(plot_weights_ggplot)
export(process_agd)
export(resid_plot)
export(survfit_makeup)
Expand All @@ -20,4 +23,6 @@ import(MASS)
import(boot)
import(graphics)
import(stats)
import(stringr)
import(survival)
importFrom(utils,stack)
2 changes: 2 additions & 0 deletions R/maicplus-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@ NULL
#' @import DescTools
#' @import MASS
#' @import boot
#' @import stringr
#' @importFrom utils stack
NULL
188 changes: 163 additions & 25 deletions R/matching.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@

# prepare data for optimization
if (is.null(centered_colnames)) centered_colnames <- seq_len(ncol(data))
EM <- data[, centered_colnames, drop = FALSE]

Check warning on line 54 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=54,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
ind <- apply(EM, 1, function(xx) any(is.na(xx)))
nr_missing <- sum(ind)
EM <- as.matrix(EM[!ind, , drop = FALSE])

Check warning on line 57 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=57,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.

# objective and gradient functions
objfn <- function(alpha, X) {

Check warning on line 60 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=60,col=28,[object_name_linter] Variable and function name style should be snake_case or symbols.
sum(exp(X %*% alpha))
}
gradfn <- function(alpha, X) {

Check warning on line 63 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=63,col=29,[object_name_linter] Variable and function name style should be snake_case or symbols.
colSums(sweep(X, 1, exp(X %*% alpha), "*"))
}

Expand All @@ -86,56 +86,92 @@
if (is.numeric(centered_colnames)) centered_colnames <- names(data)[centered_colnames]

# Output
list(
outdata <- list(
data = data,
centered_colnames = centered_colnames,
nr_missing = nr_missing,
ess = sum(wt)^2 / sum(wt^2),
opt = opt1
)

class(outdata) <- c("maicplus_estimate_weights", "list")
outdata
}


#' Plot MAIC weights in a histogram with key statistics in legend
#'
#' Generates a plot given the individuals weights with key summary in top right legend that includes
#' median weight, effective sample size (ESS), and reduction percentage (what percent ESS is reduced from the
#' original sample size).
#' There are two options of weights provided in \code{\link{estimate_weights}}: unscaled or scaled.
#' Scaled weights are relative to the original unit weights of each individual.
#' In other words, a scaled weight greater than 1 means that an individual carries more weight in the
#' re-weighted population than the original data and a scaled weight less than 1 means that an individual carries
#' less weight in the re-weighted population than the original data.
#'
#' @param wt a numeric vector of individual MAIC weights (derived using \code{\link{estimate_weights}})
#' @param bin_col a string, color for the bins of histogram
#' @param vline_col a string, color for the vertical line in the histogram
#' @param main_title a character string, main title of the plot
#' Calculates ESS reduction and median weights which is used to create legend for weights plot
#'
#' @return a plot of unscaled or scaled weights
#' @param weighted_data object returned after calculating weights using [estimate_weights]
#'
#' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count
#' @examples
#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' calculate_weights_legend(weighted_data)
#' @export

plot_weights <- function(wt, bin_col = "#6ECEB2", vline_col = "#688CE8", main_title = "Unscaled Individual Weights") {
calculate_weights_legend <- function(weighted_data) {
ess <- weighted_data$ess
wt <- weighted_data$data$weights
wt_scaled <- weighted_data$data$scaled_weights

# calculate sample size and exclude NA from wt
nr_na <- sum(is.na(wt))
n <- length(wt) - nr_na
wt <- na.omit(wt)
wt_scaled <- na.omit(wt_scaled)

# calculate ess reduction and median weights
ess_reduction <- (1 - (ess / n)) * 100
wt_median <- median(wt)
wt_scaled_median <- median(wt_scaled)

list(
ess = round(ess, 2),
ess_reduction = round(ess_reduction, 2),
wt_median = round(wt_median, 4),
wt_scaled_median = round(wt_scaled_median, 4),
nr_na = nr_na
)
}

#' Plot MAIC weights in a histogram with key statistics in legend
#'
#' Generates a base R histogram of weights. Default is to plot either unscaled or scaled weights and not both.
#'
#' @param weighted_data object returned after calculating weights using [estimate_weights]
#' @param bin_col a string, color for the bins of histogram
#' @param vline_col a string, color for the vertical line in the histogram
#' @param main_title title of the plot
#' @param scaled_weights an indicator for using scaled weights instead of regular weights
#'
#' @return a plot of unscaled or scaled weights
#' @export

# calculate effective sample size (ESS) and reduction of original sample
ess <- (sum(wt)^2) / sum(wt^2)
ess_reduct <- round((1 - (ess / n)) * 100, 2)
plot_weights_base <- function(weighted_data,
bin_col, vline_col, main_title,
scaled_weights) {
weights_stat <- calculate_weights_legend(weighted_data)

if (scaled_weights) {
wt <- weighted_data$data$scaled_weights
median_wt <- weights_stat$wt_scaled_median
} else {
wt <- weighted_data$data$weights
median_wt <- weights_stat$wt_median
}

# prepare legend
plot_legend <- c(
paste0("Median = ", round(median(wt), 4)),
paste0("ESS = ", round(ess, 2)),
paste0("Reduction% = ", ess_reduct)
paste0("Median = ", median_wt),
paste0("ESS = ", weights_stat$ess),
paste0("Reduction% = ", weights_stat$ess_reduct)
)
plot_lty <- c(2, NA, NA)

if (nr_na > 0) {
plot_legend <- c(plot_legend, paste0("#Missing Weights = ", nr_na))
if (weights_stat$nr_na > 0) {
plot_legend <- c(plot_legend, paste0("#Missing Weights = ", weights_stat$nr_na))
plot_lty <- c(plot_lty, NA)
}

Expand All @@ -147,6 +183,108 @@
legend("topright", bty = "n", lty = plot_lty, cex = 0.8, legend = plot_legend)
}

#' Plot MAIC weights in a histogram with key statistics in legend using ggplot
#'
#' Generates a ggplot histogram of weights. Default is to plot both unscaled and scaled weights on a same graph.
#'
#' @param weighted_data object returned after calculating weights using [estimate_weights]
#' @param bin_col a string, color for the bins of histogram
#' @param vline_col a string, color for the vertical line in the histogram
#' @param main_title Name of scaled weights plot and unscaled weights plot, respectively.
#' @param bins number of bin parameter to use
#'
#' @return a plot of unscaled and scaled weights
#' @export

plot_weights_ggplot <- function(weighted_data, bin_col, vline_col,
main_title,
bins) {
# check if ggplot2 package is installed
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("ggplot2 package is needed to run this function")
}

weights_stat <- calculate_weights_legend(weighted_data)

# prepare dataset to use in ggplot
wt_data0 <- weighted_data$data[, c("weights", "scaled_weights")]
colnames(wt_data0) <- main_title
wt_data <- stack(wt_data0)
wt_data$median <- ifelse(wt_data$ind == main_title[1],
weights_stat$wt_median, weights_stat$wt_scaled_median
)

# create legend data
lab <- with(weights_stat, {
lab <- c(paste0("Median = ", wt_median), paste0("Median = ", wt_scaled_median))
lab <- paste0(lab, "\nESS = ", ess, "\nReduction% = ", ess_reduction)
if (nr_na > 0) lab <- paste0(lab, "\n#Missing Weights = ", nr_na)
lab
})
legend_data <- data.frame(ind = main_title, lab = lab)

hist_plot <- ggplot2::ggplot(wt_data) +
ggplot2::geom_histogram(ggplot2::aes_string(x = "values"), bins = bins, color = bin_col, fill = bin_col) +
ggplot2::geom_vline(ggplot2::aes_string(xintercept = "median"),
color = vline_col,
linetype = "dashed"
) +
ggplot2::theme_bw() +
ggplot2::facet_wrap(~ind, nrow = 1) +
ggplot2::geom_text(data = legend_data, ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3) +

Check warning on line 234 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=234,col=121,[line_length_linter] Lines should not be more than 120 characters.
ggplot2::theme(
axis.title = ggplot2::element_text(size = 12),
axis.text = ggplot2::element_text(size = 12)
) +
ggplot2::ylab("Frequency") +
ggplot2::xlab("Weight")

return(hist_plot)
}


#' Plot method for Estimate Weights objects
#'
#' The plot function displays individuals weights with key summary in top right legend that includes
#' median weight, effective sample size (ESS), and reduction percentage (what percent ESS is reduced from the
#' original sample size). There are two options of plotting: base R plot and ggplot. The default
#' for base R plot is to plot unscaled and scaled separately. The default
#' for ggplot is to plot unscaled and scaled weights on a same plot.
#'
#' @param x object from [estimate_weights]
#' @param ggplot indicator to print base weights plot or ggplot weights plot
#' @param bin_col a string, color for the bins of histogram
#' @param vline_col a string, color for the vertical line in the histogram
#' @param main_title title of the plot. For ggplot, name of scaled weights plot and unscaled weights plot, respectively.
#' @param scaled_weights (base plot only) an indicator for using scaled weights instead of regular weights
#' @param bins (ggplot only) number of bin parameter to use
#'
#' @examples
MikeJSeo marked this conversation as resolved.
Show resolved Hide resolved
#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' plot(weighted_data)
#'
#' library(ggplot2)
#' plot(weighted_data, ggplot = TRUE)
#' @describeIn estimate_weights Plot method for estimate_weights objects
#' @export

plot.maicplus_estimate_weights <- function(x, ggplot = FALSE,
bin_col = "#6ECEB2", vline_col = "#688CE8",
main_title = NULL,
scaled_weights = TRUE,
bins = 50, ...) {
if (ggplot) {
if (is.null(main_title)) main_title <- c("Scaled Individual Weights", "Unscaled Individual Weights")
plot_weights_ggplot(x, bin_col, vline_col, main_title, bins)
} else {
if (is.null(main_title)) {
main_title <- ifelse(scaled_weights, "Scaled Individual Weights", "Unscaled Individual Weights")
}
plot_weights_base(x, bin_col, vline_col, main_title, scaled_weights)
}
}


#' Check to see if weights are optimized correctly
#'
#' This function checks to see if the optimization is done properly by checking the covariate averages
Expand Down Expand Up @@ -230,7 +368,7 @@
sum_centered_IPD_with_weights = as.vector(num_check)
)
attr(outdata, "footer") <- list()
ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE) # find item that was matched by mean

Check warning on line 371 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=371,col=121,[line_length_linter] Lines should not be more than 120 characters.
ind_mean <- sapply(ind_mean, function(ii) any(grepl("_MEAN$", ii)))
outdata$match_stat <- ifelse(grepl("_MEDIAN$", outdata$covariate), "Median",
ifelse(grepl("_SQUARED$", outdata$covariate), "SD",
Expand All @@ -242,11 +380,11 @@
outdata$external_trial <- unlist(processed_agd[paste(outdata$covariate, toupper(outdata$match_stat), sep = "_")])

# fill in stat from unweighted and weighted IPD
for (ii in 1:nrow(outdata)) {

Check warning on line 383 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=383,col=14,[seq_linter] 1:nrow(...) is likely to be wrong in the empty edge case. Use seq_len(nrow(...)) instead.
covname <- outdata$covariate[ii]
if (outdata$match_stat[ii] %in% c("Mean", "Prop")) {
outdata$internal_trial[ii] <- mean(ipd_with_weights[[covname]], na.rm = TRUE)
outdata$internal_trial_after_weighted[ii] <- weighted.mean(ipd_with_weights[[covname]], w = ipd_with_weights$weights, na.rm = TRUE)

Check warning on line 387 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=387,col=121,[line_length_linter] Lines should not be more than 120 characters.
} else if (outdata$match_stat[ii] == "Median") {
outdata$internal_trial[ii] <- quantile(ipd_with_weights[[covname]],
probs = 0.5,
Expand Down Expand Up @@ -295,7 +433,7 @@
#'
#' @describeIn check_weights Print method for check_weights objects
#' @export
print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd_digits = 3, digits = getOption("digits"), ...) {

Check warning on line 436 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=436,col=121,[line_length_linter] Lines should not be more than 120 characters.
round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
round_digits[is.na(round_digits)] <- digits

Expand Down Expand Up @@ -325,6 +463,6 @@
ess_footnote_text <- function(width = 0.9 * getOption("width")) {
text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals
(https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common
and it may be considered sub optimal."
and it may be considered suboptimal."
paste0(strwrap(text, width = width), collapse = "\n")
}
Binary file added inst/extdata/weighted_data.rda
Binary file not shown.
21 changes: 21 additions & 0 deletions man/calculate_weights_legend.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions man/estimate_weights.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading