Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
hoppanda committed Aug 18, 2023
1 parent 410fdba commit 776ebd4
Show file tree
Hide file tree
Showing 15 changed files with 173 additions and 146 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ export(bucher)
export(center_ipd)
export(check_weights)
export(dummize_ipd)
export(ess_footnote_text)
export(estimate_weights)
export(generate_survival_data)
export(km_plot)
export(log_cum_haz_plot)
export(maic_tte_unanchor)
export(medSurv_makeup)
Expand Down
99 changes: 36 additions & 63 deletions R/matching.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Functions for matching step: estimation of individual weights

# functions to be exported ---------------------------------------

#' Derive individual weights in the matching step of MAIC
#'
#' This function takes individual patient data (IPD) with centered covariates
#' (effect modifiers and/or prognostic variables) as input and generates
#' weights for each individual in IPD trial to match the covariates in aggregate data.
#' Assuming data is properly processed, this function takes individual patient data (IPD) with centered covariates
#' (effect modifiers and/or prognostic variables) as input, and generates weights for each individual in IPD trial
#' to match the covariates in aggregate data.
#'
#' @param data a numeric matrix, centered covariates of IPD, no missing value in any cell is allowed
#' @param centered_colnames a character or numeric vector (column indicators) of centered covariates
Expand All @@ -24,6 +26,14 @@
#' \item{ess}{effective sample size, square of sum divided by sum of squares}
#' \item{opt}{R object returned by \code{base::optim()}, for assess convergence and other details}
#' }
#'
#' @examples
#' load(system.file("extdata", "ipd_centered.rda", package = "maicplus", mustWork = TRUE))
#'
#' centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
#' centered_colnames <- paste0(centered_colnames, "_CENTERED")
#' match_res <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
#'
#' @export

estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, method = "BFGS", ...) {
Expand Down Expand Up @@ -112,8 +122,15 @@ estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, meth
#' @param vline_col a string, color for the vertical line in the histogram
#' @param main_title a character string, main title of the plot
#'
#' @return a plot of unscaled or scaled weights
#' @examples
#' load(system.file("extdata", "match_res.rda", package = "maicplus", mustWork = TRUE))
#' wt <- match_res$data$weights
#' wt_scaled <- match_res$data$scaled_weights
#' par(mfrow = c(1, 2))
#' plot_weights(wt, bin_col = "orange", vline_col = "darkblue")
#' plot_weights(wt_scaled, main_title = "Scaled Individual Weights")
#'
#' @return a plot of unscaled or scaled weights
#' @export

plot_weights <- function(wt, bin_col = "#6ECEB2", vline_col = "#688CE8", main_title = "Unscaled Individual Weights") {
Expand Down Expand Up @@ -152,66 +169,25 @@ plot_weights <- function(wt, bin_col = "#6ECEB2", vline_col = "#688CE8", main_ti
#' This function checks to see if the optimization is done properly by checking the covariate averages
#' before and after adjustment.
#'
#' @param weighted_data object returned after calculating weights using \code{\link{estimate_weights}}
#' @param match_res object returned after calculating weights using \code{\link{estimate_weights}}
#' @param processed_agd a data frame, object returned after using \code{\link{process_agd}} or
#' aggregated data following the same naming convention
#' @param mean_digits number of digits for rounding mean columns in the output
#' @param prop_digits number of digits for rounding proportion columns in the output
#' @param sd_digits number of digits for rounding mean columns in the output
#'
#' @examples
#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
#' load(system.file("extdata", "match_res.rda", package = "maicplus", mustWork = TRUE))
#' outdata <- check_weights(match_res, processed_agd = agd)
#' print(outdata)
#'
#' @import DescTools
#'
#' @return data.frame of weighted and unweighted covariate averages of the IPD, and average of aggregate data
#' @export
#'
#' @examples
#' adsl <- read.csv(system.file("extdata", "adsl.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#' adrs <- read.csv(system.file("extdata", "adrs.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#' adtte <- read.csv(system.file("extdata", "adtte.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#'
#' ### AgD
#' # Baseline aggregate data for the comparator population
#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
#' package = "maicplus", mustWork = TRUE
#' ))
#' # target_pop2 <- read.csv(system.file("extdata", "aggregate_data_example_2.csv",
#' # package = "maicplus", mustWork = TRUE))
#' # target_pop3 <- read.csv(system.file("extdata", "aggregate_data_example_3.csv",
#' # package = "maicplus", mustWork = TRUE))
#'
#' # for time-to-event endpoints, pseudo IPD from digitalized KM
#' pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#'
#' #### prepare data ----------------------------------------------------------
#' target_pop <- process_agd(target_pop)
#' # target_pop2 <- process_agd(target_pop2) # demo of process_agd in different scenarios
#' # target_pop3 <- process_agd(target_pop3) # demo of process_agd in different scenarios
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
#'
#' match_res <- estimate_weights(
#' data = use_adsl,
#' centered_colnames = grep("_CENTERED$", names(use_adsl)),
#' start_val = 0,
#' method = "BFGS"
#' )
#'
#' check <- check_weights(
#' weighted_data = match_res,
#' processed_agd = target_pop
#' )
#'
#' print(check)
#'


check_weights <- function(weighted_data, processed_agd) {
ipd_with_weights <- weighted_data$data
match_cov <- weighted_data$centered_colnames
Expand Down Expand Up @@ -292,9 +268,9 @@ check_weights <- function(weighted_data, processed_agd) {
#' @param sd_digits number of digits for rounding mean columns in the output
#' @param digits minimal number of significant digits, see [print.default].
#' @param ... further arguments to [print.data.frame]
#'
#' @describeIn check_weights Print method for check_weights objects
#' @export

print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd_digits = 3, digits = getOption("digits"), ...) {

Check warning on line 274 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=274,col=121,[line_length_linter] Lines should not be more than 120 characters.
round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
round_digits[is.na(round_digits)] <- digits
Expand All @@ -313,15 +289,12 @@ print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd
}
}

#' Prints Note on Expected Sample Size Reduction
#' Note on Expected Sample Size Reduction
#'
#' @param width Number of characters to break string into new lines (`\n`).
#'
#' @return A character string
#' @export
#'
#' @examples
#' ess_footnote_text(width = 80)

ess_footnote_text <- function(width = 0.9 * getOption("width")) {
text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals
(https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common
Expand Down
11 changes: 11 additions & 0 deletions R/process_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ process_agd <- function(raw_agd) {
#' @param dummize_cols vector of column names to binarize
#' @param dummize_ref_level vector of reference level of the variables to binarize
#'
#' @examples
#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#'
#' @return ipd with dummized columns
#' @export

Expand Down Expand Up @@ -144,6 +148,13 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
#' In this case, SEX_MALE should also be available in the aggregate data.
#' @param agd pre-processed aggregate data which contain STUDY, ARM, and N. Variable names should be followed
#' by legal suffixes (i.e. MEAN, MEDIAN, SD, or PROP). Note that COUNT suffix is no longer accepted.
#' @examples
#' ipd <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' ipd <- dummize_ipd(ipd, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv", package = "maicplus", mustWork = TRUE))
#' agd <- process_agd(target_pop)
#'
#' ipd_centered <- center_ipd(ipd = ipd, agd = agd)
#'
#' @return centered ipd using aggregate level data averages
#' @export
Expand Down
50 changes: 39 additions & 11 deletions R/survival-helper.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
#' helper function: makeup to get median survival time from a `survival::survfit` object
#'
#' Extract and display median survival time with confidence interval
#' extract and display median survival time with confidence interval
#'
#' @param km_fit returned object from \code{survival::survfit}
#' @param legend a character string, name used in 'type' column in returned data frame
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#'
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' medSurv <- medSurv_makeup(kmobj, legend = "before matching", time_scale = "day")
#'
#' @return a data frame with a index column 'type', median survival time and confidence interval
#' @export

medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)

Expand All @@ -29,11 +35,16 @@ medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
}



#' Helper function to select a set of variables used for Kaplan-Meier plot
#' Helper function to select set of variables used for Kaplan-Meier plot
#'
#' @param km_fit returned object from \code{survival::survfit}
#' @return a list of data frames of variables from \code{survival::survfit}. Data frames are divided by treatment.
#'
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' survobj <- survfit_makeup(kmobj)
#'
#' @return a list of data frames of variables from survfit. Data frame is divided by treatment.
#' @export

survfit_makeup <- function(km_fit) {
Expand All @@ -57,13 +68,21 @@ survfit_makeup <- function(km_fit) {
#'
#' @param km_fit_before returned object from \code{survival::survfit} before adjustment
#' @param km_fit_after returned object from \code{survival::survfit} after adjustment
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#' @param trt a character string, name of the interested treatment in internal trial (real IPD)
#' @param trt_ext character string, name of the interested comparator in external trial used to
#' subset \code{dat_ext} (pseudo IPD)
#' @param endpoint_name a character string, name of the endpoint
#' @param time_scale time unit of median survival time, taking a value of 'year', 'month', 'week' or 'day'
#' @param trt internal trial treatment
#' @param trt_ext external trial treatment
#' @param endpoint_name name of the endpoint
#' @param line_col color of the line curves with the order of external, internal unadjusted, and internal adjusted
#'
#' @return a KM plot
#' @return a Kaplan-Meier plot
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' kmobj_adj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, weights = combined_data_tte$weight, conf.type = "log-log")
#' par(cex.main = 0.85)
#' km_plot(kmobj, kmobj_adj, time_scale = "month", trt = "A", trt_ext = "B", endpoint_name = "OS")
#' @export

km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext, endpoint_name = "") {
time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)

Expand Down Expand Up @@ -152,15 +171,20 @@ km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext
#'
#' a diagnosis plot for proportional hazard assumption, versus log-time (default) or time
#'
#' @param clldat object returned from \code{\link{survfit_makeup}}
#' @param km_fit returned object from \code{survival::survfit}
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#' @param log_time logical, TRUE (default) or FALSE
#' @param endpoint_name a character string, name of the endpoint
#' @param subtitle a character string, subtitle of the plot
#' @param exclude_censor logical, should censored data point be plotted
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' log_cum_haz_plot(kmobj, time_scale = "month", log_time = TRUE, endpoint_name = "OS", subtitle = "(Before Matching)")
#'
#' @return a plot
#' @export

log_cum_haz_plot <- function(clldat,
time_scale,
log_time = TRUE,
Expand Down Expand Up @@ -222,6 +246,10 @@ log_cum_haz_plot <- function(clldat,
#' @param log_time logical, TRUE (default) or FALSE
#' @param endpoint_name a character string, name of the endpoint
#' @param subtitle a character string, subtitle of the plot
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' unweighted_cox <- coxph(Surv(TIME, EVENT == 1) ~ ARM, data = combined_data_tte)
#' resid_plot(unweighted_cox, time_scale = "month", log_time = TRUE, endpoint_name = "OS", subtitle = "(Before Matching)")
#'
#' @return a plot
#' @export
Expand Down
9 changes: 9 additions & 0 deletions man/center_ipd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 6 additions & 51 deletions man/check_weights.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/dummize_ipd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 776ebd4

Please sign in to comment.