Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update roxygen header of 3 r files with examples #38

Merged
merged 18 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
S3method(plot,maicplus_estimate_weights)
S3method(print,maicplus_check_weights)
export(bucher)
export(calculate_weights_legend)
export(center_ipd)
export(check_weights)
export(dummize_ipd)
export(ess_footnote_text)
export(estimate_weights)
export(generate_survival_data)
export(km_plot)
export(log_cum_haz_plot)
export(maic_tte_unanchor)
export(medSurv_makeup)
Expand Down
114 changes: 46 additions & 68 deletions R/matching.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Functions for matching step: estimation of individual weights

# functions to be exported ---------------------------------------

#' Derive individual weights in the matching step of MAIC
#'
#' This function takes individual patient data (IPD) with centered covariates
#' (effect modifiers and/or prognostic variables) as input and generates
#' weights for each individual in IPD trial to match the covariates in aggregate data.
#' Assuming data is properly processed, this function takes individual patient data (IPD) with centered covariates
#' (effect modifiers and/or prognostic variables) as input, and generates weights for each individual in IPD trial
#' to match the covariates in aggregate data.
#'
#' @param data a numeric matrix, centered covariates of IPD, no missing value in any cell is allowed
#' @param centered_colnames a character or numeric vector (column indicators) of centered covariates
Expand All @@ -24,6 +26,14 @@
#' \item{ess}{effective sample size, square of sum divided by sum of squares}
#' \item{opt}{R object returned by \code{base::optim()}, for assess convergence and other details}
#' }
#'
#' @examples
#' load(system.file("extdata", "ipd_centered.rda", package = "maicplus", mustWork = TRUE))
#'
#' centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
#' centered_colnames <- paste0(centered_colnames, "_CENTERED")
#' match_res <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
#'
#' @export

estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, method = "BFGS", ...) {
Expand Down Expand Up @@ -99,19 +109,23 @@ estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, meth
}


#' Plot MAIC weights in a histogram with key statistics in legend
#' Calculate Statistics for Weight Plot Legend
#'
#' Calculates ESS reduction and median weights which is used to create legend for weights plot
#'
#' @param weighted_data object returned after calculating weights using [estimate_weights]
#'
#' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count
#' @examples
#' \dontrun{
#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' calculate_weights_legend(weighted_data)
#' @export

#' }
#' @keywords internal
calculate_weights_legend <- function(weighted_data) {
if (!inherits(weighted_data, "maicplus_estimate_weights")) {
stop("weighted_data must be class `maicplus_estimate_weights` generated by estimate_weights()")
}
ess <- weighted_data$ess
wt <- weighted_data$data$weights
wt_scaled <- weighted_data$data$scaled_weights
Expand Down Expand Up @@ -231,7 +245,10 @@ plot_weights_ggplot <- function(weighted_data, bin_col, vline_col,
) +
ggplot2::theme_bw() +
ggplot2::facet_wrap(~ind, nrow = 1) +
ggplot2::geom_text(data = legend_data, ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3) +
ggplot2::geom_text(
data = legend_data,
ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3
) +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 12),
axis.text = ggplot2::element_text(size = 12)
Expand Down Expand Up @@ -294,62 +311,18 @@ plot.maicplus_estimate_weights <- function(x, ggplot = FALSE,
#' @param processed_agd a data frame, object returned after using \code{\link{process_agd}} or
#' aggregated data following the same naming convention
#'
#' @examples
#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MikeJSeo Do you have the code to generate agd.rda? Could you add the code in data-raw/agd.R

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I just stored this data.frame manually:
agd <- data.frame(
STUDY = "Lung study",
ARM = "Total",
N = 300,
AGE_MEAN = 51,
AGE_MEDIAN = 49,
AGE_SD = 3.25,
SEX_MALE_PROP = 147/300,
ECOG0_PROP = 0.40,
ECOG0_COUNT = 40,
SMOKE_PROP = 58/(300-5),
N_PR_THER_MEDIAN = 2
)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we even need agd.rda? Let's just do this?
target_pop <- read.csv(system.file("extdata","aggregate_data_example_1.csv", package = "maicplus", mustWork =
agd <- process_agd(target_pop)

#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' outdata <- check_weights(match_res, processed_agd = agd)
#' print(outdata)
#'
#' @import DescTools
#'
#' @return data.frame of weighted and unweighted covariate averages of the IPD, and average of aggregate data
#' @export
#'
#' @examples
#' adsl <- read.csv(system.file("extdata", "adsl.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#' adrs <- read.csv(system.file("extdata", "adrs.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#' adtte <- read.csv(system.file("extdata", "adtte.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#'
#' ### AgD
#' # Baseline aggregate data for the comparator population
#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
#' package = "maicplus", mustWork = TRUE
#' ))
#' # target_pop2 <- read.csv(system.file("extdata", "aggregate_data_example_2.csv",
#' # package = "maicplus", mustWork = TRUE))
#' # target_pop3 <- read.csv(system.file("extdata", "aggregate_data_example_3.csv",
#' # package = "maicplus", mustWork = TRUE))
#'
#' # for time-to-event endpoints, pseudo IPD from digitalized KM
#' pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#'
#' #### prepare data ----------------------------------------------------------
#' target_pop <- process_agd(target_pop)
#' # target_pop2 <- process_agd(target_pop2) # demo of process_agd in different scenarios
#' # target_pop3 <- process_agd(target_pop3) # demo of process_agd in different scenarios
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
#'
#' match_res <- estimate_weights(
#' data = use_adsl,
#' centered_colnames = grep("_CENTERED$", names(use_adsl)),
#' start_val = 0,
#' method = "BFGS"
#' )
#'
#' check <- check_weights(
#' weighted_data = match_res,
#' processed_agd = target_pop
#' )
#'
#' print(check)
#'


check_weights <- function(weighted_data, processed_agd) {
ipd_with_weights <- weighted_data$data
match_cov <- weighted_data$centered_colnames
Expand All @@ -368,7 +341,8 @@ check_weights <- function(weighted_data, processed_agd) {
sum_centered_IPD_with_weights = as.vector(num_check)
)
attr(outdata, "footer") <- list()
ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE) # find item that was matched by mean
# find item that was matched by mean
ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE)
ind_mean <- sapply(ind_mean, function(ii) any(grepl("_MEAN$", ii)))
outdata$match_stat <- ifelse(grepl("_MEDIAN$", outdata$covariate), "Median",
ifelse(grepl("_SQUARED$", outdata$covariate), "SD",
Expand All @@ -384,7 +358,10 @@ check_weights <- function(weighted_data, processed_agd) {
covname <- outdata$covariate[ii]
if (outdata$match_stat[ii] %in% c("Mean", "Prop")) {
outdata$internal_trial[ii] <- mean(ipd_with_weights[[covname]], na.rm = TRUE)
outdata$internal_trial_after_weighted[ii] <- weighted.mean(ipd_with_weights[[covname]], w = ipd_with_weights$weights, na.rm = TRUE)
outdata$internal_trial_after_weighted[ii] <- weighted.mean(
ipd_with_weights[[covname]],
w = ipd_with_weights$weights, na.rm = TRUE
)
} else if (outdata$match_stat[ii] == "Median") {
outdata$internal_trial[ii] <- quantile(ipd_with_weights[[covname]],
probs = 0.5,
Expand Down Expand Up @@ -430,10 +407,14 @@ check_weights <- function(weighted_data, processed_agd) {
#' @param sd_digits number of digits for rounding mean columns in the output
#' @param digits minimal number of significant digits, see [print.default].
#' @param ... further arguments to [print.data.frame]
#'
#' @describeIn check_weights Print method for check_weights objects
#' @export
print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd_digits = 3, digits = getOption("digits"), ...) {

print.maicplus_check_weights <- function(x,
mean_digits = 2,
prop_digits = 2,
sd_digits = 3,
digits = getOption("digits"), ...) {
round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
round_digits[is.na(round_digits)] <- digits

Expand All @@ -451,15 +432,12 @@ print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd
}
}

#' Prints Note on Expected Sample Size Reduction
#' Note on Expected Sample Size Reduction
#'
#' @param width Number of characters to break string into new lines (`\n`).
#'
#' @return A character string
#' @export
#'
#' @examples
#' ess_footnote_text(width = 80)
#' @keywords internal
ess_footnote_text <- function(width = 0.9 * getOption("width")) {
text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals
(https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common
Expand Down
11 changes: 11 additions & 0 deletions R/process_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ process_agd <- function(raw_agd) {
#' @param dummize_cols vector of column names to binarize
#' @param dummize_ref_level vector of reference level of the variables to binarize
#'
#' @examples
#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#'
#' @return ipd with dummized columns
#' @export

Expand Down Expand Up @@ -144,6 +148,13 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
#' In this case, SEX_MALE should also be available in the aggregate data.
#' @param agd pre-processed aggregate data which contain STUDY, ARM, and N. Variable names should be followed
#' by legal suffixes (i.e. MEAN, MEDIAN, SD, or PROP). Note that COUNT suffix is no longer accepted.
#' @examples
#' ipd <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' ipd <- dummize_ipd(ipd, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv", package = "maicplus", mustWork = TRUE))
#' agd <- process_agd(target_pop)
#'
#' ipd_centered <- center_ipd(ipd = ipd, agd = agd)
#'
#' @return centered ipd using aggregate level data averages
#' @export
Expand Down
50 changes: 39 additions & 11 deletions R/survival-helper.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
#' helper function: makeup to get median survival time from a `survival::survfit` object
#'
#' Extract and display median survival time with confidence interval
#' extract and display median survival time with confidence interval
#'
#' @param km_fit returned object from \code{survival::survfit}
#' @param legend a character string, name used in 'type' column in returned data frame
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#'
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' medSurv <- medSurv_makeup(kmobj, legend = "before matching", time_scale = "day")
#'
#' @return a data frame with a index column 'type', median survival time and confidence interval
#' @export

medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)

Expand All @@ -29,11 +35,16 @@ medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {
}



#' Helper function to select a set of variables used for Kaplan-Meier plot
#' Helper function to select set of variables used for Kaplan-Meier plot
#'
#' @param km_fit returned object from \code{survival::survfit}
#' @return a list of data frames of variables from \code{survival::survfit}. Data frames are divided by treatment.
#'
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' survobj <- survfit_makeup(kmobj)
#'
#' @return a list of data frames of variables from survfit. Data frame is divided by treatment.
#' @export

survfit_makeup <- function(km_fit) {
Expand All @@ -57,13 +68,21 @@ survfit_makeup <- function(km_fit) {
#'
#' @param km_fit_before returned object from \code{survival::survfit} before adjustment
#' @param km_fit_after returned object from \code{survival::survfit} after adjustment
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#' @param trt a character string, name of the interested treatment in internal trial (real IPD)
#' @param trt_ext character string, name of the interested comparator in external trial used to
#' subset \code{dat_ext} (pseudo IPD)
#' @param endpoint_name a character string, name of the endpoint
#' @param time_scale time unit of median survival time, taking a value of 'year', 'month', 'week' or 'day'
#' @param trt internal trial treatment
#' @param trt_ext external trial treatment
#' @param endpoint_name name of the endpoint
#' @param line_col color of the line curves with the order of external, internal unadjusted, and internal adjusted
#'
#' @return a KM plot
#' @return a Kaplan-Meier plot
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' kmobj_adj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, weights = combined_data_tte$weight, conf.type = "log-log")
#' par(cex.main = 0.85)
#' km_plot(kmobj, kmobj_adj, time_scale = "month", trt = "A", trt_ext = "B", endpoint_name = "OS")
#' @export

km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext, endpoint_name = "") {
time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)

Expand Down Expand Up @@ -152,15 +171,20 @@ km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext
#'
#' a diagnosis plot for proportional hazard assumption, versus log-time (default) or time
#'
#' @param clldat object returned from \code{\link{survfit_makeup}}
#' @param km_fit returned object from \code{survival::survfit}
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#' @param log_time logical, TRUE (default) or FALSE
#' @param endpoint_name a character string, name of the endpoint
#' @param subtitle a character string, subtitle of the plot
#' @param exclude_censor logical, should censored data point be plotted
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' kmobj <- survfit(Surv(TIME, EVENT) ~ ARM, combined_data_tte, conf.type = "log-log")
#' log_cum_haz_plot(kmobj, time_scale = "month", log_time = TRUE, endpoint_name = "OS", subtitle = "(Before Matching)")
#'
#' @return a plot
#' @export

log_cum_haz_plot <- function(clldat,
gravesti marked this conversation as resolved.
Show resolved Hide resolved
time_scale,
log_time = TRUE,
Expand Down Expand Up @@ -222,6 +246,10 @@ log_cum_haz_plot <- function(clldat,
#' @param log_time logical, TRUE (default) or FALSE
#' @param endpoint_name a character string, name of the endpoint
#' @param subtitle a character string, subtitle of the plot
#' @examples
#' load(system.file("extdata", "combined_data_tte.rda", package = "maicplus", mustWork = TRUE))
#' unweighted_cox <- coxph(Surv(TIME, EVENT == 1) ~ ARM, data = combined_data_tte)
#' resid_plot(unweighted_cox, time_scale = "month", log_time = TRUE, endpoint_name = "OS", subtitle = "(Before Matching)")
#'
#' @return a plot
#' @export
Expand Down
5 changes: 4 additions & 1 deletion man/calculate_weights_legend.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions man/center_ipd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading