Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update roxygen header of 3 r files with examples #38

Merged
merged 18 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ images
^maicplus.*\.tar\.gz$
^maicplus.*\.tgz$
^inst/dev/MAIC/MAIC-main/.Rbuildignore
^data-raw$
^design$
3 changes: 1 addition & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
S3method(plot,maicplus_estimate_weights)
S3method(print,maicplus_check_weights)
export(bucher)
export(calculate_weights_legend)
export(center_ipd)
export(check_weights)
export(dummize_ipd)
export(ess_footnote_text)
export(estimate_weights)
export(generate_survival_data)
export(km_plot)
export(log_cum_haz_plot)
export(maic_tte_unanchor)
export(medSurv_makeup)
Expand Down
105 changes: 36 additions & 69 deletions R/matching.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Functions for matching step: estimation of individual weights

# functions to be exported ---------------------------------------

#' Derive individual weights in the matching step of MAIC
#'
#' This function takes individual patient data (IPD) with centered covariates
#' (effect modifiers and/or prognostic variables) as input and generates
#' weights for each individual in IPD trial to match the covariates in aggregate data.
#' Assuming data is properly processed, this function takes individual patient data (IPD) with centered covariates
#' (effect modifiers and/or prognostic variables) as input, and generates weights for each individual in IPD trial
#' to match the covariates in aggregate data.
#'
#' @param data a numeric matrix, centered covariates of IPD, no missing value in any cell is allowed
#' @param centered_colnames a character or numeric vector (column indicators) of centered covariates
Expand All @@ -24,6 +26,8 @@
#' \item{ess}{effective sample size, square of sum divided by sum of squares}
#' \item{opt}{R object returned by \code{base::optim()}, for assess convergence and other details}
#' }
#'
#' @examples
#' @export

estimate_weights <- function(data, centered_colnames = NULL, start_val = 0, method = "BFGS", ...) {
Expand Down Expand Up @@ -51,16 +55,16 @@

# prepare data for optimization
if (is.null(centered_colnames)) centered_colnames <- seq_len(ncol(data))
EM <- data[, centered_colnames, drop = FALSE]

Check warning on line 58 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=58,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
ind <- apply(EM, 1, function(xx) any(is.na(xx)))
nr_missing <- sum(ind)
EM <- as.matrix(EM[!ind, , drop = FALSE])

Check warning on line 61 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=61,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.

# objective and gradient functions
objfn <- function(alpha, X) {

Check warning on line 64 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=64,col=28,[object_name_linter] Variable and function name style should be snake_case or symbols.
sum(exp(X %*% alpha))
}
gradfn <- function(alpha, X) {

Check warning on line 67 in R/matching.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/matching.R,line=67,col=29,[object_name_linter] Variable and function name style should be snake_case or symbols.
colSums(sweep(X, 1, exp(X %*% alpha), "*"))
}

Expand Down Expand Up @@ -99,19 +103,23 @@
}


#' Plot MAIC weights in a histogram with key statistics in legend
#' Calculate Statistics for Weight Plot Legend
#'
#' Calculates ESS reduction and median weights which is used to create legend for weights plot
#'
#' @param weighted_data object returned after calculating weights using [estimate_weights]
#'
#' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count
#' @examples
#' \dontrun{
#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' calculate_weights_legend(weighted_data)
#' @export

#' }
#' @keywords internal
calculate_weights_legend <- function(weighted_data) {
if (!inherits(weighted_data, "maicplus_estimate_weights")) {
stop("weighted_data must be class `maicplus_estimate_weights` generated by estimate_weights()")
}
ess <- weighted_data$ess
wt <- weighted_data$data$weights
wt_scaled <- weighted_data$data$scaled_weights
Expand Down Expand Up @@ -231,7 +239,10 @@
) +
ggplot2::theme_bw() +
ggplot2::facet_wrap(~ind, nrow = 1) +
ggplot2::geom_text(data = legend_data, ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3) +
ggplot2::geom_text(
data = legend_data,
ggplot2::aes_string(label = "lab"), x = Inf, y = Inf, hjust = 1, vjust = 1, size = 3
) +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 12),
axis.text = ggplot2::element_text(size = 12)
Expand Down Expand Up @@ -294,62 +305,13 @@
#' @param processed_agd a data frame, object returned after using \code{\link{process_agd}} or
#' aggregated data following the same naming convention
#'
#' @examples
#' @import DescTools
#'
#' @return data.frame of weighted and unweighted covariate averages of the IPD, and average of aggregate data
#' @export
#'
#' @examples
#' adsl <- read.csv(system.file("extdata", "adsl.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#' adrs <- read.csv(system.file("extdata", "adrs.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#' adtte <- read.csv(system.file("extdata", "adtte.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#'
#' ### AgD
#' # Baseline aggregate data for the comparator population
#' target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv",
#' package = "maicplus", mustWork = TRUE
#' ))
#' # target_pop2 <- read.csv(system.file("extdata", "aggregate_data_example_2.csv",
#' # package = "maicplus", mustWork = TRUE))
#' # target_pop3 <- read.csv(system.file("extdata", "aggregate_data_example_3.csv",
#' # package = "maicplus", mustWork = TRUE))
#'
#' # for time-to-event endpoints, pseudo IPD from digitalized KM
#' pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv",
#' package = "maicplus",
#' mustWork = TRUE
#' ))
#'
#' #### prepare data ----------------------------------------------------------
#' target_pop <- process_agd(target_pop)
#' # target_pop2 <- process_agd(target_pop2) # demo of process_agd in different scenarios
#' # target_pop3 <- process_agd(target_pop3) # demo of process_agd in different scenarios
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' use_adsl <- center_ipd(ipd = adsl, agd = target_pop)
#'
#' match_res <- estimate_weights(
#' data = use_adsl,
#' centered_colnames = grep("_CENTERED$", names(use_adsl)),
#' start_val = 0,
#' method = "BFGS"
#' )
#'
#' check <- check_weights(
#' weighted_data = match_res,
#' processed_agd = target_pop
#' )
#'
#' print(check)
#'


check_weights <- function(weighted_data, processed_agd) {
ipd_with_weights <- weighted_data$data
match_cov <- weighted_data$centered_colnames
Expand All @@ -368,7 +330,8 @@
sum_centered_IPD_with_weights = as.vector(num_check)
)
attr(outdata, "footer") <- list()
ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE) # find item that was matched by mean
# find item that was matched by mean
ind_mean <- lapply(outdata$covariate, grep, x = names(processed_agd), value = TRUE)
ind_mean <- sapply(ind_mean, function(ii) any(grepl("_MEAN$", ii)))
outdata$match_stat <- ifelse(grepl("_MEDIAN$", outdata$covariate), "Median",
ifelse(grepl("_SQUARED$", outdata$covariate), "SD",
Expand All @@ -380,11 +343,14 @@
outdata$external_trial <- unlist(processed_agd[paste(outdata$covariate, toupper(outdata$match_stat), sep = "_")])

# fill in stat from unweighted and weighted IPD
for (ii in 1:nrow(outdata)) {
for (ii in seq_len(nrow(outdata))) {
covname <- outdata$covariate[ii]
if (outdata$match_stat[ii] %in% c("Mean", "Prop")) {
outdata$internal_trial[ii] <- mean(ipd_with_weights[[covname]], na.rm = TRUE)
outdata$internal_trial_after_weighted[ii] <- weighted.mean(ipd_with_weights[[covname]], w = ipd_with_weights$weights, na.rm = TRUE)
outdata$internal_trial_after_weighted[ii] <- weighted.mean(
ipd_with_weights[[covname]],
w = ipd_with_weights$weights, na.rm = TRUE
)
} else if (outdata$match_stat[ii] == "Median") {
outdata$internal_trial[ii] <- quantile(ipd_with_weights[[covname]],
probs = 0.5,
Expand Down Expand Up @@ -430,10 +396,14 @@
#' @param sd_digits number of digits for rounding mean columns in the output
#' @param digits minimal number of significant digits, see [print.default].
#' @param ... further arguments to [print.data.frame]
#'
#' @describeIn check_weights Print method for check_weights objects
#' @export
print.maicplus_check_weights <- function(x, mean_digits = 2, prop_digits = 2, sd_digits = 3, digits = getOption("digits"), ...) {

print.maicplus_check_weights <- function(x,
mean_digits = 2,
prop_digits = 2,
sd_digits = 3,
digits = getOption("digits"), ...) {
round_digits <- c("Mean" = mean_digits, "Prop" = prop_digits, "SD" = sd_digits)[x$match_stat]
round_digits[is.na(round_digits)] <- digits

Expand All @@ -451,15 +421,12 @@
}
}

#' Prints Note on Expected Sample Size Reduction
#' Note on Expected Sample Size Reduction
#'
#' @param width Number of characters to break string into new lines (`\n`).
#'
#' @return A character string
#' @export
#'
#' @examples
#' ess_footnote_text(width = 80)
#' @keywords internal
ess_footnote_text <- function(width = 0.9 * getOption("width")) {
text <- "An ESS reduction up to ~60% is not unexpected based on the 2021 survey of NICE's technology appraisals
(https://onlinelibrary.wiley.com/doi/full/10.1002/jrsm.1511), whereas a reduction of >75% is less common
Expand Down
16 changes: 15 additions & 1 deletion R/process_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@
#' @param dummize_cols vector of column names to binarize
#' @param dummize_ref_level vector of reference level of the variables to binarize
#'
#' @examples
#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#'
#' @return ipd with dummized columns
#' @export

Expand Down Expand Up @@ -140,10 +144,20 @@
#'
#' @param ipd IPD variable names should match the aggregate data names without the suffix.
#' This would involve either changing the aggregate data name or the ipd name.
#' For instance, if we binarize SEX variable with MALE as a reference using [dummize_ipd], function names the new variable as SEX_MALE.
#' For instance, if we binarize SEX variable with MALE as a reference using [dummize_ipd],
#' function names the new variable as SEX_MALE.
#' In this case, SEX_MALE should also be available in the aggregate data.
#' @param agd pre-processed aggregate data which contain STUDY, ARM, and N. Variable names should be followed
#' by legal suffixes (i.e. MEAN, MEDIAN, SD, or PROP). Note that COUNT suffix is no longer accepted.
#' @examples
#' ipd <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' ipd <- dummize_ipd(ipd, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' target_pop <- read.csv(
#' system.file("extdata", "aggregate_data_example_1.csv", package = "maicplus", mustWork = TRUE)
#' )
#' agd <- process_agd(target_pop)
#'
#' ipd_centered <- center_ipd(ipd = ipd, agd = agd)
#'
#' @return centered ipd using aggregate level data averages
#' @export
Expand Down Expand Up @@ -200,13 +214,13 @@

if (nrow(use_agd) < 2) stop("error in call complete_agd: need to have at least 2 rows that ARM!='total' ")

rowId <- nrow(use_agd) + 1

Check warning on line 217 in R/process_data.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/process_data.R,line=217,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
use_agd[rowId, ] <- NA
use_agd$STUDY[rowId] <- use_agd$STUDY[1]
use_agd$ARM[rowId] <- "total"

# complete N and count
NN <- use_agd[["N"]][rowId] <- sum(use_agd[["N"]], na.rm = TRUE)

Check warning on line 223 in R/process_data.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/process_data.R,line=223,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
nn <- use_agd[["N"]][-rowId]
for (i in grep("_COUNT$", names(use_agd), value = TRUE)) {
use_agd[[i]][rowId] <- sum(use_agd[[i]][-rowId], na.rm = TRUE)
Expand Down
54 changes: 32 additions & 22 deletions R/survival-helper.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
#' helper function: makeup to get median survival time from a `survival::survfit` object
#' Helper function: makeup to get median survival time from a `survival::survfit` object
#'
#' Extract and display median survival time with confidence interval
#'
#' @param km_fit returned object from \code{survival::survfit}
#' @param legend a character string, name used in 'type' column in returned data frame
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#'
#' @examples
#' @return a data frame with a index column 'type', median survival time and confidence interval
#' @export

medSurv_makeup <- function(km_fit, legend = "before matching", time_scale) {

Check warning on line 13 in R/survival-helper.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/survival-helper.R,line=13,col=1,[object_name_linter] Variable and function name style should be snake_case or symbols.
time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)

if (!time_scale %in% names(time_unit)) stop("time_scale has to be 'year', 'month', 'week' or 'day'")
Expand All @@ -29,11 +31,12 @@
}



#' Helper function to select a set of variables used for Kaplan-Meier plot
#' Helper function to select set of variables used for Kaplan-Meier plot
#'
#' @param km_fit returned object from \code{survival::survfit}
#' @return a list of data frames of variables from \code{survival::survfit}. Data frames are divided by treatment.
#'
#' @examples
#' @return a list of data frames of variables from survfit. Data frame is divided by treatment.
#' @export

survfit_makeup <- function(km_fit) {
Expand All @@ -57,14 +60,18 @@
#'
#' @param km_fit_before returned object from \code{survival::survfit} before adjustment
#' @param km_fit_after returned object from \code{survival::survfit} after adjustment
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#' @param trt a character string, name of the interested treatment in internal trial (real IPD)
#' @param trt_ext character string, name of the interested comparator in external trial used to
#' subset \code{dat_ext} (pseudo IPD)
#' @param endpoint_name a character string, name of the endpoint
#' @param time_scale time unit of median survival time, taking a value of 'year', 'month', 'week' or 'day'
#' @param trt internal trial treatment
#' @param trt_ext external trial treatment
#' @param endpoint_name name of the endpoint
#' @param line_col color of the line curves with the order of external, internal unadjusted, and internal adjusted
#'
#' @return a KM plot
km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext, endpoint_name = "") {
#' @return a Kaplan-Meier plot
#' @examples
#' @export

km_plot <- function(km_fit_before, km_fit_after = NULL, time_scale, trt, trt_ext, endpoint_name = "",
line_col = c("#5450E4", "#00857C", "#6ECEB2")) {
time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)

if (!time_scale %in% names(time_unit)) stop("time_scale has to be 'year', 'month', 'week' or 'day'")
Expand Down Expand Up @@ -97,48 +104,48 @@
# add km lines from external trial
lines(
y = pd_be[[trt_ext]]$surv,
x = (pd_be[[trt_ext]]$time / time_unit[[time_scale]]), col = "#5450E4",
x = (pd_be[[trt_ext]]$time / time_unit[[time_scale]]), col = line_col[1],
type = "s"
)
tmpid <- pd_be[[trt_ext]]$censor == 1
points(
y = pd_be[[trt_ext]]$surv[tmpid],
x = (pd_be[[trt_ext]]$time[tmpid] / time_unit[[time_scale]]),
col = "#5450E4", pch = 3, cex = 0.7
col = line_col[1], pch = 3, cex = 0.7
)

# add km lines from internal trial before adjustment
lines(
y = pd_be[[trt]]$surv,
x = (pd_be[[trt]]$time / time_unit[[time_scale]]), col = "#00857C",
x = (pd_be[[trt]]$time / time_unit[[time_scale]]), col = line_col[2],
type = "s"
)
tmpid <- pd_be[[trt]]$censor == 1
points(
y = pd_be[[trt]]$surv[tmpid],
x = (pd_be[[trt]]$time[tmpid] / time_unit[[time_scale]]),
col = "#00857C", pch = 3, cex = 0.7
col = line_col[2], pch = 3, cex = 0.7
)

# add km lines from internal trial after adjustment
if (!is.null(km_fit_after)) {
lines(
y = pd_af[[trt]]$surv,
x = (pd_af[[trt]]$time / time_unit[[time_scale]]), col = "#6ECEB2", lty = 2,
x = (pd_af[[trt]]$time / time_unit[[time_scale]]), col = line_col[3], lty = 2,
type = "s"
)
tmpid <- pd_af[[trt]]$censor == 1
points(
y = pd_af[[trt]]$surv[tmpid],
x = (pd_af[[trt]]$time[tmpid] / time_unit[[time_scale]]),
col = "#6ECEB2", pch = 3, cex = 0.7
col = line_col[3], pch = 3, cex = 0.7
)
}

use_leg <- 1:ifelse(is.null(km_fit_after), 2, 3)
# add legend
legend("topright",
bty = "n", lty = c(1, 1, 2)[use_leg], cex = 0.8, col = c("#5450E4", "#00857C", "#6ECEB2")[use_leg],
bty = "n", lty = c(1, 1, 2)[use_leg], cex = 0.8, col = line_col[use_leg],
legend = c(
paste0("Comparator: ", trt_ext),
paste0("Treatment: ", trt),
Expand All @@ -152,16 +159,17 @@
#'
#' a diagnosis plot for proportional hazard assumption, versus log-time (default) or time
#'
#' @param clldat object returned from \code{\link{survfit_makeup}}
#' @param km_fit returned object from \code{survival::survfit}
#' @param time_scale a character string, 'year', 'month', 'week' or 'day', time unit of median survival time
#' @param log_time logical, TRUE (default) or FALSE
#' @param endpoint_name a character string, name of the endpoint
#' @param subtitle a character string, subtitle of the plot
#' @param exclude_censor logical, should censored data point be plotted
#'
#' @examples
#' @return a plot
#' @export
log_cum_haz_plot <- function(clldat,

log_cum_haz_plot <- function(km_fit,
time_scale,
log_time = TRUE,
endpoint_name = "",
Expand All @@ -170,12 +178,14 @@
time_unit <- list("year" = 365.24, "month" = 30.4367, "week" = 7, "day" = 1)
if (!time_scale %in% names(time_unit)) stop("time_scale has to be 'year', 'month', 'week' or 'day'")

clldat <- survfit_makeup(km_fit)

if (exclude_censor) {
clldat <- lapply(clldat, function(xxt) xxt[xxt$censor == 0, , drop = FALSE])
}

all.times <- do.call(rbind, clldat)$time / time_unit[[time_scale]]

Check warning on line 187 in R/survival-helper.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/survival-helper.R,line=187,col=3,[object_name_linter] Variable and function name style should be snake_case or symbols.
if (log_time) all.times <- log(all.times)

Check warning on line 188 in R/survival-helper.R

View workflow job for this annotation

GitHub Actions / SuperLinter 🦸‍♀️ / SuperLinter 🦸‍♂️

file=/github/workspace/R/survival-helper.R,line=188,col=17,[object_name_linter] Variable and function name style should be snake_case or symbols.
t_range <- range(all.times)
y_range <- range(log(do.call(rbind, clldat)$cumhaz))

Expand Down Expand Up @@ -222,7 +232,7 @@
#' @param log_time logical, TRUE (default) or FALSE
#' @param endpoint_name a character string, name of the endpoint
#' @param subtitle a character string, subtitle of the plot
#'
#' @examples
#' @return a plot
#' @export

Expand Down
Loading
Loading