diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 47798fb1..9beaf36a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,7 @@ repos: - shiny - lubridate - DescTools + - lmtest # codemeta must be above use-tidy-description when both are used # - id: codemeta-description-updated - id: use-tidy-description diff --git a/NAMESPACE b/NAMESPACE index 081263e9..5933373f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(basic_kmplot) export(basic_kmplot2) export(bootstrap_HR) export(bucher) +export(calculate_weights_legend) export(center_ipd) export(check_weights) export(dummize_ipd) diff --git a/R/data.R b/R/data.R new file mode 100644 index 00000000..0990ef58 --- /dev/null +++ b/R/data.R @@ -0,0 +1,220 @@ +# unanchored datasets ------ + +#' Patient data from single arm study +#' @format A data frame with 500 rows and 8 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm.} +#' \item{AGE}{Age in years at baseline.} +#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.} +#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.} +#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.} +#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.} +#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.} +#' +#' } +#' @keywords dataset +#' @family unanchored datasets +"adsl_sat" + +#' Survival data from single arm trial +#' @format A data frame with 500 rows and 10 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm, `"A"`.} +#' \item{AVAL}{Analysis value, which in this dataset is overall survival time in days.} +#' \item{AVALU}{Unit of `AVAL`.} +#' \item{PARAMCD}{Parameter code of `AVAL`, `"OS"`.} +#' \item{PARAM}{Parameter name of `AVAL`, `"Overall Survival"`.} +#' \item{CNSR}{Censoring indicator `0`/`1`.} 
+#' \item{TIME}{Survival time in days.} +#' \item{EVENT}{Event indicator `0`/`1`.} +#' } +#' @family unanchored datasets +#' @keywords dataset +"adtte_sat" + + +#' Pseudo individual patient survival data from published study +#' @format A data frame with 300 rows and 3 columns: +#' \describe{ +#' \item{Time}{Survival time in days.} +#' \item{Event}{Event indicator `0`/`1`.} +#' \item{ARM}{Assigned treatment arm, `"B"`.} +#' } +#' @family unanchored datasets +#' @keywords dataset +"pseudo_ipd_sat" + + +#' Centered patient data from single arm trial +#' @format A data frame with 500 rows and 15 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm.} +#' \item{AGE}{Age in years at baseline.} +#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.} +#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.} +#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.} +#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.} +#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.} +#' \item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data [agd].} +#' \item{AGE_MEDIAN_CENTERED}{`AGE` greater/less than `AGE_MEDIAN` in [agd] coded as `1`/`0` and then centered at +#' 0.5.} +#' \item{AGE_SQUARED_CENTERED}{`AGE` squared and centered with respect to the `AGE` in [agd]. 
The squared age in the +#' aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.} +#' \item{SEX_MALE_CENTERED}{`SEX_MALE` centered by the proportion of male patients in [agd]} +#' \item{ECOG0_CENTERED}{`ECOG0` centered by the proportion of `ECOG0` in [agd]} +#' \item{SMOKE_CENTERED}{`SMOKE` centered by the proportion of `SMOKE` in [agd]} +#' \item{N_PR_THER_MEDIAN_CENTERED}{`N_PR_THER` centered by the median in [agd].} +#' } +#' @family unanchored datasets +#' @keywords dataset +"centered_ipd_sat" + +#' Binary outcome data from single arm trial +#' @format A data frame with 500 rows and 5 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm.} +#' \item{AVAL}{Analysis value, in this dataset an indicator of response.} +#' \item{PARAM}{Parameter type of `AVAL`.} +#' \item{RESPONSE}{Indicator of response.} +#' } +#' @family unanchored datasets +#' @keywords dataset +"adrs_sat" + +#' Weighted object for single arm trial data +#' @format A `maicplus_estimate_weights` object created by [estimate_weights()] containing +#' \describe{ +#' \item{data}{patient level data with weights} +#' \item{centered_colnames}{Columns used in MAIC} +#' \item{nr_missing}{Number of observations with missing data} +#' \item{ess}{Effective sample size} +#' \item{opt}{Information from `optim` from weight calculation} +#' \item{boot}{Parameters and bootstrap sample weights, `NULL` in this object} +#' } +#' @family unanchored datasets +#' @keywords dataset +"weighted_sat" + +# aggregate data ------ + +#' Aggregate effect modifier data from published study +#' +#' This data is formatted to be used in [center_ipd()]. 
+#' +#' @format A data frame with 3 rows and 10 columns: +#' \describe{ +#' \item{STUDY}{The study name, Study_XXXX} +#' \item{ARM}{Study arm name or total} +#' \item{N}{Number of observations in study arm} +#' \item{AGE_MEAN}{Mean age in study arm} +#' \item{AGE_MEDIAN}{Median age in study arm} +#' \item{AGE_SD}{Standard deviation of age in study arm} +#' \item{SEX_MALE_COUNT}{Number of male patients} +#' \item{ECOG0_COUNT}{Number of patients with ECOG score = 0} +#' \item{SMOKE_COUNT}{Number of smokers} +#' \item{N_PR_THER_MEDIAN}{Median number of prior therapies} +#' } +#' @family unanchored datasets +#' @family anchored datasets +#' @keywords dataset +"agd" + + +# anchored datasets ------- + +#' Patient data from two arm trial +#' @format A data frame with 1000 rows and 8 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm.} +#' \item{AGE}{Age in years at baseline.} +#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.} +#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.} +#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.} +#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.} +#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.} +#' } +#' @family anchored datasets +#' @keywords dataset +"adsl_twt" + + +#' Survival data from two arm trial +#' @format A data frame with 1000 rows and 10 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm, `"A"`, `"C"`.} +#' \item{AVAL}{Analysis value, which in this dataset is overall survival time in days.} +#' \item{AVALU}{Unit of `AVAL`.} +#' \item{PARAMCD}{Parameter code of `AVAL`, `"OS"`.} +#' \item{PARAM}{Parameter name of `AVAL`, `"Overall Survival"`.} +#' \item{CNSR}{Censoring indicator `0`/`1`.} +#' \item{TIME}{Survival time in days.} +#' \item{EVENT}{Event indicator `0`/`1`.} +#' } +#' @family 
anchored datasets +#' @keywords dataset +"adtte_twt" + +#' Binary outcome data from two arm trial +#' @format A data frame with 1000 rows and 5 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm, `"A"`, `"C"`.} +#' \item{AVAL}{Analysis value, in this dataset an indicator of response.} +#' \item{PARAM}{Parameter type of `AVAL`.} +#' \item{RESPONSE}{Indicator of response.} +#' } +"adrs_twt" + +#' Pseudo individual patient survival data from published two arm study +#' @format A data frame with 800 rows and 3 columns: +#' \describe{ +#' \item{Time}{Survival time in days.} +#' \item{Event}{Event indicator `0`/`1`.} +#' \item{ARM}{Assigned treatment arm, `"B"`, `"C"`.} +#' } +#' @family anchored datasets +#' @keywords dataset +"pseudo_ipd_twt" + + +#' Centered patient data from two arm trial +#' @format A data frame with 1000 rows and 15 columns: +#' \describe{ +#' \item{USUBJID}{Unique subject identifiers for patients.} +#' \item{ARM}{Assigned treatment arm.} +#' \item{AGE}{Age in years at baseline.} +#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.} +#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.} +#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.} +#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.} +#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.} +#' \item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data [agd].} +#' \item{AGE_MEDIAN_CENTERED}{`AGE` greater/less than `AGE_MEDIAN` in [agd] coded as `1`/`0` and then centered at +#' 0.5.} +#' \item{AGE_SQUARED_CENTERED}{`AGE` squared and centered with respect to the `AGE` in [agd]. 
The squared age in the +#' aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.} +#' \item{SEX_MALE_CENTERED}{`SEX_MALE` centered by the proportion of male patients in [agd]} +#' \item{ECOG0_CENTERED}{`ECOG0` centered by the proportion of `ECOG0` in [agd]} +#' \item{SMOKE_CENTERED}{`SMOKE` centered by the proportion of `SMOKE` in [agd]} +#' \item{N_PR_THER_MEDIAN_CENTERED}{`N_PR_THER` centered by the median in [agd].} +#' } +#' @keywords dataset +#' @family anchored datasets +"centered_ipd_twt" + + +if (FALSE) { + make_roxygen_data <- function(df) { + cn <- colnames(df) + cat("#' @format A data frame with", nrow(df), "rows and", ncol(df), "columns:\n") + cat("#' \\describe{\n") + for (i in cn) cat("#' \\item{", i, "}{}\n", sep = "") + cat("#' }") + } +} diff --git a/R/matching.R b/R/matching.R index a85f1822..fcf24f9d 100644 --- a/R/matching.R +++ b/R/matching.R @@ -37,14 +37,19 @@ #' } #' #' @examples -#' load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE)) -#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) -#' ipd_centered <- center_ipd(ipd = ipd, agd = agd) -#' -#' centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN") -#' centered_colnames <- paste0(centered_colnames, "_CENTERED") +#' data(agd) +#' data(adsl_sat) +#' ipd_centered <- center_ipd(ipd = adsl_sat, agd = process_agd(agd)) +#' centered_colnames <- grep("_CENTERED", colnames(ipd_centered), value = TRUE) +#' centered_colnames #' weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames) -#' +#' \donttest{ +#' # To later estimate bootstrap confidence intervals, we calculate the weights +#' # for the bootstrap samples: +#' weighted_data_boot <- estimate_weights( +#' data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 500 +#' ) +#' } #' @export estimate_weights <- function(data, @@ -199,10 +204,9 @@ optimise_weights <- 
function(matrix, #' #' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count #' @examples -#' \dontrun{ -#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE)) -#' calculate_weights_legend(weighted_data) -#' } +#' data("weighted_sat") +#' calculate_weights_legend(weighted_sat) +#' @export #' @keywords internal calculate_weights_legend <- function(weighted_data) { @@ -362,11 +366,11 @@ plot_weights_ggplot <- function(weighted_data, bin_col, vline_col, #' @param bins (`ggplot` only) number of bin parameter to use #' #' @examples -#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE)) -#' plot(weighted_data) +#' plot(weighted_sat) #' -#' library(ggplot2) -#' plot(weighted_data, ggplot = TRUE) +#' if (requireNamespace("ggplot2")) { +#' plot(weighted_sat, ggplot = TRUE) +#' } #' @describeIn estimate_weights Plot method for estimate_weights objects #' @export @@ -397,9 +401,7 @@ plot.maicplus_estimate_weights <- function(x, ggplot = FALSE, #' aggregated data following the same naming convention #' #' @examples -#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE)) -#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) -#' check_weights(weighted_data, agd) +#' check_weights(weighted_sat, process_agd(agd)) #' #' @import DescTools #' diff --git a/R/process_data.R b/R/process_data.R index b11dfade..d125835b 100644 --- a/R/process_data.R +++ b/R/process_data.R @@ -114,8 +114,8 @@ process_agd <- function(raw_agd) { #' @param dummize_ref_level vector of reference level of the variables to binarize #' #' @examples -#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE)) -#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +#' data(adsl_twt) +#' dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Male")) #' 
#' @return ipd with dummized columns #' @export @@ -152,8 +152,8 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) { #' suffix is no longer accepted. #' @examples #' # load in IPD -#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE)) -#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +#' data(adsl_sat) +#' adsl <- dummize_ipd(adsl_sat, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) #' #' # Reading aggregate data by Excel #' target_pop <- read.csv( @@ -162,7 +162,7 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) { #' agd <- process_agd(target_pop) #' #' # Alternatively, you can specify aggregate data manually in data frame -#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) +#' data(agd) #' ipd_centered <- center_ipd(ipd = adsl, agd = agd) #' #' @return centered ipd using aggregate level data averages diff --git a/data-raw/dummy_anchored.R b/data-raw/dummy_anchored.R new file mode 100644 index 00000000..19fd99ee --- /dev/null +++ b/data-raw/dummy_anchored.R @@ -0,0 +1,80 @@ +#### create anchored example datasets #### + +devtools::load_all() +library(flexsurv) +set.seed(2024) + +# create adsl_twt +adsl <- read.csv(system.file("extdata", "adsl.csv", + package = "maicplus", + mustWork = TRUE +)) +adsl$X <- NULL +adsl$USUBJID <- paste0("xx", adsl$USUBJID) +adsl2 <- adsl +adsl2$ARM <- "C" +adsl2$USUBJID <- sample(size = nrow(adsl2), paste0("yy", adsl2$USUBJID), replace = FALSE) +adsl2 <- adsl2[order(adsl2$USUBJID), ] + +adsl_twt <- rbind(adsl, adsl2) + +# create adtte_twt +adtte <- read.csv(system.file("extdata", "adtte.csv", + package = "maicplus", + mustWork = TRUE +)) +adtte$TIME <- adtte$AVAL +adtte$EVENT <- 1 - adtte$CNSR +adtte$USUBJID <- paste0("xx", adtte$USUBJID) + +adtte2 <- adtte +adtte2$ARM <- "C" +adtte2$TIME <- adtte2$TIME * runif(nrow(adtte2), 0.15, 0.3) +fit_C <- flexsurv::flexsurvspline(formula = 
Surv(TIME, EVENT) ~ 1, data = adtte2, k = 3) +tmp <- simulate(fit_C, nsim = 1, seed = 1234, newdata = adtte2, censtime = max(adtte$TIME)) +adtte2$TIME <- tmp$time_1 +adtte2$EVENT <- tmp$event_1 +adtte2$USUBJID <- paste0("yy", adtte2$USUBJID) + +adtte_twt <- rbind(adtte, adtte2) +adtte_twt$EVNT <- NULL + +### Binary +adrs_twt1 <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE)) +adrs_twt1$USUBJID <- paste0("xx", adrs_twt1$USUBJID) +adrs_twt1$RESPONSE <- adrs_twt1$AVAL + +adrs_twt2 <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE)) +adrs_twt2$ARM <- "C" +adrs_twt2$AVAL <- adrs_twt2$RESPONSE <- rbinom(nrow(adrs_twt2), size = 1, prob = 0.68) +adrs_twt2$USUBJID <- paste0("yy", adrs_twt2$USUBJID) + +adrs_twt <- rbind(adrs_twt1, adrs_twt2) + +# Make sure that agd is up-to-date! +data("agd") + +# create pseudo_ipd_twt +pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv", + package = "maicplus", + mustWork = TRUE +)) +pseudo_ipd$ARM <- "B" +pseudo_ipd2 <- adtte2[, c("TIME", "EVENT", "ARM")] +names(pseudo_ipd2) <- c("Time", "Event", "ARM") +tmp <- simulate(fit_C, nsim = 1, seed = 4321, newdata = adtte2, censtime = max(pseudo_ipd$Time)) +pseudo_ipd2$Time <- tmp$time_1 +pseudo_ipd2$Event <- tmp$event_1 + +pseudo_ipd_twt <- rbind(pseudo_ipd, pseudo_ipd2) + +# create centered adsl_twt +agd <- process_agd(agd) +adsl_twt <- dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +centered_ipd_twt <- center_ipd(ipd = adsl_twt, agd = agd) + + +### Output +usethis::use_data(adsl_twt, adtte_twt, pseudo_ipd_twt, centered_ipd_twt, adrs_twt, + internal = FALSE, overwrite = TRUE +) diff --git a/data-raw/dummy_unanchored.R b/data-raw/dummy_unanchored.R new file mode 100644 index 00000000..198ee4a2 --- /dev/null +++ b/data-raw/dummy_unanchored.R @@ -0,0 +1,53 @@ +#### create datasets for unanchored case #### +## adsl_sat, adtte_sat, adrs_sat, agd (AgD of effect modifiers), 
pseudo_ipd_sat (AgD, tte data) + +devtools::load_all() +# Read in relevant ADaM data and rename variables of interest +adsl_sat <- read.csv(system.file("extdata", "adsl.csv", + package = "maicplus", + mustWork = TRUE +)) +adsl_sat$X <- NULL +adtte_sat <- read.csv(system.file("extdata", "adtte.csv", + package = "maicplus", + mustWork = TRUE +)) +adtte_sat$TIME <- adtte_sat$AVAL +adtte_sat$EVENT <- 1 - adtte_sat$CNSR +adtte_sat <- adtte_sat[adtte_sat$ARM == "A", , drop = FALSE] +adtte_sat$EVNT <- NULL + +adsl_sat <- adsl_sat[adsl_sat$USUBJID %in% adtte_sat$USUBJID, , drop = FALSE] + + +### AgD +# Baseline aggregate data for the comparator population +agd <- read.csv(system.file("extdata", "aggregate_data_example_1.csv", + package = "maicplus", mustWork = TRUE +)) +# for time-to-event endpoints, pseudo IPD from digitalized KM +pseudo_ipd_sat <- read.csv(system.file("extdata", "psuedo_IPD.csv", + package = "maicplus", + mustWork = TRUE +)) +pseudo_ipd_sat$ARM <- "B" + +### Centered IPD +agd_sat <- process_agd(agd) +adsl_sat <- dummize_ipd(adsl_sat, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +centered_ipd_sat <- center_ipd(ipd = adsl_sat, agd = agd_sat) + +### Binary +adrs_sat <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE)) +adrs_sat$RESPONSE <- adrs_sat$AVAL + +## Example weighted TTE +ipd_centered <- center_ipd(ipd = adsl_sat, agd = agd_sat) +centered_colnames <- paste0(c("AGE", "AGE_MEDIAN", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE"), "_CENTERED") +weighted_sat <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames) + + +### Output +usethis::use_data(adsl_sat, adtte_sat, agd, pseudo_ipd_sat, centered_ipd_sat, adrs_sat, weighted_sat, + internal = FALSE, overwrite = TRUE +) diff --git a/data/adrs_sat.rda b/data/adrs_sat.rda new file mode 100644 index 00000000..7a7b5e1b Binary files /dev/null and b/data/adrs_sat.rda differ diff --git a/data/adrs_twt.rda b/data/adrs_twt.rda new file mode 
100644 index 00000000..b202158b Binary files /dev/null and b/data/adrs_twt.rda differ diff --git a/data/adsl_sat.rda b/data/adsl_sat.rda new file mode 100644 index 00000000..a770c8b0 Binary files /dev/null and b/data/adsl_sat.rda differ diff --git a/data/adsl_twt.rda b/data/adsl_twt.rda new file mode 100644 index 00000000..7d9c10c4 Binary files /dev/null and b/data/adsl_twt.rda differ diff --git a/data/adtte_sat.rda b/data/adtte_sat.rda new file mode 100644 index 00000000..3a64dcc1 Binary files /dev/null and b/data/adtte_sat.rda differ diff --git a/data/adtte_twt.rda b/data/adtte_twt.rda new file mode 100644 index 00000000..35da640e Binary files /dev/null and b/data/adtte_twt.rda differ diff --git a/data/agd.rda b/data/agd.rda new file mode 100644 index 00000000..394384be Binary files /dev/null and b/data/agd.rda differ diff --git a/data/centered_ipd_sat.rda b/data/centered_ipd_sat.rda new file mode 100644 index 00000000..897a60e1 Binary files /dev/null and b/data/centered_ipd_sat.rda differ diff --git a/data/centered_ipd_twt.rda b/data/centered_ipd_twt.rda new file mode 100644 index 00000000..8d04e22e Binary files /dev/null and b/data/centered_ipd_twt.rda differ diff --git a/data/pseudo_ipd_sat.rda b/data/pseudo_ipd_sat.rda new file mode 100644 index 00000000..4d9f29cb Binary files /dev/null and b/data/pseudo_ipd_sat.rda differ diff --git a/data/pseudo_ipd_twt.rda b/data/pseudo_ipd_twt.rda new file mode 100644 index 00000000..758778bf Binary files /dev/null and b/data/pseudo_ipd_twt.rda differ diff --git a/data/weighted_sat.rda b/data/weighted_sat.rda new file mode 100644 index 00000000..4938e68a Binary files /dev/null and b/data/weighted_sat.rda differ diff --git a/inst/WORDLIST b/inst/WORDLIST index 801bb23e..9f5dac06 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -31,6 +31,7 @@ Kaplan MAIC multivariable optim +Paramater pharma Phillippo phillippo @@ -44,6 +45,7 @@ sd Signorovitch signorovitch Texp +THER TSD TTE tte @@ -54,3 +56,4 @@ unscaled unstacked 
USUBJID vcovHC +XXXX diff --git a/inst/examples/kmplot_anchored_ex.R b/inst/examples/kmplot_anchored_ex.R index 514d2e24..3112eb56 100644 --- a/inst/examples/kmplot_anchored_ex.R +++ b/inst/examples/kmplot_anchored_ex.R @@ -2,40 +2,19 @@ ### IPD # Read in relevant ADaM data and rename variables of interest -adsl <- read.csv(system.file("extdata", "adsl.csv", - package = "maicplus", - mustWork = TRUE -)) -adtte <- read.csv(system.file("extdata", "adtte.csv", - package = "maicplus", - mustWork = TRUE -)) -adtte$TIME <- adtte$AVAL -adtte$EVENT <- adtte$EVNT -adtte2 <- adtte -adtte2$ARM <- "C" -adtte2$TIME <- adtte2$TIME + 7 -adtte <- rbind(adtte, adtte2) +adsl_twt +adtte_twt ### AgD # Baseline aggregate data for the comparator population -target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv", - package = "maicplus", mustWork = TRUE -)) +agd + # for time-to-event endpoints, pseudo IPD from digitalized KM -pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv", - package = "maicplus", - mustWork = TRUE -)) -pseudo_ipd$ARM <- "B" -pseudo_ipd2 <- pseudo_ipd -pseudo_ipd2$ARM <- "C" -pseudo_ipd2$Time <- pseudo_ipd2$Time + 5 -pseudo_ipd <- rbind(pseudo_ipd, pseudo_ipd2) +pseudo_ipd_twt #### prepare data -target_pop <- process_agd(target_pop) -adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +target_pop <- process_agd(agd) +adsl <- dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) use_adsl <- center_ipd(ipd = adsl, agd = target_pop) #### derive weights @@ -49,9 +28,9 @@ match_res <- estimate_weights( # plot by trial kmplot( weights_object = match_res, - tte_ipd = adtte, + tte_ipd = adtte_twt, trt_var_ipd = "ARM", - tte_pseudo_ipd = pseudo_ipd, + tte_pseudo_ipd = pseudo_ipd_twt, trt_var_agd = "ARM", endpoint_name = "Overall Survival", trt_ipd = "A", @@ -71,9 +50,9 @@ kmplot( # plot by arm kmplot( weights_object = match_res, - tte_ipd = adtte, + tte_ipd = adtte_twt, trt_var_ipd = 
"ARM", - tte_pseudo_ipd = pseudo_ipd, + tte_pseudo_ipd = pseudo_ipd_twt, trt_var_agd = "ARM", endpoint_name = "Overall Survival", trt_ipd = "A", @@ -92,9 +71,9 @@ kmplot( # plot all kmplot( weights_object = match_res, - tte_ipd = adtte, + tte_ipd = adtte_twt, trt_var_ipd = "ARM", - tte_pseudo_ipd = pseudo_ipd, + tte_pseudo_ipd = pseudo_ipd_twt, trt_var_agd = "ARM", endpoint_name = "Overall Survival", trt_ipd = "A", diff --git a/inst/examples/maic_unanchored_binary_ex.R b/inst/examples/maic_unanchored_binary_ex.R index 2baeccce..edcdb995 100644 --- a/inst/examples/maic_unanchored_binary_ex.R +++ b/inst/examples/maic_unanchored_binary_ex.R @@ -1,19 +1,18 @@ -# load in prognostic IPD data and AgD -load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE)) -load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) -ipd_centered <- center_ipd(ipd = ipd, agd = agd) +# load in centered prognostic IPD data -# estimate weights -centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN") -centered_colnames <- paste0(centered_colnames, "_CENTERED") +centered_ipd_sat +centered_colnames <- grep("_CENTERED$", colnames(centered_ipd_sat), value = TRUE) +weighted_data <- estimate_weights(data = centered_ipd_sat, centered_colnames = centered_colnames) +weighted_data2 <- estimate_weights( + data = centered_ipd_sat, centered_colnames = centered_colnames, + n_boot_iteration = 500 +) -weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames) -weighted_data2 <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 400) +# binary IPD +adrs_sat -# get dummy binary IPD -adrs <- read.csv(system.file("extdata", "adrs.csv", package = "maicplus", mustWork = TRUE)) -adrs$RESPONSE <- adrs$AVAL +# get dummy binary IPD pseudo_adrs <- get_pseudo_ipd_binary( binary_agd = data.frame( ARM = rep("B", 2), @@ -26,7 +25,7 @@ pseudo_adrs <- 
get_pseudo_ipd_binary( # unanchored binary MAIC, with CI based on sandwich estimator maic_unanchored( weights_object = weighted_data, - ipd = adrs, + ipd = adrs_sat, pseudo_ipd = pseudo_adrs, trt_ipd = "A", trt_agd = "B", @@ -42,7 +41,7 @@ maic_unanchored( # unanchored binary MAIC, with bootstrapped CI maic_unanchored( weights_object = weighted_data2, - ipd = adrs, + ipd = adrs_sat, pseudo_ipd = pseudo_adrs, trt_ipd = "A", trt_agd = "B", diff --git a/inst/extdata/aggregate_data_example_1.csv b/inst/extdata/aggregate_data_example_1.csv index 32d705b2..a1128627 100644 --- a/inst/extdata/aggregate_data_example_1.csv +++ b/inst/extdata/aggregate_data_example_1.csv @@ -1,4 +1,4 @@ -STUDY,ARM,N,AGE_MEAN,AGE_MEDIAN,AGE_SD,SEX_MALE_COUNT,ECOG0_COUNT,SMOKE_COUNT -Study_XXXX,Total,300,51,49,3.25,147,105,58 -Study_XXXX,Intervention,,,,,,, -Study_XXXX,Comparator,,,,,,, +STUDY,ARM,N,AGE_MEAN,AGE_MEDIAN,AGE_SD,SEX_MALE_COUNT,ECOG0_COUNT,SMOKE_COUNT,N_PR_THER_MEDIAN +Study_XXXX,Total,300,51,49,3.25,147,105,58,2 +Study_XXXX,Intervention,,,,,,,, +Study_XXXX,Comparator,,,,,,,, diff --git a/man/adrs_sat.Rd b/man/adrs_sat.Rd new file mode 100644 index 00000000..e015731f --- /dev/null +++ b/man/adrs_sat.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{adrs_sat} +\alias{adrs_sat} +\title{Binary outcome data from single arm trial} +\format{ +A data frame with 500 rows and 5 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm.} +\item{AVAL}{Analysis value, in this dataset an indicator of response.} +\item{PARAM}{Parameter type of \code{AVAL}.} +\item{RESPONSE}{Indicator of response.} +} +} +\usage{ +adrs_sat +} +\description{ +Binary outcome data from single arm trial +} +\seealso{ +Other unanchored datasets: +\code{\link{adsl_sat}}, +\code{\link{adtte_sat}}, +\code{\link{agd}}, +\code{\link{centered_ipd_sat}}, +\code{\link{pseudo_ipd_sat}}, 
+\code{\link{weighted_sat}} +} +\concept{unanchored datasets} +\keyword{dataset} diff --git a/man/adrs_twt.Rd b/man/adrs_twt.Rd new file mode 100644 index 00000000..11105141 --- /dev/null +++ b/man/adrs_twt.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{adrs_twt} +\alias{adrs_twt} +\title{Binary outcome data from two arm trial} +\format{ +A data frame with 1000 rows and 5 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm, \code{"A"}, \code{"C"}.} +\item{AVAL}{Analysis value, in this dataset an indicator of response.} +\item{PARAM}{Parameter type of \code{AVAL}.} +\item{RESPONSE}{Indicator of response.} +} +} +\usage{ +adrs_twt +} +\description{ +Binary outcome data from two arm trial +} +\keyword{datasets} diff --git a/man/adsl_sat.Rd b/man/adsl_sat.Rd new file mode 100644 index 00000000..bf6c48db --- /dev/null +++ b/man/adsl_sat.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{adsl_sat} +\alias{adsl_sat} +\title{Patient data from single arm study} +\format{ +a data frame with 500 rows and 8 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm.} +\item{AGE}{Age in years at baseline.} +\item{SEX}{Sex of patient recorded as character \code{"Male"}/\code{"Female"}.} +\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.} +\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.} +\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.} +\item{SEX_MALE}{Indicator of \code{SEX == "Male"} as numeric \code{1}/\code{0}.} + +} +} +\usage{ +adsl_sat +} +\description{ +Patient data from single arm study +} +\seealso{ +Other unanchored datasets: +\code{\link{adrs_sat}}, +\code{\link{adtte_sat}}, +\code{\link{agd}}, 
+\code{\link{centered_ipd_sat}}, +\code{\link{pseudo_ipd_sat}}, +\code{\link{weighted_sat}} +} +\concept{unanchored datasets} +\keyword{dataset} diff --git a/man/adsl_twt.Rd b/man/adsl_twt.Rd new file mode 100644 index 00000000..6ef07138 --- /dev/null +++ b/man/adsl_twt.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{adsl_twt} +\alias{adsl_twt} +\title{Patient data from two arm trial} +\format{ +A data frame with 1000 rows and 8 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm.} +\item{AGE}{Age in years at baseline.} +\item{SEX}{Sex of patient recorded as character "Male"/"Female"} +\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.} +\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.} +\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.} +\item{SEX_MALE}{Indicator of SEX == "Male" as numeric 1/0} +} +} +\usage{ +adsl_twt +} +\description{ +Patient data from two arm trial +} +\seealso{ +Other anchored datasets: +\code{\link{adtte_twt}}, +\code{\link{agd}}, +\code{\link{centered_ipd_twt}}, +\code{\link{pseudo_ipd_twt}} +} +\concept{anchored datasets} +\keyword{dataset} diff --git a/man/adtte_sat.Rd b/man/adtte_sat.Rd new file mode 100644 index 00000000..6d21be80 --- /dev/null +++ b/man/adtte_sat.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{adtte_sat} +\alias{adtte_sat} +\title{Survival data from single arm trial} +\format{ +A data frame with 500 rows and 10 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm, \code{"A"}.} +\item{AVAL}{Analysis value which in this dataset overall survival time in days.} +\item{AVALU}{Unit of \code{AVAL}.} +\item{PARAMCD}{Paramater code of \code{AVAL}, \code{"OS"}.} 
+\item{PARAM}{Parameter name of \code{AVAL}, \verb{"Overall Survival}.} +\item{CNSR}{Censoring indicator \code{0}/\code{1}.} +\item{TIME}{Survival time in days.} +\item{EVENT}{Event indicator \code{0}/\code{1}.} +} +} +\usage{ +adtte_sat +} +\description{ +Survival data from single arm trial +} +\seealso{ +Other unanchored datasets: +\code{\link{adrs_sat}}, +\code{\link{adsl_sat}}, +\code{\link{agd}}, +\code{\link{centered_ipd_sat}}, +\code{\link{pseudo_ipd_sat}}, +\code{\link{weighted_sat}} +} +\concept{unanchored datasets} +\keyword{dataset} diff --git a/man/adtte_twt.Rd b/man/adtte_twt.Rd new file mode 100644 index 00000000..5e6d2618 --- /dev/null +++ b/man/adtte_twt.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{adtte_twt} +\alias{adtte_twt} +\title{Survival data from two arm trial} +\format{ +A data frame with 1000 rows and 10 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm, \code{"A"}, \code{"C"}.} +\item{AVAL}{Analysis value which in this dataset overall survival time in days.} +\item{AVALU}{Unit of \code{AVAL}.} +\item{PARAMCD}{Parameter code of \code{AVAL}, \code{"OS"}.} +\item{PARAM}{Parameter name of \code{AVAL}, \verb{"Overall Survival}.} +\item{CNSR}{Censoring indicator \code{0}/\code{1}.} +\item{TIME}{Survival time in days.} +\item{EVENT}{Event indicator \code{0}/\code{1}.} +} +} +\usage{ +adtte_twt +} +\description{ +Survival data from two arm trial +} +\seealso{ +Other anchored datasets: +\code{\link{adsl_twt}}, +\code{\link{agd}}, +\code{\link{centered_ipd_twt}}, +\code{\link{pseudo_ipd_twt}} +} +\concept{anchored datasets} +\keyword{dataset} diff --git a/man/agd.Rd b/man/agd.Rd new file mode 100644 index 00000000..31b097e0 --- /dev/null +++ b/man/agd.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{agd} +\alias{agd} 
+\title{Aggregate effect modifier data from published study} +\format{ +A data frame with 3 rows and 9 columns: +\describe{ +\item{STUDY}{The study name, Study_XXXX} +\item{ARM}{Study arm name or total} +\item{N}{Number of observations in study arm} +\item{AGE_MEAN}{Mean age in study arm} +\item{AGE_MEDIAN}{Median age in study arm} +\item{AGE_SD}{Standard deviation of age in study arm} +\item{SEX_MALE_COUNT}{Number of male patients} +\item{ECOG0_COUNT}{Number of patients with ECOG score = 0} +\item{SMOKE_COUNT}{Number of smokers} +\item{N_PR_THER_MEDIAN}{Median number of prior therapies} +} +} +\usage{ +agd +} +\description{ +This data is formatted to be used in \code{\link[=center_ipd]{center_ipd()}}. +} +\seealso{ +Other unanchored datasets: +\code{\link{adrs_sat}}, +\code{\link{adsl_sat}}, +\code{\link{adtte_sat}}, +\code{\link{centered_ipd_sat}}, +\code{\link{pseudo_ipd_sat}}, +\code{\link{weighted_sat}} + +Other anchored datasets: +\code{\link{adsl_twt}}, +\code{\link{adtte_twt}}, +\code{\link{centered_ipd_twt}}, +\code{\link{pseudo_ipd_twt}} +} +\concept{anchored datasets} +\concept{unanchored datasets} +\keyword{dataset} diff --git a/man/calculate_weights_legend.Rd b/man/calculate_weights_legend.Rd index d9078b88..617c206c 100644 --- a/man/calculate_weights_legend.Rd +++ b/man/calculate_weights_legend.Rd @@ -16,9 +16,7 @@ list of ESS, ESS reduction, median value of scaled and unscaled weights, and mis Calculates ESS reduction and median weights which is used to create legend for weights plot } \examples{ -\dontrun{ -load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE)) -calculate_weights_legend(weighted_data) -} +data("weighted_sat") +calculate_weights_legend(weighted_sat) } \keyword{internal} diff --git a/man/center_ipd.Rd b/man/center_ipd.Rd index beba52d8..57d6ad7f 100644 --- a/man/center_ipd.Rd +++ b/man/center_ipd.Rd @@ -27,8 +27,8 @@ IPD and aggregate data variable names should match. 
} \examples{ # load in IPD -adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE)) -adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +data(adsl_sat) +adsl <- dummize_ipd(adsl_sat, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) # Reading aggregate data by Excel target_pop <- read.csv( @@ -37,7 +37,7 @@ target_pop <- read.csv( agd <- process_agd(target_pop) # Alternatively, you can specify aggregate data manually in data frame -load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) +data(agd) ipd_centered <- center_ipd(ipd = adsl, agd = agd) } diff --git a/man/centered_ipd_sat.Rd b/man/centered_ipd_sat.Rd new file mode 100644 index 00000000..671e44d4 --- /dev/null +++ b/man/centered_ipd_sat.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{centered_ipd_sat} +\alias{centered_ipd_sat} +\title{Centered patient data from single arm trial} +\format{ +A data frame with 500 rows and 14 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm.} +\item{AGE}{Age in years at baseline.} +\item{SEX}{Sex of patient recorded as character \code{"Male"}/\code{"Female"}.} +\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.} +\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.} +\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.} +\item{SEX_MALE}{Indicator of \code{SEX == "Male"} as numeric \code{1}/\code{0}.} +\item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data \link{agd}.} +\item{AGE_MEDIAN_CENTERED}{\code{AGE} greater/less than \code{AGE_MEDIAN} in \link{agd} coded as \code{1}/\code{0} and then centered at +0.5.} +\item{AGE_SQUARED_CENTERED}{\code{AGE} squared and centered with respect to the \code{AGE} in \link{agd}. 
The squared age in the +aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.} +\item{SEX_MALE_CENTERED}{\code{SEX_MALE} centered by the proportion of male patients in \link{agd}} +\item{ECOG0_CENTERED}{\code{ECOG0} centered by the proportion of \code{ECOG0} in \link{agd}} +\item{SMOKE_CENTERED}{\code{SMOKE} centered by the proportion of \code{SMOKE} in \link{agd}} +\item{N_PR_THER_MEDIAN_CENTERED}{\code{N_PR_THER} centered by the median in \link{agd}.} +} +} +\usage{ +centered_ipd_sat +} +\description{ +Centered patient data from single arm trial +} +\seealso{ +Other unanchored datasets: +\code{\link{adrs_sat}}, +\code{\link{adsl_sat}}, +\code{\link{adtte_sat}}, +\code{\link{agd}}, +\code{\link{pseudo_ipd_sat}}, +\code{\link{weighted_sat}} +} +\concept{unanchored datasets} +\keyword{dataset} diff --git a/man/centered_ipd_twt.Rd b/man/centered_ipd_twt.Rd new file mode 100644 index 00000000..a34fa20c --- /dev/null +++ b/man/centered_ipd_twt.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{centered_ipd_twt} +\alias{centered_ipd_twt} +\title{Centered patient data from two arm trial} +\format{ +A data frame with 1000 rows and 14 columns: +\describe{ +\item{USUBJID}{Unique subject identifiers for patients.} +\item{ARM}{Assigned treatment arm.} +\item{AGE}{Age in years at baseline.} +\item{SEX}{Sex of patient recorded as character \code{"Male"}/\code{"Female"}.} +\item{SMOKE}{Smoking status at baseline as integer \code{1}/\code{0}.} +\item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer \code{1}/\code{0}.} +\item{N_PR_THER}{Number of prior therapies received as integer \verb{1, 2, 3, 4}.} +\item{SEX_MALE}{Indicator of \code{SEX == "Male"} as numeric \code{1}/\code{0}.} +\item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data \link{agd}.} +\item{AGE_MEDIAN_CENTERED}{\code{AGE} greater/less than \code{AGE_MEDIAN} in \link{agd} coded 
as \code{1}/\code{0} and then centered at +0.5.} +\item{AGE_SQUARED_CENTERED}{\code{AGE} squared and centered with respect to the \code{AGE} in \link{agd}. The squared age in the +aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.} +\item{SEX_MALE_CENTERED}{\code{SEX_MALE} centered by the proportion of male patients in \link{agd}} +\item{ECOG0_CENTERED}{\code{ECOG0} centered by the proportion of \code{ECOG0} in \link{agd}} +\item{SMOKE_CENTERED}{\code{SMOKE} centered by the proportion of \code{SMOKE} in \link{agd}} +\item{N_PR_THER_MEDIAN_CENTERED}{\code{N_PR_THER} centered by the median in \link{agd}.} +} +} +\usage{ +centered_ipd_twt +} +\description{ +Centered patient data from two arm trial +} +\seealso{ +Other anchored datasets: +\code{\link{adsl_twt}}, +\code{\link{adtte_twt}}, +\code{\link{agd}}, +\code{\link{pseudo_ipd_twt}} +} +\concept{anchored datasets} +\keyword{dataset} diff --git a/man/check_weights.Rd b/man/check_weights.Rd index bb25ff46..992a41b2 100644 --- a/man/check_weights.Rd +++ b/man/check_weights.Rd @@ -48,8 +48,6 @@ before and after adjustment. }} \examples{ -load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE)) -load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) -check_weights(weighted_data, agd) +check_weights(weighted_sat, process_agd(agd)) } diff --git a/man/dummize_ipd.Rd b/man/dummize_ipd.Rd index b9471b16..f44fea9d 100644 --- a/man/dummize_ipd.Rd +++ b/man/dummize_ipd.Rd @@ -22,7 +22,7 @@ This would be especially useful if the variable has more than two factors. Note that the original variable is kept after a variable is dummized. 
} \examples{ -adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE)) -adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +data(adsl_twt) +dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Male")) } diff --git a/man/estimate_weights.Rd b/man/estimate_weights.Rd index 23011211..73535f68 100644 --- a/man/estimate_weights.Rd +++ b/man/estimate_weights.Rd @@ -93,17 +93,22 @@ for \code{ggplot} is to plot unscaled and scaled weights on a same plot. }} \examples{ -load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE)) -load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE)) -ipd_centered <- center_ipd(ipd = ipd, agd = agd) - -centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN") -centered_colnames <- paste0(centered_colnames, "_CENTERED") +data(agd) +data(adsl_sat) +ipd_centered <- center_ipd(ipd = adsl_sat, agd = process_agd(agd)) +centered_colnames <- grep("_CENTERED", colnames(ipd_centered), value = TRUE) +centered_colnames weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames) +\donttest{ +# To later estimate bootstrap confidence intervals, we calculate the weights +# for the bootstrap samples: +weighted_data_boot <- estimate_weights( + data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 500 +) +} +plot(weighted_sat) -load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE)) -plot(weighted_data) - -library(ggplot2) -plot(weighted_data, ggplot = TRUE) +if (requireNamespace("ggplot2")) { + plot(weighted_sat, ggplot = TRUE) +} } diff --git a/man/kmplot.Rd b/man/kmplot.Rd index d81ef433..744c6524 100644 --- a/man/kmplot.Rd +++ b/man/kmplot.Rd @@ -65,40 +65,19 @@ It is wrapper function of \code{basic_kmplot}. 
The argument setting is similar t ### IPD # Read in relevant ADaM data and rename variables of interest -adsl <- read.csv(system.file("extdata", "adsl.csv", - package = "maicplus", - mustWork = TRUE -)) -adtte <- read.csv(system.file("extdata", "adtte.csv", - package = "maicplus", - mustWork = TRUE -)) -adtte$TIME <- adtte$AVAL -adtte$EVENT <- adtte$EVNT -adtte2 <- adtte -adtte2$ARM <- "C" -adtte2$TIME <- adtte2$TIME + 7 -adtte <- rbind(adtte, adtte2) +adsl_twt +adtte_twt ### AgD # Baseline aggregate data for the comparator population -target_pop <- read.csv(system.file("extdata", "aggregate_data_example_1.csv", - package = "maicplus", mustWork = TRUE -)) +agd + # for time-to-event endpoints, pseudo IPD from digitalized KM -pseudo_ipd <- read.csv(system.file("extdata", "psuedo_IPD.csv", - package = "maicplus", - mustWork = TRUE -)) -pseudo_ipd$ARM <- "B" -pseudo_ipd2 <- pseudo_ipd -pseudo_ipd2$ARM <- "C" -pseudo_ipd2$Time <- pseudo_ipd2$Time + 5 -pseudo_ipd <- rbind(pseudo_ipd, pseudo_ipd2) +pseudo_ipd_twt #### prepare data -target_pop <- process_agd(target_pop) -adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) +target_pop <- process_agd(agd) +adsl <- dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Female")) use_adsl <- center_ipd(ipd = adsl, agd = target_pop) #### derive weights @@ -112,9 +91,9 @@ match_res <- estimate_weights( # plot by trial kmplot( weights_object = match_res, - tte_ipd = adtte, + tte_ipd = adtte_twt, trt_var_ipd = "ARM", - tte_pseudo_ipd = pseudo_ipd, + tte_pseudo_ipd = pseudo_ipd_twt, trt_var_agd = "ARM", endpoint_name = "Overall Survival", trt_ipd = "A", @@ -134,9 +113,9 @@ kmplot( # plot by arm kmplot( weights_object = match_res, - tte_ipd = adtte, + tte_ipd = adtte_twt, trt_var_ipd = "ARM", - tte_pseudo_ipd = pseudo_ipd, + tte_pseudo_ipd = pseudo_ipd_twt, trt_var_agd = "ARM", endpoint_name = "Overall Survival", trt_ipd = "A", @@ -155,9 +134,9 @@ kmplot( # plot all kmplot( 
weights_object = match_res, - tte_ipd = adtte, + tte_ipd = adtte_twt, trt_var_ipd = "ARM", - tte_pseudo_ipd = pseudo_ipd, + tte_pseudo_ipd = pseudo_ipd_twt, trt_var_agd = "ARM", endpoint_name = "Overall Survival", trt_ipd = "A", diff --git a/man/pseudo_ipd_sat.Rd b/man/pseudo_ipd_sat.Rd new file mode 100644 index 00000000..d226548e --- /dev/null +++ b/man/pseudo_ipd_sat.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{pseudo_ipd_sat} +\alias{pseudo_ipd_sat} +\title{Pseudo individual patient survival data from published study} +\format{ +A data frame with 300 rows and 3 columns: +\describe{ +\item{Time}{Survival time in days.} +\item{Event}{Event indicator \code{0}/\code{1}.} +\item{ARM}{Assigned treatment arm, \code{"B"}.} +} +} +\usage{ +pseudo_ipd_sat +} +\description{ +Pseudo individual patient survival data from published study +} +\seealso{ +Other unanchored datasets: +\code{\link{adrs_sat}}, +\code{\link{adsl_sat}}, +\code{\link{adtte_sat}}, +\code{\link{agd}}, +\code{\link{centered_ipd_sat}}, +\code{\link{weighted_sat}} +} +\concept{unanchored datasets} +\keyword{dataset} diff --git a/man/pseudo_ipd_twt.Rd b/man/pseudo_ipd_twt.Rd new file mode 100644 index 00000000..15a2be3e --- /dev/null +++ b/man/pseudo_ipd_twt.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{pseudo_ipd_twt} +\alias{pseudo_ipd_twt} +\title{Pseudo individual patient survival data from published two arm study} +\format{ +A data frame with 800 rows and 3 columns: +\describe{ +\item{Time}{Survival time in days.} +\item{Event}{Event indicator \code{0}/\code{1}.} +\item{ARM}{Assigned treatment arm, \code{"B"}, \code{"C"}.} +} +} +\usage{ +pseudo_ipd_twt +} +\description{ +Pseudo individual patient survival data from published two arm study +} +\seealso{ +Other anchored datasets: +\code{\link{adsl_twt}}, +\code{\link{adtte_twt}}, 
+\code{\link{agd}}, +\code{\link{centered_ipd_twt}} +} +\concept{anchored datasets} +\keyword{dataset} diff --git a/man/weighted_sat.Rd b/man/weighted_sat.Rd new file mode 100644 index 00000000..2e244022 --- /dev/null +++ b/man/weighted_sat.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{weighted_sat} +\alias{weighted_sat} +\title{Weighted object for single arm trial data} +\format{ +A \code{maicplus_estimate_weights} object created by \code{\link[=estimate_weights]{estimate_weights()}} containing +\describe{ +\item{data}{patient level data with weights} +\item{centered_colnames}{Columns used in MAIC} +\item{nr_missing}{Number of observations with missing data} +\item{ess}{Effective sample size} +\item{opt}{Information from \code{optim} from weight calculation} +\item{boot}{Parameters and bootstrap sample weights, \code{NULL} in this object} +} +} +\usage{ +weighted_sat +} +\description{ +Weighted object for single arm trial data +} +\seealso{ +Other unanchored datasets: +\code{\link{adrs_sat}}, +\code{\link{adsl_sat}}, +\code{\link{adtte_sat}}, +\code{\link{agd}}, +\code{\link{centered_ipd_sat}}, +\code{\link{pseudo_ipd_sat}} +} +\concept{unanchored datasets} +\keyword{dataset} diff --git a/tests/testthat/test-maic_anchored.R b/tests/testthat/test-maic_anchored.R index 7aa2848e..97935991 100644 --- a/tests/testthat/test-maic_anchored.R +++ b/tests/testthat/test-maic_anchored.R @@ -55,9 +55,14 @@ test_that("maic_anchored works for TTE using robust SE", { use_adsl <- center_ipd(ipd = adsl, agd = target_pop) #### derive weights + cols <- c( + "AGE_CENTERED", "AGE_MEDIAN_CENTERED", "AGE_SQUARED_CENTERED", + "SEX_MALE_CENTERED", "ECOG0_CENTERED", "SMOKE_CENTERED" + ) + # cols <- grep("_CENTERED$", names(use_adsl)) match_res <- estimate_weights( data = use_adsl, - centered_colnames = grep("_CENTERED$", names(use_adsl)), + centered_colnames = cols, start_val = 0, method = "BFGS" ) @@ -164,9 
+169,14 @@ test_that("maic_anchored works for TTE using bootstrap SE", { use_adsl <- center_ipd(ipd = adsl, agd = target_pop) #### derive weights + cols <- c( + "AGE_CENTERED", "AGE_MEDIAN_CENTERED", "AGE_SQUARED_CENTERED", + "SEX_MALE_CENTERED", "ECOG0_CENTERED", "SMOKE_CENTERED" + ) + # cols <- grep("_CENTERED$", names(use_adsl)) match_res_boot <- estimate_weights( data = use_adsl, - centered_colnames = grep("_CENTERED$", names(use_adsl)), + centered_colnames = cols, start_val = 0, method = "BFGS", n_boot_iteration = 5, diff --git a/tests/testthat/test-maic_unanchored.R b/tests/testthat/test-maic_unanchored.R index 470b3b0f..81949c84 100644 --- a/tests/testthat/test-maic_unanchored.R +++ b/tests/testthat/test-maic_unanchored.R @@ -120,16 +120,21 @@ test_that("test time to event case", { use_adsl <- center_ipd(ipd = adsl, agd = target_pop) #### derive weights + cols <- c( + "AGE_CENTERED", "AGE_MEDIAN_CENTERED", "AGE_SQUARED_CENTERED", + "SEX_MALE_CENTERED", "ECOG0_CENTERED", "SMOKE_CENTERED" + ) + # cols <- grep("_CENTERED$", names(use_adsl)) match_res <- estimate_weights( data = use_adsl, - centered_colnames = grep("_CENTERED$", names(use_adsl)), + centered_colnames = cols, start_val = 0, method = "BFGS" ) match_res_boot <- estimate_weights( data = use_adsl, - centered_colnames = grep("_CENTERED$", names(use_adsl)), + centered_colnames = cols, start_val = 0, method = "BFGS", n_boot_iteration = 500,