Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add example data #105

Merged
merged 22 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ repos:
- shiny
- lubridate
- DescTools
- lmtest
# codemeta must be above use-tidy-description when both are used
# - id: codemeta-description-updated
- id: use-tidy-description
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(basic_kmplot)
export(basic_kmplot2)
export(bootstrap_HR)
export(bucher)
export(calculate_weights_legend)
export(center_ipd)
export(check_weights)
export(dummize_ipd)
Expand Down
220 changes: 220 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
# unanchored datasets ------

#' Patient data from single arm study
#' @format A data frame with 500 rows and 8 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm.}
#' \item{AGE}{Age in years at baseline.}
#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
#'
#' }
#' @keywords dataset
#' @family unanchored datasets
"adsl_sat"

#' Survival data from single arm trial
#' @format A data frame with 500 rows and 10 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm, `"A"`.}
#' \item{AVAL}{Analysis value, which in this dataset is overall survival time in days.}
#' \item{AVALU}{Unit of `AVAL`.}
#' \item{PARAMCD}{Parameter code of `AVAL`, `"OS"`.}
#' \item{PARAM}{Parameter name of `AVAL`, `"Overall Survival"`.}
#' \item{CNSR}{Censoring indicator `0`/`1`.}
#' \item{TIME}{Survival time in days.}
#' \item{EVENT}{Event indicator `0`/`1`.}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EVENT and EVNT are same is that okay? I guess so?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove EVNT and check code works

#' }
#' @family unanchored datasets
#' @keywords dataset
"adtte_sat"


#' Pseudo individual patient survival data from published study
#' @format A data frame with 300 rows and 3 columns:
#' \describe{
#' \item{Time}{Survival time in days.}
#' \item{Event}{Event indicator `0`/`1`.}
#' \item{ARM}{Assigned treatment arm, `"B"`.}
#' }
#' @family unanchored datasets
#' @keywords dataset
"pseudo_ipd_sat"


#' Centered patient data from single arm trial
#' @format A data frame with 500 rows and 14 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm.}
#' \item{AGE}{Age in years at baseline.}
#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
#' \item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data [agd].}
#' \item{AGE_MEDIAN_CENTERED}{`AGE` greater/less than `AGE_MEDIAN` in [agd] coded as `1`/`0` and then centered at
#' 0.5.}
#' \item{AGE_SQUARED_CENTERED}{`AGE` squared and centered with respect to the `AGE` in [agd]. The squared age in the
#' aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.}
#' \item{SEX_MALE_CENTERED}{`SEX_MALE` centered by the proportion of male patients in [agd]}
#' \item{ECOG0_CENTERED}{`ECOG0` centered by the proportion of `ECOG0` in [agd]}
#' \item{SMOKE_CENTERED}{`SMOKE` centered by the proportion of `SMOKE` in [agd]}
#' \item{N_PR_THER_MEDIAN_CENTERED}{`N_PR_THER` centered by the median in [agd].}
#' }
#' @family unanchored datasets
#' @keywords dataset
"centered_ipd_sat"

#' Binary outcome data from single arm trial
#' @format A data frame with 500 rows and 5 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm.}
#' \item{AVAL}{Analysis value, in this dataset an indicator of response.}
#' \item{PARAM}{Parameter type of `AVAL`.}
#' \item{RESPONSE}{Indicator of response.}
#' }
#' @family unanchored datasets
#' @keywords dataset
"adrs_sat"

#' Weighted object for single arm trial data
#' @format A `maicplus_estimate_weights` object created by [estimate_weights()] containing
#' \describe{
#' \item{data}{patient level data with weights}
#' \item{centered_colnames}{Columns used in MAIC}
#' \item{nr_missing}{Number of observations with missing data}
#' \item{ess}{Effective sample size}
#' \item{opt}{Information from `optim` from weight calculation}
#' \item{boot}{Parameters and bootstrap sample weights, `NULL` in this object}
#' }
#' @family unanchored datasets
#' @keywords dataset
"weighted_sat"

# aggregate data ------

#' Aggregate effect modifier data from published study
#'
#' This data is formatted to be used in [center_ipd()].
#'
#' @format A data frame with 3 rows and 9 columns:
#' \describe{
#' \item{STUDY}{The study name, Study_XXXX}
#' \item{ARM}{Study arm name or total}
#' \item{N}{Number of observations in study arm}
#' \item{AGE_MEAN}{Mean age in study arm}
#' \item{AGE_MEDIAN}{Median age in study arm}
#' \item{AGE_SD}{Standard deviation of age in study arm}
#' \item{SEX_MALE_COUNT}{Number of male patients}
#' \item{ECOG0_COUNT}{Number of patients with ECOG score = 0}
#' \item{SMOKE_COUNT}{Number of smokers}
#' \item{N_PR_THER_MEDIAN}{Median number of prior therapies}
#' }
#' @family unanchored datasets
#' @family anchored datasets
#' @keywords dataset
"agd"


# anchored datasets -------

#' Patient data from two arm trial
#' @format A data frame with 1000 rows and 8 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm.}
#' \item{AGE}{Age in years at baseline.}
#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
#' }
#' @family anchored datasets
#' @keywords dataset
"adsl_twt"


#' Survival data from two arm trial
#' @format A data frame with 1000 rows and 10 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm, `"A"`, `"C"`.}
#' \item{AVAL}{Analysis value, which in this dataset is overall survival time in days.}
#' \item{AVALU}{Unit of `AVAL`.}
#' \item{PARAMCD}{Parameter code of `AVAL`, `"OS"`.}
#' \item{PARAM}{Parameter name of `AVAL`, `"Overall Survival"`.}
#' \item{CNSR}{Censoring indicator `0`/`1`.}
#' \item{TIME}{Survival time in days.}
#' \item{EVENT}{Event indicator `0`/`1`.}
#' }
#' @family anchored datasets
#' @keywords dataset
"adtte_twt"

#' Binary outcome data from two arm trial
#' @format A data frame with 1000 rows and 5 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm, `"A"`, `"C"`.}
#' \item{AVAL}{Analysis value, in this dataset an indicator of response.}
#' \item{PARAM}{Parameter type of `AVAL`.}
#' \item{RESPONSE}{Indicator of response.}
#' }
#' @family anchored datasets
#' @keywords dataset
"adrs_twt"

#' Pseudo individual patient survival data from published two arm study
#' @format A data frame with 800 rows and 3 columns:
#' \describe{
#' \item{Time}{Survival time in days.}
#' \item{Event}{Event indicator `0`/`1`.}
#' \item{ARM}{Assigned treatment arm, `"B"`, `"C"`.}
#' }
#' @family anchored datasets
#' @keywords dataset
"pseudo_ipd_twt"


#' Centered patient data from two arm trial
#' @format A data frame with 1000 rows and 14 columns:
#' \describe{
#' \item{USUBJID}{Unique subject identifiers for patients.}
#' \item{ARM}{Assigned treatment arm.}
#' \item{AGE}{Age in years at baseline.}
#' \item{SEX}{Sex of patient recorded as character `"Male"`/`"Female"`.}
#' \item{SMOKE}{Smoking status at baseline as integer `1`/`0`.}
#' \item{ECOG0}{Indicator of ECOG score = 0 at baseline as integer `1`/`0`.}
#' \item{N_PR_THER}{Number of prior therapies received as integer `1, 2, 3, 4`.}
#' \item{SEX_MALE}{Indicator of `SEX == "Male"` as numeric `1`/`0`.}
#' \item{AGE_CENTERED}{Age in years at baseline relative to average in aggregate data [agd].}
#' \item{AGE_MEDIAN_CENTERED}{`AGE` greater/less than `AGE_MEDIAN` in [agd] coded as `1`/`0` and then centered at
#' 0.5.}
#' \item{AGE_SQUARED_CENTERED}{`AGE` squared and centered with respect to the `AGE` in [agd]. The squared age in the
#' aggregate data is derived from the \eqn{E(X^2)} term in the variance formula.}
#' \item{SEX_MALE_CENTERED}{`SEX_MALE` centered by the proportion of male patients in [agd]}
#' \item{ECOG0_CENTERED}{`ECOG0` centered by the proportion of `ECOG0` in [agd]}
#' \item{SMOKE_CENTERED}{`SMOKE` centered by the proportion of `SMOKE` in [agd]}
#' \item{N_PR_THER_MEDIAN_CENTERED}{`N_PR_THER` centered by the median in [agd].}
#' }
#' @keywords dataset
#' @family anchored datasets
"centered_ipd_twt"


if (FALSE) {
# Developer helper: prints a roxygen2 `@format` skeleton for `df` to the
# console so it can be pasted above a dataset entry and filled in by hand.
# The header line reports the row and column counts of `df`, and one empty
# `\item{<column>}{}` entry is printed per column name. The function is used
# for its printed output only; its value is the invisible `NULL` from `cat()`.
make_roxygen_data <- function(df) {
  dims <- dim(df)
  cat(
    "#' @format A data frame with", dims[1L], "rows and", dims[2L],
    "columns:\n"
  )
  # sprintf() yields character(0) when there are no column names, so no
  # \item lines are printed in that case.
  items <- sprintf("#' \\item{%s}{}\n", colnames(df))
  # The closing brace is written without a trailing newline.
  cat("#' \\describe{\n", items, "#' }", sep = "")
}
}
38 changes: 20 additions & 18 deletions R/matching.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,19 @@
#' }
#'
#' @examples
#' load(system.file("extdata", "ipd.rda", package = "maicplus", mustWork = TRUE))
#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
#' ipd_centered <- center_ipd(ipd = ipd, agd = agd)
#'
#' centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
#' centered_colnames <- paste0(centered_colnames, "_CENTERED")
#' data(agd)
#' data(adsl_sat)
#' ipd_centered <- center_ipd(ipd = adsl_sat, agd = process_agd(agd))
#' centered_colnames <- grep("_CENTERED", colnames(ipd_centered), value = TRUE)
#' centered_colnames
#' weighted_data <- estimate_weights(data = ipd_centered, centered_colnames = centered_colnames)
#'
#' \donttest{
#' # To later estimate bootstrap confidence intervals, we calculate the weights
#' # for the bootstrap samples:
#' weighted_data_boot <- estimate_weights(
#' data = ipd_centered, centered_colnames = centered_colnames, n_boot_iteration = 500
#' )
#' }
#' @export

estimate_weights <- function(data,
Expand Down Expand Up @@ -199,10 +204,9 @@ optimise_weights <- function(matrix,
#'
#' @return list of ESS, ESS reduction, median value of scaled and unscaled weights, and missing count
#' @examples
#' \dontrun{
#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' calculate_weights_legend(weighted_data)
#' }
#' data("weighted_sat")
#' calculate_weights_legend(weighted_sat)
#' @export
#' @keywords internal

calculate_weights_legend <- function(weighted_data) {
Expand Down Expand Up @@ -362,11 +366,11 @@ plot_weights_ggplot <- function(weighted_data, bin_col, vline_col,
#' @param bins (`ggplot` only) number of bin parameter to use
#'
#' @examples
#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' plot(weighted_data)
#' plot(weighted_sat)
#'
#' library(ggplot2)
#' plot(weighted_data, ggplot = TRUE)
#' if (requireNamespace("ggplot2")) {
#' plot(weighted_sat, ggplot = TRUE)
#' }
#' @describeIn estimate_weights Plot method for estimate_weights objects
#' @export

Expand Down Expand Up @@ -397,9 +401,7 @@ plot.maicplus_estimate_weights <- function(x, ggplot = FALSE,
#' aggregated data following the same naming convention
#'
#' @examples
#' load(system.file("extdata", "weighted_data.rda", package = "maicplus", mustWork = TRUE))
#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
#' check_weights(weighted_data, agd)
#' check_weights(weighted_sat, process_agd(agd))
#'
#' @import DescTools
#'
Expand Down
10 changes: 5 additions & 5 deletions R/process_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ process_agd <- function(raw_agd) {
#' @param dummize_ref_level vector of reference level of the variables to binarize
#'
#' @examples
#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' data(adsl_twt)
#' dummize_ipd(adsl_twt, dummize_cols = c("SEX"), dummize_ref_level = c("Male"))
#'
#' @return ipd with dummized columns
#' @export
Expand Down Expand Up @@ -152,8 +152,8 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
#' suffix is no longer accepted.
#' @examples
#' # load in IPD
#' adsl <- read.csv(system.file("extdata", "adsl.csv", package = "maicplus", mustWork = TRUE))
#' adsl <- dummize_ipd(adsl, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#' data(adsl_sat)
#' adsl <- dummize_ipd(adsl_sat, dummize_cols = c("SEX"), dummize_ref_level = c("Female"))
#'
#' # Reading aggregate data by Excel
#' target_pop <- read.csv(
Expand All @@ -162,7 +162,7 @@ dummize_ipd <- function(raw_ipd, dummize_cols, dummize_ref_level) {
#' agd <- process_agd(target_pop)
#'
#' # Alternatively, you can specify aggregate data manually in data frame
#' load(system.file("extdata", "agd.rda", package = "maicplus", mustWork = TRUE))
#' data(agd)
#' ipd_centered <- center_ipd(ipd = adsl, agd = agd)
#'
#' @return centered ipd using aggregate level data averages
Expand Down
Loading
Loading