diff --git a/NAMESPACE b/NAMESPACE index f7ffbaa..78729b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(print,count_evt) S3method(print,or_ci) S3method(print,prop_ci) S3method(print,s_coxph) @@ -11,6 +12,7 @@ export(derive_bor) export(h_pairwise_survdiff) export(h_prep_prop) export(rrPostProb) +export(s_count_event) export(s_get_coxph) export(s_get_lsmeans) export(s_get_survfit) @@ -39,6 +41,7 @@ importFrom(lubridate,days) importFrom(lubridate,ymd) importFrom(magrittr,"%>%") importFrom(magrittr,set_colnames) +importFrom(magrittr,set_names) importFrom(magrittr,set_rownames) importFrom(rlang,":=") importFrom(rlang,.data) diff --git a/R/count_event.R b/R/count_event.R new file mode 100644 index 0000000..79fb1bb --- /dev/null +++ b/R/count_event.R @@ -0,0 +1,154 @@ +# s_count_event ---- + +#' Count the Number of Events for Specific Variable +#' +#' @description `r lifecycle::badge("experimental")` +#' +#' This function counts the number of events occur for specific variable with +#' multiple conditions if needed, such as counting the different types of AE +#' occurrence rates. +#' +#' @param data (`data.frame`)\cr a data frame as input. +#' @param var (`string`)\cr target variable name for counting. +#' @param by (`string`)\cr an optional variable to group by. If null, use the whole data. +#' @param cond (`string`)\cr a listing contains multiple types of filtering conditions +#' that only support the equation such as `"TRTEMFL" = "Y"`. And the left side of the +#' equation is filtering id while the right should be a flag, string or numerical value. +#' @param label (`string`)\cr an optional string vector for labeling each filtering +#' conditions. Default is the name of `cond` variable. +#' @param denom (`string` or `data.frame`)\cr denominator for proportion can be a +#' numeric vector of denominators or a data frame where we can count the `var` inside. +#' +#' @return +#' * `s_count_event` returns an object of class `count_evt` that is a data frame +#' contains percentages for each group for different conditions. +#' +#' @note +#' - The order of `cond` names should be one-to-one respect to `label` you define. +#' - The order of `denom` names should be one-to-one respect to `by` variable levels. +#' +#' @export +#' +#' @examples +#' data("rand_adsl") +#' data("rand_adae") +#' +#' # by TRTA groups +#' s_count_event( +#' data = rand_adae, var = "SUBJID", by = "ARMCD", +#' cond = list( +#' "TEAEs" = c("TRTEMFL" = "Y"), +#' "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y"), +#' "SAE" = c("AESER" = "Y"), +#' "TRSAE" = c("AESER" = "Y", "AEREL" = "Y") +#' ), +#' label = c("Any TEAEs", "Any treatment-related TEAEs", +#' "Any serious TEAEs", "Any serious treatment-related TEAEs"), +#' denom = rand_adsl +#' ) +#' +#' # specify the denominator for each groups +#' s_count_event( +#' data = rand_adae, var = "SUBJID", by = "ARMCD", +#' cond = list("TEAEs" = c("TRTEMFL" = "Y")), +#' label = c("Any TEAEs"), +#' denom = c(100, 100, 100) +#' ) +#' +#' # no grouping +#' s_count_event( +#' data = rand_adae, var = "SUBJID", +#' cond = list( +#' "TEAEs" = c("TRTEMFL" = "Y"), +#' "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y") +#' ), +#' label = c("Any TEAEs", "Any treatment-related TEAEs"), +#' denom = 200 +#' ) +s_count_event <- function(data, + var, + by = NULL, + cond, + label = names(cond), + denom) { + assert_class(data, "data.frame") + assert_subset(var, names(data), empty.ok = FALSE) + assert_subset(by, names(data)) + assert_true(length(names(cond)) == length(label)) + assert_multi_class(denom, c("numeric", "data.frame")) + if (is.null(by) & is.data.frame(denom)) { + stop("denom should be numeric vector if by is defined as NULL.") + } + + cond_labels <- split(label, names(cond)) + if (is.null(by)) { + by <- "Total" + data[["Total"]] <- factor(by, levels = by) + } else { + if (!is.factor(data[[by]])) { + data[[by]] <- factor(by, levels = unique(data[[by]])) + } + } + denom_vec <- if (is.data.frame(denom)) { + if (!is.null(by)) { + cnt <- denom %>% count(!!sym(by)) + set_names(cnt[, "n", drop = TRUE], cnt[, 1, drop = TRUE]) + } else { + set_names(length(denom[[var]]), levels(data[[by]])) + } + } else { + set_names(denom, levels(data[[by]])) + } + + assert_numeric(denom_vec) + assert_true(all(names(denom_vec) == levels(data[[by]]))) + + cnt_tb <- cond %>% + purrr::imap(function(x, idx) { + filter_expr <- if (is.character(x)) { + paste0(names(x), " == '", x, "'", collapse = " & ") + } else if (is.numeric(x)) { + paste0(names(x), " == ", x, collapse = " & ") + } else { + stop("Each element of cond variable should be numeric or characteric.") + } + df <- data %>% + filter(!!rlang::parse_quo(filter_expr, env = rlang::global_env())) + attributes(df)$label <- NULL + res <- if (!is.null(by)) { + df %>% + distinct(!!sym(by), !!sym(var), .keep_all = TRUE) %>% + count(!!sym(by)) + } else { + df %>% + distinct(!!sym(var), .keep_all = TRUE) %>% + count() + } + res %>% + mutate( + group = droplevels(!!sym(by)), + N = as.vector(denom_vec), + perc = .data$n / .data$N, + label_ = idx, + label = cond_labels[[idx]] + ) %>% + ungroup() %>% + select(c("group", "n", "N", "perc", "label_", "label")) + }) %>% + purrr::list_rbind() + + structure( + list( + data = data, + cnt = cnt_tb, + params = list( + var = var, + by = by, + cond = cond, + label = label, + denom = denom_vec + ) + ), + class = "count_evt" + ) +} diff --git a/R/data.R b/R/data.R index fbc9426..79e27be 100644 --- a/R/data.R +++ b/R/data.R @@ -34,3 +34,15 @@ #' John Wiley and Sons Inc., New York, NY. #' "whas500" + +#' CDISC Random ADaM Data +#' +#' @description `r lifecycle::badge("experimental")` +#' +#' Random ADaM data created from `random.cdisc.data` package. +#' +#' @rdname cdisc_random_data +"rand_adsl" + +#' @rdname cdisc_random_data +"rand_adae" diff --git a/R/package.R b/R/package.R index 32ce4bd..ee57906 100644 --- a/R/package.R +++ b/R/package.R @@ -5,7 +5,7 @@ "_PACKAGE" #' @import checkmate -#' @importFrom magrittr set_colnames set_rownames +#' @importFrom magrittr set_colnames set_rownames set_names #' @importFrom lifecycle deprecated #' @importFrom stats pbeta rbinom confint as.formula setNames coef quantile #' @importFrom dplyr add_count arrange bind_rows case_when count distinct filter diff --git a/R/pkg-methods.R b/R/pkg-methods.R index b6a822c..87f1435 100644 --- a/R/pkg-methods.R +++ b/R/pkg-methods.R @@ -496,3 +496,36 @@ print.s_coxph <- function(x, ...) { invisible(x) } + + +#' @describeIn s_count_event prints the counts and percentages of events. +#' @exportS3Method +#' @keywords internal +print.count_evt <- function(x, ...) { + grp_var <- x$params$by + + a_evt_func <- function(df, .var, cnt_tb, lab) { + curgrp <- df[[.var]][1] + res <- cnt_tb %>% + filter(.data$group == curgrp & .data$label == lab) + in_rows( + rcell(unlist(res[, c("n", "perc"), drop = TRUE]), format = "xx (xx.x%)"), + .names = lab + ) + } + + labels <- x$params$label + tbl <- basic_table(show_colcounts = TRUE) %>% + split_cols_by(grp_var) + for (i in seq_along(labels)) { + tbl <- tbl %>% + analyze(grp_var, a_evt_func, show_labels = "hidden", + extra_args = list(cnt_tb = x$cnt, lab = labels[i]), + table_names = labels[i]) + } + result <- tbl %>% + build_table(df = x$data, col_counts = x$params$denom) + print(result) + + invisible(x) +} diff --git a/R/proportion.R b/R/proportion.R index fac21db..6c54081 100644 --- a/R/proportion.R +++ b/R/proportion.R @@ -31,7 +31,7 @@ NULL #' #' @param data (`data.frame`)\cr a data frame as input. #' @param var (`string`)\cr target variable name for estimation. -#' @param by (`string`)\cr a optional variable to group by. If null, use the whole data. +#' @param by (`string`)\cr an optional variable to group by. If null, use the whole data. #' @param by.level (`vector`)\cr an optional vector for encoding `var` as a factor #' and the first level will be as the reference group. If null, use the default #' order to encode. diff --git a/data-raw/create_random_cdisc.R b/data-raw/create_random_cdisc.R new file mode 100644 index 0000000..fdb38be --- /dev/null +++ b/data-raw/create_random_cdisc.R @@ -0,0 +1,9 @@ +rand_adsl <- random.cdisc.data::radsl( + N = 100, study_duration = 2, + with_trt02 = FALSE, + seed = 2 +) +rand_adae <- random.cdisc.data::radae(adsl, seed = 2) + +usethis::use_data(rand_adsl, overwrite = TRUE) +usethis::use_data(rand_adae, overwrite = TRUE) diff --git a/data/rand_adae.rda b/data/rand_adae.rda new file mode 100644 index 0000000..fb44b8f Binary files /dev/null and b/data/rand_adae.rda differ diff --git a/data/rand_adsl.rda b/data/rand_adsl.rda new file mode 100644 index 0000000..982f586 Binary files /dev/null and b/data/rand_adsl.rda differ diff --git a/inst/WORDLIST b/inst/WORDLIST index 6ce892c..c443d0b 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,6 +1,7 @@ ADRS ADT ADaM +AE ANCOVA AVAL AVALC diff --git a/man/cdisc_random_data.Rd b/man/cdisc_random_data.Rd new file mode 100644 index 0000000..cf7ed1c --- /dev/null +++ b/man/cdisc_random_data.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{rand_adsl} +\alias{rand_adsl} +\alias{rand_adae} +\title{CDISC Random ADaM Data} +\format{ +An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 100 rows and 45 columns. + +An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 515 rows and 82 columns. +} +\usage{ +rand_adsl + +rand_adae +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +Random ADaM data created from \code{random.cdisc.data} package. +} +\keyword{datasets} diff --git a/man/prop_odds_ratio.Rd b/man/prop_odds_ratio.Rd index cc286dc..3d70223 100644 --- a/man/prop_odds_ratio.Rd +++ b/man/prop_odds_ratio.Rd @@ -52,7 +52,7 @@ h_prep_prop(data, var, by, by.level, resp) \item{var}{(\code{string})\cr target variable name for estimation.} -\item{by}{(\code{string})\cr a optional variable to group by. If null, use the whole data.} +\item{by}{(\code{string})\cr an optional variable to group by. If null, use the whole data.} \item{by.level}{(\code{vector})\cr an optional vector for encoding \code{var} as a factor and the first level will be as the reference group. If null, use the default diff --git a/man/s_count_event.Rd b/man/s_count_event.Rd new file mode 100644 index 0000000..7cc57b4 --- /dev/null +++ b/man/s_count_event.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/count_event.R, R/pkg-methods.R +\name{s_count_event} +\alias{s_count_event} +\alias{print.count_evt} +\title{Count the Number of Events for Specific Variable} +\usage{ +s_count_event(data, var, by = NULL, cond, label = names(cond), denom) + +\method{print}{count_evt}(x, ...) +} +\arguments{ +\item{data}{(\code{data.frame})\cr a data frame as input.} + +\item{var}{(\code{string})\cr target variable name for counting.} + +\item{by}{(\code{string})\cr an optional variable to group by. If null, use the whole data.} + +\item{cond}{(\code{string})\cr a listing contains multiple types of filtering conditions +that only support the equation such as \code{"TRTEMFL" = "Y"}. And the left side of the +equation is filtering id while the right should be a flag, string or numerical value.} + +\item{label}{(\code{string})\cr an optional string vector for labeling each filtering +conditions. Default is the name of \code{cond} variable.} + +\item{denom}{(\code{string} or \code{data.frame})\cr denominator for proportion can be a +numeric vector of denominators or a data frame where we can count the \code{var} inside.} +} +\value{ +\itemize{ +\item \code{s_count_event} returns an object of class \code{count_evt} that is a data frame +contains percentages for each group for different conditions. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +This function counts the number of events occur for specific variable with +multiple conditions if needed, such as counting the different types of AE +occurrence rates. +} +\section{Functions}{ +\itemize{ +\item \code{print(count_evt)}: prints the counts and percentages of events. + +}} +\note{ +\itemize{ +\item The order of \code{cond} names should be one-to-one respect to \code{label} you define. +\item The order of \code{denom} names should be one-to-one respect to \code{by} variable levels. +} +} +\examples{ +data("rand_adsl") +data("rand_adae") + +# by TRTA groups +s_count_event( + data = rand_adae, var = "SUBJID", by = "ARMCD", + cond = list( + "TEAEs" = c("TRTEMFL" = "Y"), + "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y"), + "SAE" = c("AESER" = "Y"), + "TRSAE" = c("AESER" = "Y", "AEREL" = "Y") + ), + label = c("Any TEAEs", "Any treatment-related TEAEs", + "Any serious TEAEs", "Any serious treatment-related TEAEs"), + denom = rand_adsl +) + +# specify the denominator for each groups +s_count_event( + data = rand_adae, var = "SUBJID", by = "ARMCD", + cond = list("TEAEs" = c("TRTEMFL" = "Y")), + label = c("Any TEAEs"), + denom = c(100, 100, 100) +) + +# no grouping +s_count_event( + data = rand_adae, var = "SUBJID", + cond = list( + "TEAEs" = c("TRTEMFL" = "Y"), + "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y") + ), + label = c("Any TEAEs", "Any treatment-related TEAEs"), + denom = 200 +) +} +\keyword{internal} diff --git a/tests/spelling.R b/tests/spelling.R index 13f77d9..6713838 100644 --- a/tests/spelling.R +++ b/tests/spelling.R @@ -1,6 +1,3 @@ -if (requireNamespace("spelling", quietly = TRUE)) { - spelling::spell_check_test( - vignettes = TRUE, error = FALSE, - skip_on_cran = TRUE - ) -} +if(requireNamespace('spelling', quietly = TRUE)) + spelling::spell_check_test(vignettes = TRUE, error = FALSE, + skip_on_cran = TRUE) diff --git a/tests/testthat/test-count_event.R b/tests/testthat/test-count_event.R new file mode 100644 index 0000000..1de64c7 --- /dev/null +++ b/tests/testthat/test-count_event.R @@ -0,0 +1,128 @@ +test_that("s_count_event works as expected with default arguments", { + data("rand_adsl") + data("rand_adae") + + res <- s_count_event( + data = rand_adae, var = "SUBJID", by = "ARMCD", + cond = list( + "TEAEs" = c("TRTEMFL" = "Y"), + "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y"), + "SAE" = c("AESER" = "Y"), + "TRSAE" = c("AESER" = "Y", "AEREL" = "Y") + ), + label = c( + "Any TEAEs", "Any treatment-related TEAEs", + "Any serious TEAEs", "Any serious treatment-related TEAEs" + ), + denom = rand_adsl + ) + + expect_equal( + res$cnt, + tibble::tibble( + group = factor(rep(c("ARM A", "ARM B", "ARM C"), 4), levels = c("ARM A", "ARM B", "ARM C")), + n = c(34, 30, 26, 33, 28, 25, 25, 23, 22, 23, 19, 17), + N = rep(c(36, 34, 30), 4), + perc = c( + 0.9444444, 0.8823529, 0.8666667, 0.9166667, 0.8235294, 0.8333333, + 0.6944444, 0.6764706, 0.7333333, 0.6388889, 0.5588235, 0.5666667 + ), + label_ = rep(c("TEAEs", "TRAEs", "SAE", "TRSAE"), each = 3), + label = rep(c( + "Any TEAEs", "Any treatment-related TEAEs", + "Any serious TEAEs", "Any serious treatment-related TEAEs" + ), each = 3) + ), + tolerance = 0.0001 + ) +}) + +test_that("s_count_event works as expected with default arguments", { + data("rand_adsl") + data("rand_adae") + + res <- s_count_event( + data = rand_adae, var = "SUBJID", by = "ARMCD", + cond = list( + "TEAEs" = c("TRTEMFL" = "Y"), + "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y"), + "SAE" = c("AESER" = "Y"), + "TRSAE" = c("AESER" = "Y", "AEREL" = "Y") + ), + label = c( + "Any TEAEs", "Any treatment-related TEAEs", + "Any serious TEAEs", "Any serious treatment-related TEAEs" + ), + denom = rand_adsl + ) + + expect_equal( + res$cnt, + tibble::tibble( + group = factor(rep(c("ARM A", "ARM B", "ARM C"), 4), levels = c("ARM A", "ARM B", "ARM C")), + n = c(34, 30, 26, 33, 28, 25, 25, 23, 22, 23, 19, 17), + N = rep(c(36, 34, 30), 4), + perc = c( + 0.9444444, 0.8823529, 0.8666667, 0.9166667, 0.8235294, 0.8333333, + 0.6944444, 0.6764706, 0.7333333, 0.6388889, 0.5588235, 0.5666667 + ), + label_ = rep(c("TEAEs", "TRAEs", "SAE", "TRSAE"), each = 3), + label = rep(c( + "Any TEAEs", "Any treatment-related TEAEs", + "Any serious TEAEs", "Any serious treatment-related TEAEs" + ), each = 3) + ), + tolerance = 0.0001 + ) +}) + +test_that("s_count_event works as expected with specific the denominator", { + data("rand_adsl") + data("rand_adae") + + res <- s_count_event( + data = rand_adae, var = "SUBJID", by = "ARMCD", + cond = list("TEAEs" = c("TRTEMFL" = "Y")), + label = c("Any TEAEs"), + denom = c(100, 100, 100) + ) + + expect_equal( + res$cnt, + tibble::tibble( + group = factor(c("ARM A", "ARM B", "ARM C"), levels = c("ARM A", "ARM B", "ARM C")), + n = c(34, 30, 26), + N = rep(100, 3), + perc = c(0.34, 0.30, 0.26), + label_ = rep("TEAEs", 3), + label = rep("Any TEAEs", 3), + ) + ) +}) + +test_that("s_count_event works as expected with no grouping", { + data("rand_adsl") + data("rand_adae") + + res <- s_count_event( + data = rand_adae, var = "SUBJID", + cond = list( + "TEAEs" = c("TRTEMFL" = "Y"), + "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y") + ), + label = c("Any TEAEs", "Any treatment-related TEAEs"), + denom = 200 + ) + + expect_equal( + res$cnt, + tibble::tibble( + group = factor("Total", levels = "Total"), + n = c(90, 86), + N = rep(200, 2), + perc = c(0.45, 0.43), + label_ = c("TEAEs", "TRAEs"), + label = c("Any TEAEs", "Any treatment-related TEAEs"), + ) + ) +}) diff --git a/tests/testthat/test-pkg-methods.R b/tests/testthat/test-pkg-methods.R index 3a6bfed..dfc5e60 100644 --- a/tests/testthat/test-pkg-methods.R +++ b/tests/testthat/test-pkg-methods.R @@ -155,3 +155,38 @@ test_that("print.s_coxph works as expected", { ))) expect_match(res3, "P-value method for HR: logtest", fixed = TRUE) }) + +test_that("print.count_evt works as expected", { + data("rand_adsl") + data("rand_adae") + + res <- capture_output(print(s_count_event( + data = rand_adae, var = "SUBJID", by = "ARMCD", + cond = list( + "TEAEs" = c("TRTEMFL" = "Y"), + "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y"), + "SAE" = c("AESER" = "Y"), + "TRSAE" = c("AESER" = "Y", "AEREL" = "Y") + ), + label = c( + "Any TEAEs", "Any treatment-related TEAEs", + "Any serious TEAEs", "Any serious treatment-related TEAEs" + ), + denom = rand_adsl + ))) + expect_match(res, "ARM A ARM B ARM C", fixed = TRUE) + expect_match(res, "(N=36) (N=34) (N=30)", fixed = TRUE) + + res2 <- capture_output(print(s_count_event( + data = rand_adae, var = "SUBJID", + cond = list( + "TEAEs" = c("TRTEMFL" = "Y"), + "TRAEs" = c("TRTEMFL" = "Y", "AEREL" = "Y") + ), + label = c("Any TEAEs", "Any treatment-related TEAEs"), + denom = 200 + ))) + expect_match(res2, "Total", fixed = TRUE) + expect_match(res2, "(N=200)", fixed = TRUE) +}) +