diff --git a/.Rbuildignore b/.Rbuildignore index 9c3045a1..c2d17038 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -24,3 +24,4 @@ ^rsconnect$ ^data-raw$ ^scratch.R$ +^CRAN-SUBMISSION$ diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 00000000..a964860a --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 1.2.0 +Date: 2024-02-14 17:07:48 UTC +SHA: 806f9a0a103059542437632f5977cc1e8ded2652 diff --git a/DESCRIPTION b/DESCRIPTION index f12f07c9..fec79b17 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Tplyr Title: A Traceability Focused Grammar of Clinical Data Summary -Version: 1.2.0 +Version: 1.2.1 Authors@R: c( person(given = "Eli", diff --git a/NAMESPACE b/NAMESPACE index c1e7f2a4..b3b291b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ S3method(str,tplyr_table) export("%>%") export("header_n<-") export("pop_data<-") +export(add_anti_join) export(add_column_headers) export(add_filters) export(add_layer) diff --git a/R/count_bindings.R b/R/count_bindings.R index bc86aa84..4dd1603c 100644 --- a/R/count_bindings.R +++ b/R/count_bindings.R @@ -745,6 +745,13 @@ add_missing_subjects_row <- function(e, fmt = NULL, sort_value = NULL) { } assert_inherits_class(e, "count_layer") + if (identical(env_get(env_parent(e), 'target'), env_get(env_parent(e), 'pop_data'))) { + warning(paste("\tPopulation data was not set separately from the target data.", + "\tMissing subject counts may be misleading in this scenario.", + "\tDid you mean to use `set_missing_count() instead?", + sep="\n")) + } + env_bind(e, include_missing_subjects_row = TRUE) env_bind(e, missing_subjects_count_format = fmt) env_bind(e, missing_subjects_sort_value = sort_value) diff --git a/R/meta-builders.R b/R/meta-builders.R index 601ba14e..3cae0b2a 100644 --- a/R/meta-builders.R +++ b/R/meta-builders.R @@ -94,6 +94,7 @@ build_count_meta <- function(layer, table_where, layer_where, treat_grps, summar # The total row label may not pass through, so set it total_row_label <- ifelse(is.null(layer$total_row_label), 'Total', layer$total_row_label) + missing_subjects_row_label <- ifelse(is.null(layer$total_row_label), 'Missing', layer$missing_subjects_row_label) count_missings <- ifelse(is.null(layer$count_missings), FALSE, layer$count_missings) mlist <- layer$missing_count_list @@ -101,6 +102,9 @@ build_count_meta <- function(layer, table_where, layer_where, treat_grps, summar character_outer <- get_character_outer(layer) unnested_character <- is_unnested_character(layer) + # Pull out table object to use later + tbl <- env_parent(layer) + meta <- vector('list', length(values[[1]])) # Vectorize across the input data @@ -113,6 +117,7 @@ build_count_meta <- function(layer, table_where, layer_where, treat_grps, summar } row_filter <- list() + aj <- NULL # Pull out the current row's values cur_values <- map(values, ~ .x[i]) @@ -130,21 +135,26 @@ build_count_meta <- function(layer, table_where, layer_where, treat_grps, summar if (summary_var[i] == total_row_label && !count_missings) { # Filter out the missing counts if the total row should exclude missings row_filter <- make_parsed_strings(layer$target_var, list(mlist), negate=TRUE) - } - else if (summary_var[i] %in% names(mlist)) { + } else if (summary_var[i] == missing_subjects_row_label) { + # Special handling for missing subject rows + # Make a meta object for the pop data + pop_filt_inds <- which(filter_variables %in% unlist(list(tbl$treat_var, tbl$cols))) + pop_filt_vars <- filter_variables[pop_filt_inds] + pop_filt_vals <- filter_values[pop_filt_inds] + pop_meta <- build_meta(tbl$pop_where, quo(TRUE), treat_grps, pop_filt_vars, pop_filt_vals) + aj <- new_anti_join(join_meta=pop_meta, on=layer$distinct_by) + } else if (summary_var[i] %in% names(mlist)) { # Get the values for the missing row miss_val <- mlist[which(names(mlist) == summary_var[i])] row_filter <- make_parsed_strings(layer$target_var, list(miss_val)) - } - else if (summary_var[i] != total_row_label) { + } else if (summary_var[i] != total_row_label) { # Subset to outer layer value row_filter <- make_parsed_strings(na_var, summary_var[i]) } add_vars <- append(add_vars, na_var) - } - else { + } else { # Inside the nested layer filter_variables <- variables filter_values <- cur_values @@ -162,6 +172,18 @@ build_count_meta <- function(layer, table_where, layer_where, treat_grps, summar else if (summary_var[i] == total_row_label && !count_missings) { # Filter out the missing counts if the total row should exclude missings row_filter <- make_parsed_strings(layer$target_var, list(mlist), negate=TRUE) + } else if (summary_var[i] == missing_subjects_row_label) { + # Special handling for missing subject rows + # Make a meta object for the pop data + pop_filt_inds <- which(filter_variables %in% unlist(list(tbl$treat_var, tbl$cols))) + pop_filt_vars <- filter_variables[pop_filt_inds] + pop_filt_vals <- filter_values[pop_filt_inds] + # Reset to the pop treat value + pop_filt_vars[[ + which(map_chr(pop_filt_vars, as_label) == as_label(tbl$treat_var)) + ]] <- tbl$pop_treat_var + pop_meta <- build_meta(tbl$pop_where, quo(TRUE), treat_grps, pop_filt_vars, pop_filt_vals) + aj <- new_anti_join(join_meta=pop_meta, on=layer$distinct_by) } else if (!is.na(character_outer) && summary_var[i] == character_outer) { # If the outer layer is a character string then don't provide a filter @@ -176,8 +198,8 @@ build_count_meta <- function(layer, table_where, layer_where, treat_grps, summar # Make the meta object meta[[i]] <- build_meta(table_where, layer_where, treat_grps, filter_variables, filter_values) %>% add_filters_(row_filter) %>% - add_variables_(add_vars) - + add_variables_(add_vars) %>% + add_anti_join_(aj) } meta diff --git a/R/meta.R b/R/meta.R index 78ccaa2f..86da7d37 100644 --- a/R/meta.R +++ b/R/meta.R @@ -221,8 +221,93 @@ print.tplyr_meta <- function(x, ...) { cat("Names:\n") names <- map_chr(x$names, as_label) filters <- map_chr(x$filters, as_label) - cat(" ", paste(names, collapse = ", "), "\n") + cat(" ", paste0(names, collapse = ", "), "\n") cat("Filters:\n") - cat(" ", paste(filters, collapse = ", "), "\n") + cat(" ", paste0(filters, collapse = ", "), "\n") + if (!is.null(x$anti_join)) { + cat("Anti-join:\n") + cat(" Join Meta:\n") + cat(paste0(" ", capture.output(x$anti_join$join_meta), "\n"), sep="") + cat(" On:\n") + aj_on <- map_chr(x$anti_join$on, as_label) + cat(" ", paste0(aj_on, collapse = ", "), "\n") + } invisible() } + +#' Create an tplyr_meta_anti_join object +#' +#' @return tplyr_meta_anti_join object +#' @noRd +new_anti_join <- function(join_meta, on) { + structure( + list( + join_meta = join_meta, + on = on + ), + class="tplyr_meta_anti_join" + ) +} + +#' Internal application of anti_join onto tplyr_meta object +#' @noRd +add_anti_join_ <- function(meta, aj) { + meta$anti_join <- aj + meta +} + +#' Add an anti-join onto a tplyr_meta object +#' +#' An anti-join allows a tplyr_meta object to refer to data that should be +#' extracted from a separate dataset, like the population data of a Tplyr table, +#' that is unavailable in the target dataset. The primary use case for this is +#' the presentation of missing subjects, which in a Tplyr table is presented +#' using the function `add_missing_subjects_row()`. The missing subjects +#' themselves are not present in the target data, and are thus only available in +#' the population data. The `add_anti_join()` function allows you to provide the +#' meta information relevant to the population data, and then specify the `on` +#' variable that should be used to join with the target dataset and find the +#' values present in the population data that are missing from the target data. +#' +#' @param meta A tplyr_meta object referring to the target data +#' @param join_meta A tplyr_meta object referring to the population data +#' @param on A list of quosures containing symbols - most likely set to USUBJID. +#' +#' @return A tplyr_meta object +#' @md +#' @export +#' +#' @examples +#' +#' tm <- tplyr_meta( +#' rlang::quos(TRT01A, SEX, ETHNIC, RACE), +#' rlang::quos(TRT01A == "Placebo", TRT01A == "SEX", ETHNIC == "HISPANIC OR LATINO") +#' ) +#' +#' tm %>% +#' add_anti_join( +#' tplyr_meta( +#' rlang::quos(TRT01A, ETHNIC), +#' rlang::quos(TRT01A == "Placebo", ETHNIC == "HISPANIC OR LATINO") +#' ), +#' on = rlang::quos(USUBJID) +#' ) +add_anti_join <- function(meta, join_meta, on){ + + if (!inherits(meta, 'tplyr_meta')) { + stop("meta must be a tplyr_meta object", call.=FALSE) + } + + if (!inherits(join_meta, 'tplyr_meta')) { + stop("join_meta must be a tplyr_meta object", call.=FALSE) + } + + if (!all(map_lgl(on, ~ is_quosure(.) && quo_is_symbol(.)))) { + stop("on must be provided as a list of names", call.=FALSE) + } + + + aj <- new_anti_join(join_meta, on) + + add_anti_join_(meta, aj) +} diff --git a/R/meta_utils.R b/R/meta_utils.R index fabbbcbd..3d0fc36e 100644 --- a/R/meta_utils.R +++ b/R/meta_utils.R @@ -54,7 +54,7 @@ get_meta_result <- function(x, row_id, column, ...) { get_meta_result.tplyr_table <- function(x, row_id, column, ...) { m <- x$metadata - get_meta_result.data.frame(m, row_id, column) + get_meta_result.data.frame(m, row_id, column, ...) } #' @export @@ -69,6 +69,10 @@ get_meta_result.data.frame <- function(x, row_id, column, ...) { 'column present in the built Tplyr dataframe'), call.=FALSE) } + if (length(list(...)) > 0) { + warning("Extra arguments were provided to get_meta_result() that will not be used.", immediate.=TRUE) + } + # Pull out the cell of interest res <- x[[which(x$row_id == row_id), column]][[1]] @@ -109,6 +113,8 @@ get_meta_result.data.frame <- function(x, row_id, column, ...) { #' @param column The result column of interest, provided as a character string #' @param add_cols Additional columns to include in subset data.frame output #' @param target A data frame to be subset (if not pulled from a Tplyr table) +#' @param pop_data A data frame to be subset through an anti-join (if not pulled +#' from a Tplyr table) #' @param ... additional arguments #' #' @return A data.frame @@ -139,7 +145,8 @@ get_meta_subset <- function(x, row_id, column, add_cols = vars(USUBJID), ...) { #' @export #' @rdname get_meta_subset get_meta_subset.data.frame <- function(x, row_id, column, - add_cols = vars(USUBJID), target = NULL, ...) { + add_cols = vars(USUBJID), + target = NULL, pop_data = NULL, ...) { # Get the metadata object ready m <- get_meta_result(x, row_id, column) @@ -152,9 +159,33 @@ get_meta_subset.data.frame <- function(x, row_id, column, stop("If querying metadata without a tplyr_table, a target must be provided", call.=FALSE) } - target %>% + if (length(list(...)) > 0) { + warning("Extra arguments were provided to get_meta_subset() that will not be used.", immediate.=TRUE) + } + + out <- target %>% filter(!!!m$filters) %>% select(!!!add_cols, !!!m$names) + + if (!is.null(m$anti_join)) { + aj <- m$anti_join + pd <- pop_data %>% + filter(!!!aj$join_meta$filters) %>% + select(!!!aj$on, !!!add_cols, !!!aj$join_meta$names) + + mrg_var <- map_chr(aj$on, as_name) + names(mrg_var) <- mrg_var + + if (!(mrg_var %in% names(pd)) | !(mrg_var %in% names(out))) { + stop(paste0( + "The `on` variable specified is missing from either the target data or the population data subsets.\n ", + "Try adding the `on` variables to the `add_cols` parameter") + ) + } + out <- anti_join(pd, out, by=mrg_var) + } + + out } #' @export @@ -164,13 +195,7 @@ get_meta_subset.tplyr_table <- function(x, row_id, column, add_cols = vars(USUBJ # Get the metadata object ready m <- get_meta_result(x, row_id, column) - if (!inherits(add_cols, 'quosures')) { - stop("add_cols must be provided using `dplyr::vars()`", call.=FALSE) - } - - # Subset and return the data - x$target %>% - filter(!!!m$filters) %>% - select(!!!add_cols, !!!m$names) + get_meta_subset(x$metadata, row_id, column, add_cols = add_cols, + target = x$target, pop_data = x$pop_data) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 8218614e..380b4141 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -62,7 +62,9 @@ reference: - add_total_row - add_total_group - add_risk_diff + - add_missing_subjects_row - set_total_row_label + - set_missing_subjects_row_label - title: Descriptive Statistics Layer Functions desc: Descriptive statistics layer helper functions - contents: @@ -84,6 +86,7 @@ reference: - set_denom_ignore - set_indentation - set_numeric_threshold + - set_limit_data_by - title: Column Headers desc: Column header helpers - contents: @@ -95,6 +98,7 @@ reference: - tplyr_meta - add_variables - add_filters + - add_anti_join - get_metadata - append_metadata - starts_with('get_meta') @@ -109,6 +113,7 @@ reference: - apply_formats - apply_row_masks - collapse_row_labels + - replace_leading_whitespace - str_extract_fmt_group - str_extract_num - str_indent_wrap @@ -130,6 +135,7 @@ reference: - tplyr_adas - tplyr_adlb - tplyr_adsl + - tplyr_adpe - get_data_labels articles: diff --git a/man/add_anti_join.Rd b/man/add_anti_join.Rd new file mode 100644 index 00000000..c6956075 --- /dev/null +++ b/man/add_anti_join.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta.R +\name{add_anti_join} +\alias{add_anti_join} +\title{Add an anti-join onto a tplyr_meta object} +\usage{ +add_anti_join(meta, join_meta, on) +} +\arguments{ +\item{meta}{A tplyr_meta object referring to the target data} + +\item{join_meta}{A tplyr_meta object referring to the population data} + +\item{on}{A list of quosures containing symbols - most likely set to USUBJID.} +} +\value{ +A tplyr_meta object +} +\description{ +An anti-join allows a tplyr_meta object to refer to data that should be +extract from a separate dataset, like the population data of a Tplyr table, +that is unavailable in the target dataset. The primary use case for this is +the presentation of missing subjects, which in a Tplyr table is presented +using the function \code{add_missing_subjects_row()}. The missing subjects +themselves are not present in the target data, and are thus only available in +the population data. The \code{add_anti_join()} function allows you to provide the +meta information relevant to the population data, and then specify the \code{on} +variable that should be used to join with the target dataset and find the +values present in the population data that are missing from the target data. +} +\examples{ + +tm <- tplyr_meta( + rlang::quos(TRT01A, SEX, ETHNIC, RACE), + rlang::quos(TRT01A == "Placebo", TRT01A == "SEX", ETHNIC == "HISPANIC OR LATINO") +) + +tm \%>\% + add_anti_join( + tplyr_meta( + rlang::quos(TRT01A, ETHNIC), + rlang::quos(TRT01A == "Placebo", ETHNIC == "HISPANIC OR LATINO") + ), + on = rlang::quos(USUBJID) + ) +} diff --git a/man/get_meta_subset.Rd b/man/get_meta_subset.Rd index f8028394..725892e0 100644 --- a/man/get_meta_subset.Rd +++ b/man/get_meta_subset.Rd @@ -14,6 +14,7 @@ get_meta_subset(x, row_id, column, add_cols = vars(USUBJID), ...) column, add_cols = vars(USUBJID), target = NULL, + pop_data = NULL, ... ) @@ -32,6 +33,9 @@ string} \item{...}{additional arguments} \item{target}{A data frame to be subset (if not pulled from a Tplyr table)} + +\item{pop_data}{A data frame to be subset through an anti-join (if not pulled +from a Tplyr table)} } \value{ A data.frame diff --git a/tests/testthat/_snaps/count.md b/tests/testthat/_snaps/count.md index 01d31a37..a3c4c87d 100644 --- a/tests/testthat/_snaps/count.md +++ b/tests/testthat/_snaps/count.md @@ -639,3 +639,9 @@ Argument `e` does not inherit "count_layer". Classes: tplyr_layer, desc_layer, environment +# Missing counts on nested count layers function correctly + + Population data was not set separately from the target data. + Missing subject counts may be misleading in this scenario. + Did you mean to use `set_missing_count() instead? + diff --git a/tests/testthat/_snaps/meta.md b/tests/testthat/_snaps/meta.md index 19c02cb6..3103c329 100644 --- a/tests/testthat/_snaps/meta.md +++ b/tests/testthat/_snaps/meta.md @@ -6,6 +6,14 @@ meta must be a tplyr_meta object +--- + + meta must be a tplyr_meta object + +--- + + join_meta must be a tplyr_meta object + --- Filters must be provided as a list of calls @@ -22,6 +30,10 @@ Names must be provided as a list of names +--- + + on must be provided as a list of names + # Metadata extraction and extension error properly t must be a tplyr_table object @@ -75,7 +87,38 @@ Output tplyr_meta: 3 names, 4 filters Names: - a, b, c + a, b, c + Filters: + a == 1, b == 2, c == 3, x == "a" + +# Anti-join extraction works properly + + Population data was not set separately from the target data. + Missing subject counts may be misleading in this scenario. + Did you mean to use `set_missing_count() instead? + +--- + + The `on` variable specified is missing from either the target data or the population data subsets. + Try adding the `on` variables to the `add_cols` parameter + +# Tplyr meta print method works as expected + + Code + print(meta2) + Output + tplyr_meta: 11 names, 5 filters + Names: + TRTP, EFFFL, ITTFL, ANL01FL, SITEGR1, AVISIT, AVISITN, PARAMCD, AVAL, BASE, CHG Filters: - a == 1, b == 2, c == 3, x == "a" + EFFFL == "Y", ITTFL == "Y", PARAMCD == "ACTOT", ANL01FL == "Y", AVISITN == 24 + Anti-join: + Join Meta: + tplyr_meta: 4 names, 2 filters + Names: + TRT01P, EFFFL, ITTFL, SITEGR1 + Filters: + EFFFL == "Y", ITTFL == "Y" + On: + USUBJID diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index c8fc41ce..546da515 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -1094,14 +1094,17 @@ test_that("Missing counts on nested count layers function correctly", { expect_equal(tail(x, 1)$ord_layer_2, 99999) # Also test that label reassignment flows - x <- tplyr_table(tplyr_adsl, TRT01A) %>% - add_layer( - group_count(vars(SEX, RACE)) %>% - set_order_count_method(c("byfactor", "byvarn")) %>% - add_missing_subjects_row(f_str("xx (XX.x%)", distinct_n, distinct_pct), sort_value = 99999) %>% - set_missing_subjects_row_label("New label") - ) %>% - build() + # The warning here is intentional + expect_snapshot_warning({ + x <- tplyr_table(tplyr_adsl, TRT01A) %>% + add_layer( + group_count(vars(SEX, RACE)) %>% + set_order_count_method(c("byfactor", "byvarn")) %>% + add_missing_subjects_row(f_str("xx (XX.x%)", distinct_n, distinct_pct), sort_value = 99999) %>% + set_missing_subjects_row_label("New label") + ) %>% + build() + }) expect_equal(filter(x, row_label2 == " New label")$ord_layer_2, c(99999, 99999)) }) diff --git a/tests/testthat/test-meta.R b/tests/testthat/test-meta.R index 3400c209..a8b8e9b2 100644 --- a/tests/testthat/test-meta.R +++ b/tests/testthat/test-meta.R @@ -86,6 +86,8 @@ test_that("Metadata creation errors generate properly", { # Not providing metadata object expect_snapshot_error(add_variables(mtcars, quos(a))) expect_snapshot_error(add_filters(mtcars, quos(a==1))) + expect_snapshot_error(add_anti_join(mtcars, m, quos(a==1))) + expect_snapshot_error(add_anti_join(m, mtcars, quos(a==1))) # Didn't provide filter expect_snapshot_error(tplyr_meta(quos(a), 'x')) @@ -94,6 +96,7 @@ test_that("Metadata creation errors generate properly", { # Didn't provide names expect_snapshot_error(tplyr_meta('x')) expect_snapshot_error(add_variables(m, 'x')) + expect_snapshot_error(add_anti_join(m, m, 'x')) }) @@ -105,9 +108,12 @@ test_that("Exported metadata function construct metadata properly", { m <- add_variables(m, quos(x)) m <- add_filters(m, quos(x=="a")) + m2 <- add_anti_join(m, m, quos(y)) expect_equal(m$names, quos(a, b, c, x)) expect_equal(m$filters, quos(a==1, b==2, c==3, x=="a")) + expect_equal(m2$anti_join$join_meta, m) + expect_equal(m2$anti_join$on, quos(y)) }) test_that("Descriptive Statistics metadata backend assembles correctly", { @@ -352,3 +358,99 @@ test_that("Metadata print method is accurate", { x <- tplyr_meta(quos(a, b, c), quos(a==1, b==2, c==3, x=="a")) expect_snapshot(print(x)) }) + + +test_that("Anti-join extraction works properly", { + + # This is purposefully a convoluted warning that's unrealistic, hence the + # warning that's generating. + expect_snapshot_warning({ + t <- tplyr_table(tplyr_adsl, TRT01A, cols = ETHNIC) %>% + add_layer( + group_count(RACE, by = SEX) %>% + set_distinct_by(USUBJID) %>% + add_missing_subjects_row() + ) + }) + + x <- build(t, metadata=TRUE) + + # Check that the object looks right + res <- get_meta_result(t, 'c7_1', 'var1_Placebo_HISPANIC OR LATINO') + + expect_equal(unname(map_chr(res$names, as_label)), c("TRT01A", "SEX", "ETHNIC", "RACE")) + expect_equal( + unname(map_chr(res$filters, as_label)), + c("TRT01A == c(\"Placebo\")", "SEX == c(\"F\")", "ETHNIC == c(\"HISPANIC OR LATINO\")", + "TRUE", "TRUE") + ) + expect_equal(unname(map_chr(res$anti_join$join_meta$names, as_label)), c("TRT01A", "ETHNIC")) + expect_equal( + unname(map_chr(res$anti_join$join_meta$filters, as_label)), + c("TRT01A == c(\"Placebo\")", "ETHNIC == c(\"HISPANIC OR LATINO\")", "TRUE", "TRUE") + ) + expect_equal(as_label(res$anti_join$on[[1]]), "USUBJID") + + # Variables needed for the merge aren't there + expect_snapshot_error(get_meta_subset(t, 'c7_1', 'var1_Placebo_HISPANIC OR LATINO', add_cols = quos(SITEID))) + + + sbst <- get_meta_subset(t, 'c7_1', 'var1_Placebo_HISPANIC OR LATINO') + + + cmp <- tplyr_adsl %>% filter( + USUBJID == "01-701-1023" + ) + + # The counted subjects will include female, so this subject would have to be male + # Again - this is a weird example that wouldn't be used in practice, but this is the + # row variable + expect_true(cmp$SEX == "M") + # Since this is column, these would both match the metadata + expect_true(cmp$TRT01A == "Placebo") + expect_true(cmp$ETHNIC == "HISPANIC OR LATINO") + + # and then selecting out the columns these should match + expect_equal( + sbst, + cmp %>% + select(USUBJID, TRT01A, ETHNIC) + ) + + # Now for a real example, but also test for nested counts + t <- tplyr_table(tplyr_adae, TRTA) %>% + set_pop_data(tplyr_adsl) %>% + set_pop_treat_var(TRT01A) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) %>% + set_distinct_by(USUBJID) %>% + add_missing_subjects_row(f_str("xx (XX.x%)", distinct_n, distinct_pct), sort_value = Inf) + ) + + x <- build(t, metadata=TRUE) + + sbst <- get_meta_subset(t, 'c23_1', 'var1_Placebo') + + # If you manually check out x, the count here is 65 + expect_equal(nrow(sbst), 65) + expect_equal(unique(sbst$TRT01A), "Placebo") + +}) + +test_that("Tplyr meta print method works as expected", { + meta <- tplyr_meta( + names = quos(TRTP, EFFFL, ITTFL, ANL01FL, SITEGR1, AVISIT, AVISITN, PARAMCD, AVAL, BASE, CHG), + filters = quos(EFFFL == "Y", ITTFL == "Y", PARAMCD == "ACTOT", ANL01FL == "Y", AVISITN == 24) + ) + + meta2 <- meta %>% + add_anti_join( + join_meta = tplyr_meta( + names = quos(TRT01P, EFFFL, ITTFL, SITEGR1), + filters = quos(EFFFL == "Y", ITTFL == "Y") + ), + on = quos(USUBJID) + ) + + expect_snapshot(print(meta2)) +}) diff --git a/vignettes/custom-metadata.Rmd b/vignettes/custom-metadata.Rmd index 6e9f2fd2..108a6f05 100644 --- a/vignettes/custom-metadata.Rmd +++ b/vignettes/custom-metadata.Rmd @@ -159,6 +159,21 @@ When building a data frame for use with `tplyr_table` metadata, there are really The `row_id` values built by **Tplyr** will always follow the format "n_n", where the first letter of the layer type will either be "c", "d", or "s". The next number is the layer number (i.e. the order in which the layer was inserted to the **Tplyr** table), and then finally the row of that layer within the output. For example, the third row of a count layer that was the second layer in the table would have a `row_id` of "c2_3". In this example, I chose "x4_n" as the format for the "x" to symbolize custom, and these data can be thought of as the fourth layer. That said, these values would typically be masked by the viewer of the table so they really just need to be unique - so you can choose whatever you want. +### Anti-joins + +If the custom metadata you're constructing requires references to data outside your target dataset, this is also possible with a `tplyr_meta` object. If you're looking for non-overlap with the target dataset, you can use an anti-join. Anti-joins can be added to a `tplyr_meta` object using the `add_anti_join()` function. + + +```{r anti_join1} +meta %>% + add_anti_join( + join_meta = tplyr_meta( + names = quos(TRT01P, EFFFL, ITTFL, SITEGR1), + filters = quos(EFFFL == "Y", ITTFL == "Y") + ), + on = quos(USUBJID) + ) +``` ## Appending Existing **Tplyr** Metadata Now that we've created our custom extension of the **Tplyr** metadata, let's extend the existing data frame. To do this, **Tplyr** has the function `append_metadata()`: diff --git a/vignettes/metadata.Rmd b/vignettes/metadata.Rmd index b36b4e84..abdf4e78 100644 --- a/vignettes/metadata.Rmd +++ b/vignettes/metadata.Rmd @@ -119,6 +119,51 @@ cat(c("tplyr_adsl %>%\n", )) ``` +### Anti Joins + +Most data presented within a table refers back to the target dataset from which data are being summarized. In some cases, data presented may refer to information _excluded_ from the summary. This is the case when you use the **Tplyr** function `add_missing_subjects_row()`. In this case, the counts presented refer to data excluded from the target which are present in the population data. The metadata thus needs to refer to that excluded data. To handle this, there's an additional field called an 'Anti Join'. Consider this example: + +```{r anti_join1} +t <- tplyr_table(tplyr_adae, TRTA) %>% + set_pop_data(tplyr_adsl) %>% + set_pop_treat_var(TRT01A) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) %>% + set_distinct_by(USUBJID) %>% + add_missing_subjects_row(f_str("xx (XX.x%)", distinct_n, distinct_pct), sort_value = Inf) + ) + +x <- build(t, metadata=TRUE) + +tail(x) %>% + select(starts_with('row'), var1_Placebo) %>% + kable() +``` + +The missing row in this example counts the subjects within their respective treatment groups who do *not* have any adverse events for the body system "SKIN AND SUBCUTANEOUS TISSUE DISORDERS". Here's what the metadata for the result for the Placebo treatment group looks like. + +```{r anti_join2} +m <- get_meta_result(t, 'c23_1', 'var1_Placebo') +m +``` + +This result has the addition field of 'Anti-join'. This element has two fields, which are the join metadata, and the "on" field, which specifies a merging variable to be used when "anti-joining" with the target data. The join metadata here refers to the data of interest from the population data. Note that while the metadata for the target data has variable names and filter conditions referring to AEBODSYS and AEDECOD, these variables are _not_ present within the join metadata, because that information is not present within the population data. + +While the usual joins we work with focus on the overlap between two sets, an anti-join looks at the non-overlap. The metadata provided here will specifically give us "The subjects within the Placebo treatment group who do **not** have an adverse event within the body system 'SKIN AND SUBCUTANEOUS TISSUE DISORDERS'". + +Extracting this metadata works very much the same way as extracting other results. + +```{r anti_join3} +head(get_meta_subset(t, 'c23_1', 'var1_Placebo')) +``` + +If you're not working with the `tplyr_table` object, then there's some additional information you need to provide to the function. + +```{r anti_join4} +head(get_meta_subset(t$metadata, 'c23_1', 'var1_Placebo', + target=t$target, pop_data=t$pop_data)) +``` + ``` ```{r to string content, results='asis', echo=FALSE} cat(c("tplyr_adsl %>%\n",