From a074e49d99b1b86f869d05bacd690d328693a67c Mon Sep 17 00:00:00 2001 From: Matthew Phelps Date: Mon, 11 Mar 2024 14:16:01 +0100 Subject: [PATCH 1/7] Add lighter test of targets pipeline --- tests/testthat/test-discordent_columns.r | 87 ++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 tests/testthat/test-discordent_columns.r diff --git a/tests/testthat/test-discordent_columns.r b/tests/testthat/test-discordent_columns.r new file mode 100644 index 0000000..fba1d65 --- /dev/null +++ b/tests/testthat/test-discordent_columns.r @@ -0,0 +1,87 @@ +test_that("Discordant columns in result data model", { + # SETUP ------------------------------------------------------------------- + mk_endpoint_def <- function() { + ep <- chef::mk_endpoint_str( + study_metadata = list(), + pop_var = "SAFFL", + pop_value = "Y", + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + stratify_by = list(c("SEX", "AGEGR1")), + data_prepare = mk_adae, + endpoint_label = "A", + custom_pop_filter = + "TRT01A %in% c('Placebo', 'Xanomeline High Dose')", + group_by = list(list( + AESOC = c(), AESEV = c() + )), + stat_by_strata_by_trt = list("E" = chefStats::n_event), + stat_by_strata_across_trt = list( + "RR" = chefStats::RR, + "OR" = chefStats::OR + ), + stat_across_strata_across_trt = list("P-interaction" = chefStats::p_val_interaction) + ) + ep2 <- chef::mk_endpoint_str( + data_prepare = mk_advs, + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + pop_var = "SAFFL", + pop_value = "Y", + stratify_by = list(c("AGEGR1", "SEX")), + stat_by_strata_by_trt = list(chefStats::demographics_counts), + endpoint_label = "Demographics endpoint (categorical measures)" + ) + data.table::rbindlist(list(ep, ep2)) + } + + mk_advs <- function(study_metadata) { + # Read ADSL + adsl <- data.table::as.data.table(pharmaverseadam::adsl) + + # Filter treatment arms + adsl <- adsl[adsl$TRT01A %in% c("Placebo", "Xanomeline High Dose")] + adsl[1, AGEGR1 := NA_character_] + adsl[2:10, SEX := NA_character_] + + # Read ADVS + advs <- data.table::as.data.table(pharmaverseadam::advs) + + # Identify baseline body weight + advs_bw <- advs[advs$PARAMCD == "WEIGHT" & advs$VISIT == "BASELINE"] + + # Create new variable BW_BASELINE + advs_bw[["BW_BASELINE"]] <- advs_bw[["AVAL"]] + + # Merge ADSL, ADAE and baseline body weight from ADVS + adam_out <- + merge(adsl, advs_bw[, c("BW_BASELINE", "USUBJID")], by = "USUBJID", all.x = TRUE) + + return(adam_out) + } + + + # This is needed because mk_adcm it is calling from a new R session, it + # doesn't have access to the helper-* functions from chef + path <- + system.file("templates", package = "chef") |> + file.path("template-pipeline.R") + tmp <- readLines(path) + + tar_dir({ + dir.create("R") + dump("mk_adae", file = "R/mk_adae.R") + dump("mk_advs", file = "R/mk_advs.R") + dump("mk_endpoint_def", file = "R/mk_endpoint_def.R") + + x <- + whisker::whisker.render(tmp, data = list(r_script_dir = "R/")) + writeLines(whisker::whisker.render(tmp, data = list( + r_script_dir = + "R/" + )), con = "_targets.R") + browser() + tar_make() + x <- tar_meta() |> data.table::setDT() + }) +}) From ced9fd0d733b13584e327ec1eaed92359b2362df Mon Sep 17 00:00:00 2001 From: nsjohnsen Date: Wed, 13 Mar 2024 16:43:31 +0100 Subject: [PATCH 2/7] wip --- R/expand_endpoints.R | 13 ++++++------- tests/testthat/test-discordent_columns.r | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/R/expand_endpoints.R b/R/expand_endpoints.R index 693a8db..405959a 100644 --- a/R/expand_endpoints.R +++ b/R/expand_endpoints.R @@ -20,10 +20,11 @@ expand_over_endpoints <- function(ep, analysis_data_container) { ep_with_data[, expand_specification := llist(define_expanded_ep(dat[[1]], group_by[[1]])), by = 1:nrow(ep_with_data)] ep_with_data[["dat"]] <- NULL - ep_expanded <- + ep_expanded_1 <- ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT() - ep_expanded_2 <- add_missing_columns(ep_expanded) + ep_expanded_2 <- add_missing_columns(ep_expanded_1) + ep_expanded_2[, endpoint_id := add_ep_id(.SD, .BY), by = endpoint_spec_id] @@ -76,7 +77,8 @@ expand_over_endpoints <- function(ep, analysis_data_container) { "crit_by_strata_by_trt", "crit_by_strata_across_trt", "fn_type", - "fn_hash" + "fn_hash", + "expand_specification" ) ) @@ -225,16 +227,13 @@ add_ep_id <- function(x, grp) { add_missing_columns <- function(x){ - if(length(intersect(c("endpoint_group_filter", "empty", "endpoint_group_metadata"), names(x)))==2){ + if(length(intersect(c("endpoint_group_filter", "endpoint_group_metadata"), names(x)))==2){ return(x) } x1 <- copy(x) if(length(intersect(c("endpoint_group_filter"), names(x)))==0){ x1[, endpoint_group_filter:=NA] } - if(length(intersect(c("empty"), names(x)))==0){ - x1[, empty:=NA] - } if(length(intersect(c("endpoint_group_metadata"), names(x)))==0){ x1[, endpoint_group_metadata:=list()] } diff --git a/tests/testthat/test-discordent_columns.r b/tests/testthat/test-discordent_columns.r index fba1d65..0783505 100644 --- a/tests/testthat/test-discordent_columns.r +++ b/tests/testthat/test-discordent_columns.r @@ -80,7 +80,7 @@ test_that("Discordant columns in result data model", { r_script_dir = "R/" )), con = "_targets.R") - browser() + #browser() tar_make() x <- tar_meta() |> data.table::setDT() }) From 7c8da2a0885a3451565f9032405cc3901a45e636 Mon Sep 17 00:00:00 2001 From: nsjohnsen Date: Thu, 14 Mar 2024 10:26:13 +0100 Subject: [PATCH 3/7] Preparing bugfix for validation --- R/expand_endpoints.R | 44 +++++++++++++--------------------- man/add_forced_group_levels.Rd | 4 ++-- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/R/expand_endpoints.R b/R/expand_endpoints.R index 405959a..77ba17b 100644 --- a/R/expand_endpoints.R +++ b/R/expand_endpoints.R @@ -20,17 +20,21 @@ expand_over_endpoints <- function(ep, analysis_data_container) { ep_with_data[, expand_specification := llist(define_expanded_ep(dat[[1]], group_by[[1]])), by = 1:nrow(ep_with_data)] ep_with_data[["dat"]] <- NULL - ep_expanded_1 <- - ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT() - ep_expanded_2 <- add_missing_columns(ep_expanded_1) - - ep_expanded_2[, endpoint_id := add_ep_id(.SD, .BY), by = - endpoint_spec_id] + # Expand by groups. If no grouping is present, then add empty group related columns + if(any(!is.na(ep_with_data$expand_specification))){ + ep_exp <- ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT() + }else{ + ep_exp <- ep_with_data[, .SD, .SDcols = setdiff(names(ep_with_data), "expand_specification")] + ep_exp[, endpoint_group_filter := NA] + ep_exp[, endpoint_group_metadata := list()] + } + + ep_exp[, endpoint_id := add_ep_id(.SD, .BY), by = endpoint_spec_id] # Complete endpoint labels by replacing keywords with values - nm_set <- names(ep_expanded_2) - ep_expanded_2[,endpoint_label_evaluated := apply(ep_expanded_2, 1, function(x){ + nm_set <- names(ep_exp) + ep_exp[,endpoint_label_evaluated := apply(ep_exp, 1, function(x){ xlab <- x[["endpoint_label"]] @@ -62,12 +66,12 @@ expand_over_endpoints <- function(ep, analysis_data_container) { } return(xlab) })] - ep_expanded_2[["endpoint_label"]] <- NULL - setnames(ep_expanded_2, "endpoint_label_evaluated", "endpoint_label") + ep_exp[["endpoint_label"]] <- NULL + setnames(ep_exp, "endpoint_label_evaluated", "endpoint_label") keep <- setdiff( - names(ep_expanded_2), + names(ep_exp), c( "data_prepare", "stat_by_strata_by_trt", @@ -82,7 +86,7 @@ expand_over_endpoints <- function(ep, analysis_data_container) { ) ) -out <- ep_expanded_2[, .SD, .SDcols=keep] +out <- ep_exp[, .SD, .SDcols=keep] setkey(out, key_analysis_data) out[] } @@ -225,22 +229,6 @@ add_ep_id <- function(x, grp) { ))] } - -add_missing_columns <- function(x){ - if(length(intersect(c("endpoint_group_filter", "endpoint_group_metadata"), names(x)))==2){ - return(x) - } - x1 <- copy(x) - if(length(intersect(c("endpoint_group_filter"), names(x)))==0){ - x1[, endpoint_group_filter:=NA] - } - if(length(intersect(c("endpoint_group_metadata"), names(x)))==0){ - x1[, endpoint_group_metadata:=list()] - } - - x1 -} - #' Add forced group levels #' #' @description Expand the set of unique group levels of one grouping variables in a table containing all combinations of one or more grouping variables. diff --git a/man/add_forced_group_levels.Rd b/man/add_forced_group_levels.Rd index 6227bd1..a64fb13 100644 --- a/man/add_forced_group_levels.Rd +++ b/man/add_forced_group_levels.Rd @@ -7,9 +7,9 @@ add_forced_group_levels(combos_all, forced_group_levels) } \arguments{ -\item{combos_all}{A data.table containing all combinations of group levels found in the analysis data} +\item{combos_all}{A data.table containing all combinations of group levels found in the analysis data.} -\item{forced_group_levels}{A one column data.table containing a required set of group levels of a grouping variable} +\item{forced_group_levels}{A one column data.table containing a required set of group levels of a grouping variable.} } \value{ A data.table containing all combinations of group levels exapnded with the forced grouping levels. From 72f2b81ea1fc940ac1e2dc86f2b3358af3a06e03 Mon Sep 17 00:00:00 2001 From: nsjohnsen Date: Thu, 14 Mar 2024 14:42:13 +0100 Subject: [PATCH 4/7] Adding unit tests --- tests/testthat/test-discordent_columns.r | 87 ------------------- tests/testthat/test-targets.R | 101 ++++++++++++++++++++++- 2 files changed, 99 insertions(+), 89 deletions(-) delete mode 100644 tests/testthat/test-discordent_columns.r diff --git a/tests/testthat/test-discordent_columns.r b/tests/testthat/test-discordent_columns.r deleted file mode 100644 index 0783505..0000000 --- a/tests/testthat/test-discordent_columns.r +++ /dev/null @@ -1,87 +0,0 @@ -test_that("Discordant columns in result data model", { - # SETUP ------------------------------------------------------------------- - mk_endpoint_def <- function() { - ep <- chef::mk_endpoint_str( - study_metadata = list(), - pop_var = "SAFFL", - pop_value = "Y", - treatment_var = "TRT01A", - treatment_refval = "Xanomeline High Dose", - stratify_by = list(c("SEX", "AGEGR1")), - data_prepare = mk_adae, - endpoint_label = "A", - custom_pop_filter = - "TRT01A %in% c('Placebo', 'Xanomeline High Dose')", - group_by = list(list( - AESOC = c(), AESEV = c() - )), - stat_by_strata_by_trt = list("E" = chefStats::n_event), - stat_by_strata_across_trt = list( - "RR" = chefStats::RR, - "OR" = chefStats::OR - ), - stat_across_strata_across_trt = list("P-interaction" = chefStats::p_val_interaction) - ) - ep2 <- chef::mk_endpoint_str( - data_prepare = mk_advs, - treatment_var = "TRT01A", - treatment_refval = "Xanomeline High Dose", - pop_var = "SAFFL", - pop_value = "Y", - stratify_by = list(c("AGEGR1", "SEX")), - stat_by_strata_by_trt = list(chefStats::demographics_counts), - endpoint_label = "Demographics endpoint (categorical measures)" - ) - data.table::rbindlist(list(ep, ep2)) - } - - mk_advs <- function(study_metadata) { - # Read ADSL - adsl <- data.table::as.data.table(pharmaverseadam::adsl) - - # Filter treatment arms - adsl <- adsl[adsl$TRT01A %in% c("Placebo", "Xanomeline High Dose")] - adsl[1, AGEGR1 := NA_character_] - adsl[2:10, SEX := NA_character_] - - # Read ADVS - advs <- data.table::as.data.table(pharmaverseadam::advs) - - # Identify baseline body weight - advs_bw <- advs[advs$PARAMCD == "WEIGHT" & advs$VISIT == "BASELINE"] - - # Create new variable BW_BASELINE - advs_bw[["BW_BASELINE"]] <- advs_bw[["AVAL"]] - - # Merge ADSL, ADAE and baseline body weight from ADVS - adam_out <- - merge(adsl, advs_bw[, c("BW_BASELINE", "USUBJID")], by = "USUBJID", all.x = TRUE) - - return(adam_out) - } - - - # This is needed because mk_adcm it is calling from a new R session, it - # doesn't have access to the helper-* functions from chef - path <- - system.file("templates", package = "chef") |> - file.path("template-pipeline.R") - tmp <- readLines(path) - - tar_dir({ - dir.create("R") - dump("mk_adae", file = "R/mk_adae.R") - dump("mk_advs", file = "R/mk_advs.R") - dump("mk_endpoint_def", file = "R/mk_endpoint_def.R") - - x <- - whisker::whisker.render(tmp, data = list(r_script_dir = "R/")) - writeLines(whisker::whisker.render(tmp, data = list( - r_script_dir = - "R/" - )), con = "_targets.R") - #browser() - tar_make() - x <- tar_meta() |> data.table::setDT() - }) -}) diff --git a/tests/testthat/test-targets.R b/tests/testthat/test-targets.R index 2e988e3..ffb1173 100644 --- a/tests/testthat/test-targets.R +++ b/tests/testthat/test-targets.R @@ -156,7 +156,6 @@ test_that("branching after prepare for stats step works", branch_group_size = 1 ) - dump("n_subev", file = "R/custom_functions.R") dump("n_sub", file = "R/custom_functions.R", append = TRUE) @@ -168,7 +167,7 @@ test_that("branching after prepare for stats step works", expect_true(all(is.na(x$error))) tar_load(ep_stat) expect_equal(NROW(ep_stat), 12) - expect_equal(NCOL(ep_stat), 39) + expect_equal(NCOL(ep_stat), 37) expect_snapshot(ep_stat$stat_result_value) }) @@ -380,3 +379,101 @@ test_that("Only affected branches outdated when new strata added", }) }) + + +test_that("Check for discordant columns in result data model when having one endpoint spec without grouping and one endpoint spec with grouping", { + + # SETUP ------------------------------------------------------------------- + mk_endpoint_def <- function() { + ep <- mk_endpoint_str( + study_metadata = list(), + pop_var = "SAFFL", + pop_value = "Y", + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + stratify_by = list(c("SEX", "AGEGR1")), + data_prepare = mk_adae, + endpoint_label = "A", + custom_pop_filter = + "TRT01A %in% c('Placebo', 'Xanomeline High Dose')", + group_by = list(list( + AESOC = c(), AESEV = c() + )), + stat_by_strata_by_trt = list(c(n_sub)) + ) + + ep2 <- mk_endpoint_str( + data_prepare = mk_advs, + treatment_var = "TRT01A", + treatment_refval = "Xanomeline High Dose", + pop_var = "SAFFL", + pop_value = "Y", + stratify_by = list(c("AGEGR1", "SEX")), + stat_by_strata_by_trt = list(c(n_sub)), + endpoint_label = "Demographics endpoint (categorical measures)" + ) + + data.table::rbindlist(list(ep, ep2)) + } + + mk_advs <- function(study_metadata) { + # Read ADSL + adsl <- data.table::as.data.table(pharmaverseadam::adsl) + + # Filter treatment arms + adsl <- adsl[adsl$TRT01A %in% c("Placebo", "Xanomeline High Dose")] + adsl[1, AGEGR1 := NA_character_] + adsl[2:10, SEX := NA_character_] + + # Read ADVS + advs <- data.table::as.data.table(pharmaverseadam::advs) + + # Identify baseline body weight + advs_bw <- advs[advs$PARAMCD == "WEIGHT" & advs$VISIT == "BASELINE"] + + # Create new variable BW_BASELINE + advs_bw[["BW_BASELINE"]] <- advs_bw[["AVAL"]] + + # Merge ADSL, ADAE and baseline body weight from ADVS + adam_out <- + merge(adsl, advs_bw[, c("BW_BASELINE", "USUBJID")], by = "USUBJID", all.x = TRUE) + + return(adam_out) + } + + # This is needed because mk_adcm it is calling from a new R session, it + # doesn't have access to the helper-* functions from chef + path <- + system.file("templates", package = "chef") |> + file.path("template-pipeline.R") + tmp <- readLines(path) + +# ACT --------------------------------------------------------------------- + + tar_dir({ + dir.create("R") + dump("n_sub", file = "R/custom_functions.R") + dump("mk_adae", file = "R/mk_adae.R") + dump("mk_advs", file = "R/mk_advs.R") + dump("mk_endpoint_def", file = "R/mk_endpoint_def.R") + + x <- whisker::whisker.render(tmp, data = list(r_script_dir = "R/")) + writeLines(whisker::whisker.render(tmp, data = list( + r_script_dir = "R/")), con = "_targets.R") + + tar_make() + +# EXPECT ------------------------------------------------------------------ + + + targets::tar_load(ep_stat) + expect_equal(nrow(ep_stat), 700) + expect_equal(ncol(ep_stat), 37) + expect_equal(sum(ep_stat$endpoint_spec_id == 1), 690) + expect_equal(sum(ep_stat$endpoint_spec_id == 2), 10) + + x <- tar_meta() |> data.table::setDT() + expect_false(any(!is.na(x$error))) + + }) +}) From 6e56dc6ed4ee902b62e227fd667f2797666e4515 Mon Sep 17 00:00:00 2001 From: nsjohnsen Date: Thu, 14 Mar 2024 15:53:46 +0100 Subject: [PATCH 5/7] Handling undocumented function parameters --- NAMESPACE | 1 - R/add_event_index.R | 2 ++ R/add_id.R | 2 +- R/apply_criterion.R | 2 ++ R/apply_stats.R | 2 +- R/expand_endpoints.R | 3 +++ R/mk_endpoint_str.R | 5 +++- R/mk_filtered_endpoint_dt.R | 41 -------------------------------- R/prepare_for_stats.R | 2 ++ R/try_and_validate.R | 12 ---------- R/use_chef.R | 4 +++- man/add_event_index.Rd | 2 ++ man/add_id.Rd | 2 +- man/apply_criterion_by_strata.Rd | 2 ++ man/apply_criterion_endpoint.Rd | 2 ++ man/apply_stats.Rd | 6 ++--- man/define_expanded_ep.Rd | 3 +++ man/expand_ep_for_stats.Rd | 2 ++ man/expand_over_endpoints.Rd | 2 ++ man/join_adam.Rd | 28 ---------------------- man/mk_endpoint_str.Rd | 6 +++++ man/prepare_for_stats.Rd | 2 ++ man/try_and_validate.Rd | 37 ++++++++++++++++++++++++++++ man/use_chef.Rd | 16 ++++++++----- 24 files changed, 90 insertions(+), 96 deletions(-) delete mode 100644 man/join_adam.Rd create mode 100644 man/try_and_validate.Rd diff --git a/NAMESPACE b/NAMESPACE index 020ffb9..ef0e530 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,7 +18,6 @@ export(format_stats_results) export(group_ep_for_targets) export(index_expanded_ep_groups) export(index_non_null_group_level) -export(join_adam) export(llist) export(load_debug_session) export(mk_endpoint_str) diff --git a/R/add_event_index.R b/R/add_event_index.R index c99d75c..76a9deb 100644 --- a/R/add_event_index.R +++ b/R/add_event_index.R @@ -42,6 +42,8 @@ create_flag <- function(dat, var_value_pairs=NULL, singletons=NULL) { #' of the columns `pop_var`, `pop_value`, `period_var`, `period_value`, #' `endpoint_filter`, `endpoint_group_filter`, and `custom_pop_filter`, which #' are used to define the conditions for event indexing. +#' +#' @param analysis_data_container A data.table containing the analysis data. #' #' @return A `data.table` similar to the input but with an additional #' `event_index` column, which contains the indices of events as determined by diff --git a/R/add_id.R b/R/add_id.R index 39f32a1..c22d52f 100644 --- a/R/add_id.R +++ b/R/add_id.R @@ -1,6 +1,6 @@ #' Add ID to user-defined endpoint groups #' -#' @param ep +#' @param ep A `data.table` containing endpoint definitions. #' #' @return data.table #' @export diff --git a/R/apply_criterion.R b/R/apply_criterion.R index aa1260b..2ab1ff0 100644 --- a/R/apply_criterion.R +++ b/R/apply_criterion.R @@ -6,6 +6,7 @@ #' #' @param ep A `data.table` containing expanded endpoint definitions and #' associated data, typically the output from `add_event_index`. +#' @param analysis_data_container data.table containing the analysis data. #' @param fn_map A `data.table` mapping endpoint definitions to criterion #' functions. #' @@ -60,6 +61,7 @@ apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) { #' #' @param ep A `data.table` containing endpoint data with applied endpoint #' criteria, typically the output from `apply_criterion_endpoint`. +#' @param analysis_data_container data.table containing the analysis data. #' @param fn_map A `data.table` mapping endpoint definitions to by-strata #' criteria functions. #' @param type The type of criterion to apply, either diff --git a/R/apply_stats.R b/R/apply_stats.R index 4a25dd0..e8e3125 100644 --- a/R/apply_stats.R +++ b/R/apply_stats.R @@ -6,7 +6,7 @@ #' #' @param ep A `data.table` containing prepared endpoint data for statistical #' analysis, typically the output from `prepare_for_stats`. -#' @param fn_map A `data_table` mapping endpoint definitions to statistical +#' @param analysis_data_container data.table containing the analysis data. #' functions. #' @param type The type of statistical function. Can be one of #' "stat_by_strata_by_trt", "stat_by_strata_across_trt", or "stat_across_strata_across_trt" diff --git a/R/expand_endpoints.R b/R/expand_endpoints.R index 77ba17b..b29d9b5 100644 --- a/R/expand_endpoints.R +++ b/R/expand_endpoints.R @@ -11,6 +11,7 @@ #' @param ep A `data.table` containing endpoint definitions, where each row #' corresponds to a different endpoint and contains relevant attributes such #' as the endpoint name, type, and criteria. +#' @param analysis_data_container data.table containing the analysis data. #' @return A `data.table` where each row corresponds to an expanded endpoint #' definition #' @export @@ -104,6 +105,8 @@ out[] #' @param group_by A list specifying the grouping for endpoints, where #' each element corresponds to a variable used for grouping endpoints and #' contains the levels for that grouping variable. +#' @param forced_group_levels data.table (optional). Table with group levels +#' that must be included in the expansion, regardless of `group_by`. #' @param col_prefix A prefix used to create the names of the metadata and #' filter columns in the output `data.table`. Defaults to "endpoint_group". #' diff --git a/R/mk_endpoint_str.R b/R/mk_endpoint_str.R index 9d0b71f..549a842 100644 --- a/R/mk_endpoint_str.R +++ b/R/mk_endpoint_str.R @@ -3,6 +3,7 @@ #' @param study_metadata List. Metadata describing the clinical study. #' @param pop_var Character. #' @param pop_value Character. +#' @param custom_pop_filter Character. #' @param treatment_var Character. #' @param treatment_refval Character. #' @param period_var Character. @@ -14,10 +15,12 @@ #' @param data_prepare List. #' @param stat_by_strata_by_trt List. #' @param stat_by_strata_across_trt List. +#' @param stat_across_strata_across_trt List. #' @param crit_endpoint List. #' @param crit_by_strata_by_trt List. #' @param crit_by_strata_across_trt List. #' @param only_strata_with_events Boolean. +#' @param env Environment. #' #' @return A data.table containing the endpoint specification. #' @export @@ -25,7 +28,7 @@ mk_endpoint_str <- function(study_metadata = NULL, pop_var = NULL, pop_value = NULL, - custom_pop_filter=NA_character_, + custom_pop_filter = NA_character_, treatment_var = NULL, treatment_refval = NULL, period_var = NA_character_, diff --git a/R/mk_filtered_endpoint_dt.R b/R/mk_filtered_endpoint_dt.R index dd2f6d9..da8798a 100644 --- a/R/mk_filtered_endpoint_dt.R +++ b/R/mk_filtered_endpoint_dt.R @@ -1,44 +1,3 @@ -#' Filter adam data -#' -#' @param ep_unnest data.table. An unnested endpoint definition table -#' @param adam_db data.table. A table containing the adam datasets associated -#' with each data_prepare -#' -#' @return a data.table with the filtered adam data. -#' @export -#' -join_adam <- - function(ep, - ep_fn_map, - adam_db, - filter_pop = TRUE, - filter_period = TRUE, - filter_trt = TRUE, - filter_user_defined = TRUE) { - checkmate::assert_data_table(ep) - checkmate::assert_data_table(adam_db) - - - ep_adam <- - merge(ep, ep_fn_map[fn_type == "data_prepare"], by = "endpoint_spec_id") - - ep_adam <- - merge(ep_adam, - adam_db[, .(fn_hash, dat)], - by = "fn_hash", - all.x = TRUE, - all.y = FALSE) - # We no longer need to track the data generating (aka ADaM) functions. - # Tracking it, in fact, might increase risk of triggering a unneccessary - # re-run in targets if the fn hash changes without any data change - keep_cols <- - setdiff(names(ep_adam), - c("fn_type", "fn", "fn_name", "fn_hash", "fn_callable")) - ep_adam[, .SD, .SDcols = keep_cols] - - - } - #' Filter applying to a data.table #' #' @param adam_dt data.table::data.table diff --git a/R/prepare_for_stats.R b/R/prepare_for_stats.R index d7fa356..10fd1b5 100644 --- a/R/prepare_for_stats.R +++ b/R/prepare_for_stats.R @@ -9,6 +9,7 @@ #' associated data, typically the output from `apply_criterion_by_strata`. #' @param fn_map A `data.table` mapping endpoint definitions to statistical #' functions. +#' @param analysis_data_container data.table containing the analysis data. #' @param type A character string specifying the type of statistics for which #' the data is being prepared. Valid types are "stat_by_strata_by_trt", #' "stat_by_strata_across_trt", and "stat_across_strata_across_trt". @@ -169,6 +170,7 @@ list_group_and_levels <- function( #' @param ep A `data.table` containing endpoint data to be expanded. #' @param grouping_cols A character vector specifying the columns used for #' grouping in the expansion. +#' @param analysis_data_container data.table containing the analysis data. #' @param data_col The name of the column in `ep` that contains the ADaM #' dataset. #' @param id_col The name of the column in `ep` that contains the unique diff --git a/R/try_and_validate.R b/R/try_and_validate.R index dbfb7e0..57d0cd5 100644 --- a/R/try_and_validate.R +++ b/R/try_and_validate.R @@ -14,18 +14,6 @@ #' #' @return The result of the evaluated expression if successful and valid. #' @export - - - -#' -#' @param expr_ Expression of type `call` to be wrapped -#' @param debug_dir directory for debugging dump [/tmp] -#' @param validator validation function, passthrough default. -#' @param stage_debugging -#' -#' @return data.table::data.table -#' @noRd -#' try_and_validate <- function(expr_, expr_name = NA_character_, #TODO Allow forwarding of meaning full names. diff --git a/R/use_chef.R b/R/use_chef.R index 5ab0f1b..27e89b7 100644 --- a/R/use_chef.R +++ b/R/use_chef.R @@ -18,7 +18,7 @@ #' `mk_adam_*()` and criterion functions for example, and any other functions #' that are used in the pipelines. #' @param pipeline_id Character sting. Alphanumeric only -#' @param endpoint_def_fn If you would like to use an existing +#' @param mk_endpoint_def_fn If you would like to use an existing #' `mk_endpoint_def_*()` function as the starting point for the pipeline, #' supply the unquoted function name here. This assumes there are no arguments #' to the function call and the functions have to be available from the global @@ -39,6 +39,8 @@ #' `my_criteria_fn`). The functions have to be available from the global #' environment (i.e if you type `my_criteria_fn()` into the console, it would #' find the function and try to run in). +#' @param branch_group_size Numeric. +#' @param env Environment. #' #' @return Nothing, run for side effects. #' @export diff --git a/man/add_event_index.Rd b/man/add_event_index.Rd index 816da4a..5a185cb 100644 --- a/man/add_event_index.Rd +++ b/man/add_event_index.Rd @@ -12,6 +12,8 @@ typically the output from \code{expand_over_endpoints}. It assumes the inclusion of the columns \code{pop_var}, \code{pop_value}, \code{period_var}, \code{period_value}, \code{endpoint_filter}, \code{endpoint_group_filter}, and \code{custom_pop_filter}, which are used to define the conditions for event indexing.} + +\item{analysis_data_container}{A data.table containing the analysis data.} } \value{ A \code{data.table} similar to the input but with an additional diff --git a/man/add_id.Rd b/man/add_id.Rd index 0634b71..9740c1c 100644 --- a/man/add_id.Rd +++ b/man/add_id.Rd @@ -7,7 +7,7 @@ add_id(ep) } \arguments{ -\item{ep}{} +\item{ep}{A \code{data.table} containing endpoint definitions.} } \value{ data.table diff --git a/man/apply_criterion_by_strata.Rd b/man/apply_criterion_by_strata.Rd index 0b46b4c..91b56cc 100644 --- a/man/apply_criterion_by_strata.Rd +++ b/man/apply_criterion_by_strata.Rd @@ -15,6 +15,8 @@ apply_criterion_by_strata( \item{ep}{A \code{data.table} containing endpoint data with applied endpoint criteria, typically the output from \code{apply_criterion_endpoint}.} +\item{analysis_data_container}{data.table containing the analysis data.} + \item{fn_map}{A \code{data.table} mapping endpoint definitions to by-strata criteria functions.} diff --git a/man/apply_criterion_endpoint.Rd b/man/apply_criterion_endpoint.Rd index a770fcc..c76b5c0 100644 --- a/man/apply_criterion_endpoint.Rd +++ b/man/apply_criterion_endpoint.Rd @@ -10,6 +10,8 @@ apply_criterion_endpoint(ep, analysis_data_container, fn_map) \item{ep}{A \code{data.table} containing expanded endpoint definitions and associated data, typically the output from \code{add_event_index}.} +\item{analysis_data_container}{data.table containing the analysis data.} + \item{fn_map}{A \code{data.table} mapping endpoint definitions to criterion functions.} } diff --git a/man/apply_stats.Rd b/man/apply_stats.Rd index 064e24e..f9152fe 100644 --- a/man/apply_stats.Rd +++ b/man/apply_stats.Rd @@ -15,11 +15,11 @@ apply_stats( \item{ep}{A \code{data.table} containing prepared endpoint data for statistical analysis, typically the output from \code{prepare_for_stats}.} +\item{analysis_data_container}{data.table containing the analysis data. +functions.} + \item{type}{The type of statistical function. Can be one of "stat_by_strata_by_trt", "stat_by_strata_across_trt", or "stat_across_strata_across_trt"} - -\item{fn_map}{A \code{data_table} mapping endpoint definitions to statistical -functions.} } \value{ A \code{data.table} with statistical results appended. diff --git a/man/define_expanded_ep.Rd b/man/define_expanded_ep.Rd index cced73c..a6e63cf 100644 --- a/man/define_expanded_ep.Rd +++ b/man/define_expanded_ep.Rd @@ -18,6 +18,9 @@ define_expanded_ep( each element corresponds to a variable used for grouping endpoints and contains the levels for that grouping variable.} +\item{forced_group_levels}{data.table (optional). Table with group levels +that must be included in the expansion, regardless of \code{group_by}.} + \item{col_prefix}{A prefix used to create the names of the metadata and filter columns in the output \code{data.table}. Defaults to "endpoint_group".} } diff --git a/man/expand_ep_for_stats.Rd b/man/expand_ep_for_stats.Rd index ba4a653..b3a937c 100644 --- a/man/expand_ep_for_stats.Rd +++ b/man/expand_ep_for_stats.Rd @@ -19,6 +19,8 @@ expand_ep_for_stats( \item{grouping_cols}{A character vector specifying the columns used for grouping in the expansion.} +\item{analysis_data_container}{data.table containing the analysis data.} + \item{data_col}{The name of the column in \code{ep} that contains the ADaM dataset.} diff --git a/man/expand_over_endpoints.Rd b/man/expand_over_endpoints.Rd index eb4551a..b22bff4 100644 --- a/man/expand_over_endpoints.Rd +++ b/man/expand_over_endpoints.Rd @@ -10,6 +10,8 @@ expand_over_endpoints(ep, analysis_data_container) \item{ep}{A \code{data.table} containing endpoint definitions, where each row corresponds to a different endpoint and contains relevant attributes such as the endpoint name, type, and criteria.} + +\item{analysis_data_container}{data.table containing the analysis data.} } \value{ A \code{data.table} where each row corresponds to an expanded endpoint diff --git a/man/join_adam.Rd b/man/join_adam.Rd deleted file mode 100644 index c034aac..0000000 --- a/man/join_adam.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mk_filtered_endpoint_dt.R -\name{join_adam} -\alias{join_adam} -\title{Filter adam data} -\usage{ -join_adam( - ep, - ep_fn_map, - adam_db, - filter_pop = TRUE, - filter_period = TRUE, - filter_trt = TRUE, - filter_user_defined = TRUE -) -} -\arguments{ -\item{adam_db}{data.table. A table containing the adam datasets associated -with each data_prepare} - -\item{ep_unnest}{data.table. An unnested endpoint definition table} -} -\value{ -a data.table with the filtered adam data. -} -\description{ -Filter adam data -} diff --git a/man/mk_endpoint_str.Rd b/man/mk_endpoint_str.Rd index 3f42877..cfbee3e 100644 --- a/man/mk_endpoint_str.Rd +++ b/man/mk_endpoint_str.Rd @@ -35,6 +35,8 @@ mk_endpoint_str( \item{pop_value}{Character.} +\item{custom_pop_filter}{Character.} + \item{treatment_var}{Character.} \item{treatment_refval}{Character.} @@ -57,6 +59,8 @@ mk_endpoint_str( \item{stat_by_strata_across_trt}{List.} +\item{stat_across_strata_across_trt}{List.} + \item{crit_endpoint}{List.} \item{crit_by_strata_by_trt}{List.} @@ -64,6 +68,8 @@ mk_endpoint_str( \item{crit_by_strata_across_trt}{List.} \item{only_strata_with_events}{Boolean.} + +\item{env}{Environment.} } \value{ A data.table containing the endpoint specification. diff --git a/man/prepare_for_stats.Rd b/man/prepare_for_stats.Rd index ec1aaad..f2b3730 100644 --- a/man/prepare_for_stats.Rd +++ b/man/prepare_for_stats.Rd @@ -18,6 +18,8 @@ prepare_for_stats( \item{ep}{A \code{data.table} containing expanded endpoint definitions and associated data, typically the output from \code{apply_criterion_by_strata}.} +\item{analysis_data_container}{data.table containing the analysis data.} + \item{fn_map}{A \code{data.table} mapping endpoint definitions to statistical functions.} diff --git a/man/try_and_validate.Rd b/man/try_and_validate.Rd new file mode 100644 index 0000000..13a40a7 --- /dev/null +++ b/man/try_and_validate.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/try_and_validate.R +\name{try_and_validate} +\alias{try_and_validate} +\title{Try and Validate Wrapper for Statistical Functions} +\usage{ +try_and_validate( + expr_, + expr_name = NA_character_, + debug_dir = "debug", + validator = function(expr_result) { + NA_character_ + }, + stage_debugging = TRUE +) +} +\arguments{ +\item{expr_}{The expression of type \code{call} to be evaluated, typically a call +to a statistical function.} + +\item{expr_name}{The name of the expression, used for debugging purposes.} + +\item{debug_dir}{The directory where debugging information will be stored.} + +\item{validator}{A function used to validate the output of the expression.} + +\item{stage_debugging}{A flag indicating whether to stage debugging +information in case of errors.} +} +\value{ +The result of the evaluated expression if successful and valid. +} +\description{ +Attempts to execute a statistical function and validates its +output. If the function fails or the output is invalid, it provides +meaningful error messages and sets up a debugging environment. +} diff --git a/man/use_chef.Rd b/man/use_chef.Rd index b103509..326d0b4 100644 --- a/man/use_chef.Rd +++ b/man/use_chef.Rd @@ -29,6 +29,13 @@ that are used in the pipelines.} \item{pipeline_id}{Character sting. Alphanumeric only} +\item{mk_endpoint_def_fn}{If you would like to use an existing +\verb{mk_endpoint_def_*()} function as the starting point for the pipeline, +supply the unquoted function name here. This assumes there are no arguments +to the function call and the functions have to be available from the global +enironment (i.e if you type \code{my_fun()} into the console, it would find the +function and try to run in)} + \item{mk_adam_fn}{List of functions used for making adam dataset. This is useful if you want to supply already existing functions. This must be a list, and each element must be an unquoted function name (e.g. @@ -46,12 +53,9 @@ must be a list, and each element must be an unquoted function name (e.g. environment (i.e if you type \code{my_criteria_fn()} into the console, it would find the function and try to run in).} -\item{endpoint_def_fn}{If you would like to use an existing -\verb{mk_endpoint_def_*()} function as the starting point for the pipeline, -supply the unquoted function name here. This assumes there are no arguments -to the function call and the functions have to be available from the global -enironment (i.e if you type \code{my_fun()} into the console, it would find the -function and try to run in)} +\item{branch_group_size}{Numeric.} + +\item{env}{Environment.} } \value{ Nothing, run for side effects. From 062c19f3294bc0b758e17d19cd760ad233efb73e Mon Sep 17 00:00:00 2001 From: nsjohnsen Date: Fri, 15 Mar 2024 09:59:19 +0100 Subject: [PATCH 6/7] Replacing = with <- --- R/prepare_for_stats.R | 20 ++++++++-------- R/try_and_validate.R | 54 +++++++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/R/prepare_for_stats.R b/R/prepare_for_stats.R index 10fd1b5..a80adfb 100644 --- a/R/prepare_for_stats.R +++ b/R/prepare_for_stats.R @@ -61,7 +61,7 @@ prepare_for_stats <- function(ep, nrow(fn_map[fn_type == type]) == 0 | (type == "stat_across_strata_across_trt" & !any(ep_accepted[[grouping_cols[[1]]]] != "TOTAL_")) ){ - return(data.table::data.table(SKIP_=TRUE)) + return(data.table::data.table(SKIP_ = TRUE)) } if (type %in% c("stat_by_strata_by_trt", "stat_by_strata_across_trt")) { @@ -156,8 +156,8 @@ list_group_and_levels <- function( data, grouping_col ){ - l = list(data[, unique(get(grouping_col))]) - names(l) = grouping_col + l <- list(data[, unique(get(grouping_col))]) + names(l) <- grouping_col return (l) } @@ -189,7 +189,7 @@ expand_ep_for_stats <- function( col_prefix ){ - name_expand_col = paste(col_prefix, "expand_spec", sep="_") + name_expand_col <- paste(col_prefix, "expand_spec", sep="_") ep[,"_i_" := .I] setkey(ep, key_analysis_data) @@ -216,7 +216,7 @@ expand_ep_for_stats <- function( ep_exp[,"_i_":= .I] ep_exp_with_data <- ep_exp[analysis_data_container, nomatch = NULL] - filter_col_name = paste(col_prefix, "filter", sep="_") + filter_col_name <- paste(col_prefix, "filter", sep="_") ep_exp_with_data[, cell_index := llist(create_flag(get(data_col)[[1]], singletons = c(get(filter_col_name)[[1]]))), by = "_i_"] @@ -249,17 +249,17 @@ define_expansion_cell_from_data <- function( col_prefix ){ if (is.character(grouping_cols)){ - grouping_cols = c(grouping_cols) + grouping_cols <- c(grouping_cols) } stopifnot(all(grouping_cols %in% names(row))) # Get the actual grouping variables - grouping_col_values = row[, .SD, .SDcols=grouping_cols] - grouping_var_list = vector(mode="list", length(grouping_col_values)) - names(grouping_var_list) = grouping_col_values + grouping_col_values <- row[, .SD, .SDcols=grouping_cols] + grouping_var_list <- vector(mode="list", length(grouping_col_values)) + names(grouping_var_list) <- grouping_col_values if(row[["only_strata_with_events"]]){ - dat <- row[,get(data_col)][[1]][row[["event_index"]]] + dat <- row[,get(data_col)][[1]][row[["event_index"]]] }else{ dat <- row[,get(data_col)][[1]] } diff --git a/R/try_and_validate.R b/R/try_and_validate.R index 57d0cd5..b4218ec 100644 --- a/R/try_and_validate.R +++ b/R/try_and_validate.R @@ -23,7 +23,7 @@ try_and_validate <- function(expr_, }, stage_debugging = TRUE) { # Capture expression information - expr_sub = substitute(expr_) + expr_sub <- substitute(expr_) if (!identical(class(expr_sub), "call")) { stop(sprintf( "Expr_(%s) must be of class `call`. Found: (%s)", @@ -32,7 +32,7 @@ try_and_validate <- function(expr_, )) } - expr_list = as.list(expr_sub) # un evaluated call list + expr_list <- as.list(expr_sub) # un evaluated call list # Find the function definition in the calling env. if (rlang::is_call_simple(expr_sub)) { expr_fn <- get(expr_list[[1]], envir = parent.frame()) @@ -48,8 +48,7 @@ try_and_validate <- function(expr_, err_msg <- paste0("Failed to EVALUATE function with error:", "\n ", expr_result[[1]]) - } else if (!is.na(validator_err <- - validator(expr_result))) { + } else if (!is.na(validator_err <- validator(expr_result))) { #validate output err_msg <- paste("Failed to VALIDATE function output with error:", @@ -64,12 +63,11 @@ try_and_validate <- function(expr_, # Match arguments with formals if possible if (is.primitive(expr_fn)) { - expr_call_char = deparse(expr_sub) - expr_arg_list = expr_list[-1] + expr_call_char <- deparse(expr_sub) + expr_arg_list <- expr_list[-1] } else { - expr_matched = match.call(definition = expr_fn, - call = expr_sub) - expr_call_char = deparse(expr_matched) + expr_matched <- match.call(definition = expr_fn, call = expr_sub) + expr_call_char <- deparse(expr_matched) expr_arg_list <- as.list(expr_matched)[-1] } # Find a name if unset. @@ -91,16 +89,17 @@ try_and_validate <- function(expr_, debug_file <- stage_debug( fn_name = expr_name, fn = expr_fn, - arg_list = lapply(expr_arg_list, eval, envir = parent.frame()), - err_msg = full_error, - debug_dir = debug_dir + arg_list <- lapply(expr_arg_list, eval, envir = parent.frame()), + err_msg <- full_error, + debug_dir <- debug_dir ) full_error <- paste( full_error, "---", sprintf( - "Debugging session created: Launch with:\n chef::load_debug_session('%s')", + "Debugging session created: Launch with:\n + chef::load_debug_session('%s')", debug_file ), "---", @@ -140,8 +139,8 @@ stage_debug <- debug_env[["ns"]] <- search() dir.create(debug_dir, showWarnings = FALSE, recursive = FALSE) - norm_dir = normalizePath(debug_dir) - filepath = file.path(norm_dir, paste0(fn_name, ".Rdata")) + norm_dir <- normalizePath(debug_dir) + filepath <- file.path(norm_dir, paste0(fn_name, ".Rdata")) saveRDS(debug_env, file = filepath) #Set dynamically @@ -165,20 +164,19 @@ load_debug_session <- function(debug_file) { # Get debug env. debug_env <- readRDS(debug_file) cli::cli_h1("Launching debug session for: {.val {debug_env$fn_name}}") - #message(paste0("Launching debug session for: ", debug_env$fn_name)) cli::cli_h3("Original error msg:") cli::cli_par() cli::cli_verbatim(debug_env$err_msg) cli::cli_text("──") cli::cli_end() - #message(paste("Original error msg:", debug_env$err_msg, sep="\n" )) - if (is.primitive(debug_env$fn)) { cli::cli_alert_danger( - "The inspected function ({.val {deparse(debug_env$fn)}}) is a primitive and cannot be inspected using debugonce.\ - You can still load the debug environemnt and inspect inputs and function: readRDS({.path {debug_file}})", + "The inspected function ({.val {deparse(debug_env$fn)}}) is a + primitive and cannot be inspected using debugonce.\ + You can still load the debug environemnt and inspect + inputs and function: readRDS({.path {debug_file}})", wrap = TRUE ) cli::cli_alert_info("Debug session ended") @@ -195,7 +193,8 @@ load_debug_session <- function(debug_file) { extra_libraries <- setdiff(debug_env$ns, search()) if (length(extra_libraries) > 0) { - cli::cli_alert_warning("The following libraries was available at runtime but isn't currently.") + cli::cli_alert_warning("The following libraries was available at runtime + but isn't currently.") cli::cli_li(extra_libraries) } @@ -218,10 +217,10 @@ load_debug_session <- function(debug_file) { #' #' @return An error message if validation fails, otherwise NA. validate_crit_output <- function(output) { - if (!(isTRUE(output) | - isFALSE(output))) { + if (!(isTRUE(output) | isFALSE(output))) { paste( - "The return value from the endpoint criterion function must be a logical of length 1, i.e.", + "The return value from the endpoint criterion + function must be a logical of length 1, i.e.", "TRUE or FALSE" ) } @@ -241,14 +240,15 @@ validate_crit_output <- function(output) { validate_stat_output <- function(output) { # if not a DT return early if (!data.table::is.data.table(output)) { - err_msg <- paste0("Expected (data.table::data.table) Found: ", class(output)) + err_msg <- paste0("Expected (data.table::data.table). Found: ", + class(output)) return(err_msg) } # if DT check if compliant err_messages <- c() - expected_sorted = sort(c("label","description", "qualifiers", "value")) - actual_sorted = sort(names(output)) + expected_sorted <- sort(c("label", "description", "qualifiers", "value")) + actual_sorted <- sort(names(output)) if (!identical(expected_sorted, actual_sorted)) { actual_diff <- setdiff(actual_sorted, expected_sorted) expected_diff <- setdiff(expected_sorted, actual_sorted) From 155ccf1e2cd5462a56f5beeec944d7a38c0a7989 Mon Sep 17 00:00:00 2001 From: nsjohnsen Date: Fri, 15 Mar 2024 10:05:45 +0100 Subject: [PATCH 7/7] Replacing non ASCII characters --- R/try_and_validate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/try_and_validate.R b/R/try_and_validate.R index b4218ec..31956cd 100644 --- a/R/try_and_validate.R +++ b/R/try_and_validate.R @@ -168,7 +168,7 @@ load_debug_session <- function(debug_file) { cli::cli_h3("Original error msg:") cli::cli_par() cli::cli_verbatim(debug_env$err_msg) - cli::cli_text("──") + cli::cli_text("--") cli::cli_end() if (is.primitive(debug_env$fn)) {