Merge pull request #19 from hta-pharma/results_cols

Fixing bug when no grouping is active in any endpoint specifications and revise function parameter documentation
hta-pharma · Mar 15, 2024 · cc49b01 · cc49b01
2 parents c79b7e1 + 155ccf1
commit cc49b01
Show file tree

Hide file tree

Showing 26 changed files with 247 additions and 169 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -18,7 +18,6 @@ export(format_stats_results)
 export(group_ep_for_targets)
 export(index_expanded_ep_groups)
 export(index_non_null_group_level)
-export(join_adam)
 export(llist)
 export(load_debug_session)
 export(mk_endpoint_str)

diff --git a/R/add_event_index.R b/R/add_event_index.R
@@ -42,6 +42,8 @@ create_flag <- function(dat, var_value_pairs=NULL, singletons=NULL) {
 #'   of the columns `pop_var`, `pop_value`, `period_var`, `period_value`,
 #'   `endpoint_filter`, `endpoint_group_filter`, and `custom_pop_filter`, which
 #'   are used to define the conditions for event indexing.
+#' 
+#' @param analysis_data_container A data.table containing the analysis data.
 #'
 #' @return A `data.table` similar to the input but with an additional
 #'   `event_index` column, which contains the indices of events as determined by

diff --git a/R/add_id.R b/R/add_id.R
@@ -1,6 +1,6 @@
 #' Add ID to user-defined endpoint groups
 #'
-#' @param ep
+#' @param ep A `data.table` containing endpoint definitions.
 #'
 #' @return data.table
 #' @export

diff --git a/R/apply_criterion.R b/R/apply_criterion.R
@@ -6,6 +6,7 @@
 #'
 #' @param ep A `data.table` containing expanded endpoint definitions and
 #'   associated data, typically the output from `add_event_index`.
+#' @param analysis_data_container data.table containing the analysis data.
 #' @param fn_map A `data.table` mapping endpoint definitions to criterion
 #'   functions.
 #'
@@ -60,6 +61,7 @@ apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) {
 #'
 #' @param ep A `data.table` containing endpoint data with applied endpoint
 #'   criteria, typically the output from `apply_criterion_endpoint`.
+#' @param analysis_data_container data.table containing the analysis data.
 #' @param fn_map A `data.table` mapping endpoint definitions to by-strata
 #'   criteria functions.
 #' @param type The type of criterion to apply, either

diff --git a/R/apply_stats.R b/R/apply_stats.R
@@ -6,7 +6,7 @@
 #'
 #' @param ep A `data.table` containing prepared endpoint data for statistical
 #'   analysis, typically the output from `prepare_for_stats`.
-#' @param fn_map A `data_table` mapping endpoint definitions to statistical
+#' @param analysis_data_container data.table containing the analysis data.
 #'   functions.
 #' @param type The type of statistical function. Can be one of
 #'   "stat_by_strata_by_trt", "stat_by_strata_across_trt", or "stat_across_strata_across_trt"

diff --git a/R/expand_endpoints.R b/R/expand_endpoints.R
@@ -11,6 +11,7 @@
 #' @param ep A `data.table` containing endpoint definitions, where each row
 #'   corresponds to a different endpoint and contains relevant attributes such
 #'   as the endpoint name, type, and criteria.
+#' @param analysis_data_container data.table containing the analysis data.
 #' @return A `data.table` where each row corresponds to an expanded endpoint
 #'   definition
 #' @export
@@ -20,16 +21,21 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
   ep_with_data[, expand_specification := llist(define_expanded_ep(dat[[1]], group_by[[1]])),
           by = 1:nrow(ep_with_data)]
   ep_with_data[["dat"]] <- NULL
-  ep_expanded <-
-    ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT()
 
-  ep_expanded_2 <- add_missing_columns(ep_expanded)
-  ep_expanded_2[, endpoint_id := add_ep_id(.SD, .BY), by =
-                endpoint_spec_id]
+  # Expand by groups. If no grouping is present, then add empty group related columns
+  if(any(!is.na(ep_with_data$expand_specification))){
+    ep_exp <- ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT()
+  }else{
+    ep_exp <- ep_with_data[, .SD, .SDcols = setdiff(names(ep_with_data), "expand_specification")]
+    ep_exp[, endpoint_group_filter := NA]
+    ep_exp[, endpoint_group_metadata := list()]
+  }
+
+  ep_exp[, endpoint_id := add_ep_id(.SD, .BY), by = endpoint_spec_id]
 
   # Complete endpoint labels by replacing keywords with values
-  nm_set <- names(ep_expanded_2)
-  ep_expanded_2[,endpoint_label_evaluated := apply(ep_expanded_2, 1, function(x){
+  nm_set <- names(ep_exp)
+  ep_exp[,endpoint_label_evaluated := apply(ep_exp, 1, function(x){
 
     xlab <- x[["endpoint_label"]]
 
@@ -61,12 +67,12 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
     }
     return(xlab)
   })]
-  ep_expanded_2[["endpoint_label"]] <- NULL
-  setnames(ep_expanded_2, "endpoint_label_evaluated", "endpoint_label")
+  ep_exp[["endpoint_label"]] <- NULL
+  setnames(ep_exp, "endpoint_label_evaluated", "endpoint_label")
 
   keep <-
     setdiff(
-      names(ep_expanded_2),
+      names(ep_exp),
       c(
         "data_prepare",
         "stat_by_strata_by_trt",
@@ -76,11 +82,12 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
         "crit_by_strata_by_trt",
         "crit_by_strata_across_trt",
         "fn_type",
-        "fn_hash"
+        "fn_hash",
+        "expand_specification"
       )
     )
 
-out <- ep_expanded_2[, .SD, .SDcols=keep]
+out <- ep_exp[, .SD, .SDcols=keep]
 setkey(out, key_analysis_data)
 out[]
 }
@@ -98,6 +105,8 @@ out[]
 #' @param group_by A list specifying the grouping for endpoints, where
 #'   each element corresponds to a variable used for grouping endpoints and
 #'   contains the levels for that grouping variable.
+#' @param forced_group_levels data.table (optional). Table with group levels 
+#'   that must be included in the expansion, regardless of `group_by`.
 #' @param col_prefix A prefix used to create the names of the metadata and
 #'   filter columns in the output `data.table`. Defaults to "endpoint_group".
 #'
@@ -223,25 +232,6 @@ add_ep_id <- function(x, grp) {
   ))]
 }
 
-
-add_missing_columns <- function(x){
-  if(length(intersect(c("endpoint_group_filter", "empty", "endpoint_group_metadata"), names(x)))==2){
-    return(x)
-  }
-  x1 <- copy(x)
-  if(length(intersect(c("endpoint_group_filter"), names(x)))==0){
-    x1[, endpoint_group_filter:=NA]
-  }
-  if(length(intersect(c("empty"), names(x)))==0){
-    x1[, empty:=NA]
-  }
-  if(length(intersect(c("endpoint_group_metadata"), names(x)))==0){
-    x1[, endpoint_group_metadata:=list()]
-  }
-
-  x1
-}
-
 #' Add forced group levels
 #'
 #' @description Expand the set of unique group levels of one grouping variables in a table containing all combinations of one or more grouping variables.

diff --git a/R/mk_endpoint_str.R b/R/mk_endpoint_str.R
@@ -3,6 +3,7 @@
 #' @param study_metadata List. Metadata describing the clinical study.
 #' @param pop_var Character.
 #' @param pop_value Character.
+#' @param custom_pop_filter Character.
 #' @param treatment_var Character.
 #' @param treatment_refval Character.
 #' @param period_var Character.
@@ -14,18 +15,20 @@
 #' @param data_prepare List.
 #' @param stat_by_strata_by_trt List.
 #' @param stat_by_strata_across_trt List.
+#' @param stat_across_strata_across_trt List.
 #' @param crit_endpoint List.
 #' @param crit_by_strata_by_trt List.
 #' @param crit_by_strata_across_trt List.
 #' @param only_strata_with_events Boolean.
+#' @param env Environment.
 #'
 #' @return A data.table containing the endpoint specification.
 #' @export
 #'
 mk_endpoint_str <- function(study_metadata = NULL,
                             pop_var = NULL,
                             pop_value = NULL,
-                            custom_pop_filter=NA_character_,
+                            custom_pop_filter = NA_character_,
                             treatment_var = NULL,
                             treatment_refval = NULL,
                             period_var = NA_character_,

diff --git a/R/mk_filtered_endpoint_dt.R b/R/mk_filtered_endpoint_dt.R
@@ -1,44 +1,3 @@
-#' Filter adam data
-#'
-#' @param ep_unnest data.table. An unnested endpoint definition table
-#' @param adam_db data.table. A table containing the adam datasets associated
-#'   with each data_prepare
-#'
-#' @return a data.table with the filtered adam data.
-#' @export
-#'
-join_adam <-
-  function(ep,
-           ep_fn_map,
-           adam_db,
-           filter_pop = TRUE,
-           filter_period = TRUE,
-           filter_trt = TRUE,
-           filter_user_defined = TRUE) {
-    checkmate::assert_data_table(ep)
-    checkmate::assert_data_table(adam_db)
-
-
-    ep_adam <-
-      merge(ep, ep_fn_map[fn_type == "data_prepare"], by = "endpoint_spec_id")
-
-    ep_adam <-
-      merge(ep_adam,
-            adam_db[, .(fn_hash, dat)],
-            by = "fn_hash",
-            all.x = TRUE,
-            all.y = FALSE)
-    # We no longer need to track the data generating (aka ADaM) functions.
-    # Tracking it, in fact, might increase risk of triggering a unneccessary
-    # re-run in targets if the fn hash changes without any data change
-    keep_cols <-
-      setdiff(names(ep_adam),
-              c("fn_type", "fn", "fn_name", "fn_hash", "fn_callable"))
-    ep_adam[, .SD, .SDcols = keep_cols]
-
-
-  }
-
 #' Filter applying to a data.table
 #'
 #' @param adam_dt data.table::data.table

diff --git a/R/prepare_for_stats.R b/R/prepare_for_stats.R
@@ -9,6 +9,7 @@
 #'   associated data, typically the output from `apply_criterion_by_strata`.
 #' @param fn_map A `data.table` mapping endpoint definitions to statistical
 #'   functions.
+#' @param analysis_data_container data.table containing the analysis data.
 #' @param type A character string specifying the type of statistics for which
 #'   the data is being prepared. Valid types are "stat_by_strata_by_trt",
 #'   "stat_by_strata_across_trt", and "stat_across_strata_across_trt".
@@ -60,7 +61,7 @@ prepare_for_stats <- function(ep,
      nrow(fn_map[fn_type == type]) == 0 |
      (type == "stat_across_strata_across_trt" & !any(ep_accepted[[grouping_cols[[1]]]] != "TOTAL_"))
      ){
-    return(data.table::data.table(SKIP_=TRUE))
+    return(data.table::data.table(SKIP_ = TRUE))
   }
 
   if (type %in% c("stat_by_strata_by_trt", "stat_by_strata_across_trt")) {
@@ -155,8 +156,8 @@ list_group_and_levels <- function(
     data,
     grouping_col
 ){
-  l = list(data[, unique(get(grouping_col))])
-  names(l) = grouping_col
+  l <- list(data[, unique(get(grouping_col))])
+  names(l) <- grouping_col
   return (l)
 }
 
@@ -169,6 +170,7 @@ list_group_and_levels <- function(
 #' @param ep A `data.table` containing endpoint data to be expanded.
 #' @param grouping_cols A character vector specifying the columns used for
 #'   grouping in the expansion.
+#' @param analysis_data_container data.table containing the analysis data.
 #' @param data_col The name of the column in `ep` that contains the ADaM
 #'   dataset.
 #' @param id_col The name of the column in `ep` that contains the unique
@@ -187,7 +189,7 @@ expand_ep_for_stats <- function(
     col_prefix
 ){
 
-  name_expand_col = paste(col_prefix, "expand_spec", sep="_")
+  name_expand_col <- paste(col_prefix, "expand_spec", sep="_")
 
   ep[,"_i_" := .I]
   setkey(ep, key_analysis_data)
@@ -214,7 +216,7 @@ expand_ep_for_stats <- function(
 
   ep_exp[,"_i_":= .I]
   ep_exp_with_data <- ep_exp[analysis_data_container, nomatch = NULL]
-  filter_col_name = paste(col_prefix, "filter", sep="_")
+  filter_col_name <- paste(col_prefix, "filter", sep="_")
   ep_exp_with_data[, cell_index := llist(create_flag(get(data_col)[[1]],
                                                      singletons = c(get(filter_col_name)[[1]]))),
                    by = "_i_"]
@@ -247,17 +249,17 @@ define_expansion_cell_from_data <- function(
     col_prefix
 ){
   if (is.character(grouping_cols)){
-    grouping_cols = c(grouping_cols)
+    grouping_cols <- c(grouping_cols)
   }
   stopifnot(all(grouping_cols %in% names(row)))
 
   # Get the actual grouping variables
-  grouping_col_values = row[, .SD, .SDcols=grouping_cols]
-  grouping_var_list = vector(mode="list", length(grouping_col_values))
-  names(grouping_var_list) = grouping_col_values
+  grouping_col_values <- row[, .SD, .SDcols=grouping_cols]
+  grouping_var_list <- vector(mode="list", length(grouping_col_values))
+  names(grouping_var_list) <- grouping_col_values
 
   if(row[["only_strata_with_events"]]){
-    dat <-  row[,get(data_col)][[1]][row[["event_index"]]]
+    dat <- row[,get(data_col)][[1]][row[["event_index"]]]
   }else{
     dat <- row[,get(data_col)][[1]]
   }