Skip to content

Commit

Permalink
Merge pull request #19 from hta-pharma/results_cols
Browse files Browse the repository at this point in the history
Fixing bug when no grouping is active in any endpoint specifications and revise function parameter documentation
  • Loading branch information
nsjohnsen authored Mar 15, 2024
2 parents c79b7e1 + 155ccf1 commit cc49b01
Show file tree
Hide file tree
Showing 26 changed files with 247 additions and 169 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ export(format_stats_results)
export(group_ep_for_targets)
export(index_expanded_ep_groups)
export(index_non_null_group_level)
export(join_adam)
export(llist)
export(load_debug_session)
export(mk_endpoint_str)
Expand Down
2 changes: 2 additions & 0 deletions R/add_event_index.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ create_flag <- function(dat, var_value_pairs=NULL, singletons=NULL) {
#' of the columns `pop_var`, `pop_value`, `period_var`, `period_value`,
#' `endpoint_filter`, `endpoint_group_filter`, and `custom_pop_filter`, which
#' are used to define the conditions for event indexing.
#'
#' @param analysis_data_container A data.table containing the analysis data.
#'
#' @return A `data.table` similar to the input but with an additional
#' `event_index` column, which contains the indices of events as determined by
Expand Down
2 changes: 1 addition & 1 deletion R/add_id.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Add ID to user-defined endpoint groups
#'
#' @param ep
#' @param ep A `data.table` containing endpoint definitions.
#'
#' @return data.table
#' @export
Expand Down
2 changes: 2 additions & 0 deletions R/apply_criterion.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#'
#' @param ep A `data.table` containing expanded endpoint definitions and
#' associated data, typically the output from `add_event_index`.
#' @param analysis_data_container data.table containing the analysis data.
#' @param fn_map A `data.table` mapping endpoint definitions to criterion
#' functions.
#'
Expand Down Expand Up @@ -60,6 +61,7 @@ apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) {
#'
#' @param ep A `data.table` containing endpoint data with applied endpoint
#' criteria, typically the output from `apply_criterion_endpoint`.
#' @param analysis_data_container data.table containing the analysis data.
#' @param fn_map A `data.table` mapping endpoint definitions to by-strata
#' criteria functions.
#' @param type The type of criterion to apply, either
Expand Down
2 changes: 1 addition & 1 deletion R/apply_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#'
#' @param ep A `data.table` containing prepared endpoint data for statistical
#' analysis, typically the output from `prepare_for_stats`.
#' @param fn_map A `data_table` mapping endpoint definitions to statistical
#' @param analysis_data_container data.table containing the analysis data.
#' functions.
#' @param type The type of statistical function. Can be one of
#' "stat_by_strata_by_trt", "stat_by_strata_across_trt", or "stat_across_strata_across_trt"
Expand Down
52 changes: 21 additions & 31 deletions R/expand_endpoints.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @param ep A `data.table` containing endpoint definitions, where each row
#' corresponds to a different endpoint and contains relevant attributes such
#' as the endpoint name, type, and criteria.
#' @param analysis_data_container data.table containing the analysis data.
#' @return A `data.table` where each row corresponds to an expanded endpoint
#' definition
#' @export
Expand All @@ -20,16 +21,21 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
ep_with_data[, expand_specification := llist(define_expanded_ep(dat[[1]], group_by[[1]])),
by = 1:nrow(ep_with_data)]
ep_with_data[["dat"]] <- NULL
ep_expanded <-
ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT()

ep_expanded_2 <- add_missing_columns(ep_expanded)
ep_expanded_2[, endpoint_id := add_ep_id(.SD, .BY), by =
endpoint_spec_id]
# Expand by groups. If no grouping is present, then add empty group related columns
if(any(!is.na(ep_with_data$expand_specification))){
ep_exp <- ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT()
}else{
ep_exp <- ep_with_data[, .SD, .SDcols = setdiff(names(ep_with_data), "expand_specification")]
ep_exp[, endpoint_group_filter := NA]
ep_exp[, endpoint_group_metadata := list()]
}

ep_exp[, endpoint_id := add_ep_id(.SD, .BY), by = endpoint_spec_id]

# Complete endpoint labels by replacing keywords with values
nm_set <- names(ep_expanded_2)
ep_expanded_2[,endpoint_label_evaluated := apply(ep_expanded_2, 1, function(x){
nm_set <- names(ep_exp)
ep_exp[,endpoint_label_evaluated := apply(ep_exp, 1, function(x){

xlab <- x[["endpoint_label"]]

Expand Down Expand Up @@ -61,12 +67,12 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
}
return(xlab)
})]
ep_expanded_2[["endpoint_label"]] <- NULL
setnames(ep_expanded_2, "endpoint_label_evaluated", "endpoint_label")
ep_exp[["endpoint_label"]] <- NULL
setnames(ep_exp, "endpoint_label_evaluated", "endpoint_label")

keep <-
setdiff(
names(ep_expanded_2),
names(ep_exp),
c(
"data_prepare",
"stat_by_strata_by_trt",
Expand All @@ -76,11 +82,12 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
"crit_by_strata_by_trt",
"crit_by_strata_across_trt",
"fn_type",
"fn_hash"
"fn_hash",
"expand_specification"
)
)

out <- ep_expanded_2[, .SD, .SDcols=keep]
out <- ep_exp[, .SD, .SDcols=keep]
setkey(out, key_analysis_data)
out[]
}
Expand All @@ -98,6 +105,8 @@ out[]
#' @param group_by A list specifying the grouping for endpoints, where
#' each element corresponds to a variable used for grouping endpoints and
#' contains the levels for that grouping variable.
#' @param forced_group_levels data.table (optional). Table with group levels
#' that must be included in the expansion, regardless of `group_by`.
#' @param col_prefix A prefix used to create the names of the metadata and
#' filter columns in the output `data.table`. Defaults to "endpoint_group".
#'
Expand Down Expand Up @@ -223,25 +232,6 @@ add_ep_id <- function(x, grp) {
))]
}


add_missing_columns <- function(x){
if(length(intersect(c("endpoint_group_filter", "empty", "endpoint_group_metadata"), names(x)))==2){
return(x)
}
x1 <- copy(x)
if(length(intersect(c("endpoint_group_filter"), names(x)))==0){
x1[, endpoint_group_filter:=NA]
}
if(length(intersect(c("empty"), names(x)))==0){
x1[, empty:=NA]
}
if(length(intersect(c("endpoint_group_metadata"), names(x)))==0){
x1[, endpoint_group_metadata:=list()]
}

x1
}

#' Add forced group levels
#'
#' @description Expand the set of unique group levels of one grouping variables in a table containing all combinations of one or more grouping variables.
Expand Down
5 changes: 4 additions & 1 deletion R/mk_endpoint_str.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' @param study_metadata List. Metadata describing the clinical study.
#' @param pop_var Character.
#' @param pop_value Character.
#' @param custom_pop_filter Character.
#' @param treatment_var Character.
#' @param treatment_refval Character.
#' @param period_var Character.
Expand All @@ -14,18 +15,20 @@
#' @param data_prepare List.
#' @param stat_by_strata_by_trt List.
#' @param stat_by_strata_across_trt List.
#' @param stat_across_strata_across_trt List.
#' @param crit_endpoint List.
#' @param crit_by_strata_by_trt List.
#' @param crit_by_strata_across_trt List.
#' @param only_strata_with_events Boolean.
#' @param env Environment.
#'
#' @return A data.table containing the endpoint specification.
#' @export
#'
mk_endpoint_str <- function(study_metadata = NULL,
pop_var = NULL,
pop_value = NULL,
custom_pop_filter=NA_character_,
custom_pop_filter = NA_character_,
treatment_var = NULL,
treatment_refval = NULL,
period_var = NA_character_,
Expand Down
41 changes: 0 additions & 41 deletions R/mk_filtered_endpoint_dt.R
Original file line number Diff line number Diff line change
@@ -1,44 +1,3 @@
#' Filter adam data
#'
#' @param ep_unnest data.table. An unnested endpoint definition table
#' @param adam_db data.table. A table containing the adam datasets associated
#' with each data_prepare
#'
#' @return a data.table with the filtered adam data.
#' @export
#'
join_adam <-
function(ep,
ep_fn_map,
adam_db,
filter_pop = TRUE,
filter_period = TRUE,
filter_trt = TRUE,
filter_user_defined = TRUE) {
checkmate::assert_data_table(ep)
checkmate::assert_data_table(adam_db)


ep_adam <-
merge(ep, ep_fn_map[fn_type == "data_prepare"], by = "endpoint_spec_id")

ep_adam <-
merge(ep_adam,
adam_db[, .(fn_hash, dat)],
by = "fn_hash",
all.x = TRUE,
all.y = FALSE)
# We no longer need to track the data generating (aka ADaM) functions.
# Tracking it, in fact, might increase risk of triggering a unneccessary
# re-run in targets if the fn hash changes without any data change
keep_cols <-
setdiff(names(ep_adam),
c("fn_type", "fn", "fn_name", "fn_hash", "fn_callable"))
ep_adam[, .SD, .SDcols = keep_cols]


}

#' Filter applying to a data.table
#'
#' @param adam_dt data.table::data.table
Expand Down
22 changes: 12 additions & 10 deletions R/prepare_for_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' associated data, typically the output from `apply_criterion_by_strata`.
#' @param fn_map A `data.table` mapping endpoint definitions to statistical
#' functions.
#' @param analysis_data_container data.table containing the analysis data.
#' @param type A character string specifying the type of statistics for which
#' the data is being prepared. Valid types are "stat_by_strata_by_trt",
#' "stat_by_strata_across_trt", and "stat_across_strata_across_trt".
Expand Down Expand Up @@ -60,7 +61,7 @@ prepare_for_stats <- function(ep,
nrow(fn_map[fn_type == type]) == 0 |
(type == "stat_across_strata_across_trt" & !any(ep_accepted[[grouping_cols[[1]]]] != "TOTAL_"))
){
return(data.table::data.table(SKIP_=TRUE))
return(data.table::data.table(SKIP_ = TRUE))
}

if (type %in% c("stat_by_strata_by_trt", "stat_by_strata_across_trt")) {
Expand Down Expand Up @@ -155,8 +156,8 @@ list_group_and_levels <- function(
data,
grouping_col
){
l = list(data[, unique(get(grouping_col))])
names(l) = grouping_col
l <- list(data[, unique(get(grouping_col))])
names(l) <- grouping_col
return (l)
}

Expand All @@ -169,6 +170,7 @@ list_group_and_levels <- function(
#' @param ep A `data.table` containing endpoint data to be expanded.
#' @param grouping_cols A character vector specifying the columns used for
#' grouping in the expansion.
#' @param analysis_data_container data.table containing the analysis data.
#' @param data_col The name of the column in `ep` that contains the ADaM
#' dataset.
#' @param id_col The name of the column in `ep` that contains the unique
Expand All @@ -187,7 +189,7 @@ expand_ep_for_stats <- function(
col_prefix
){

name_expand_col = paste(col_prefix, "expand_spec", sep="_")
name_expand_col <- paste(col_prefix, "expand_spec", sep="_")

ep[,"_i_" := .I]
setkey(ep, key_analysis_data)
Expand All @@ -214,7 +216,7 @@ expand_ep_for_stats <- function(

ep_exp[,"_i_":= .I]
ep_exp_with_data <- ep_exp[analysis_data_container, nomatch = NULL]
filter_col_name = paste(col_prefix, "filter", sep="_")
filter_col_name <- paste(col_prefix, "filter", sep="_")
ep_exp_with_data[, cell_index := llist(create_flag(get(data_col)[[1]],
singletons = c(get(filter_col_name)[[1]]))),
by = "_i_"]
Expand Down Expand Up @@ -247,17 +249,17 @@ define_expansion_cell_from_data <- function(
col_prefix
){
if (is.character(grouping_cols)){
grouping_cols = c(grouping_cols)
grouping_cols <- c(grouping_cols)
}
stopifnot(all(grouping_cols %in% names(row)))

# Get the actual grouping variables
grouping_col_values = row[, .SD, .SDcols=grouping_cols]
grouping_var_list = vector(mode="list", length(grouping_col_values))
names(grouping_var_list) = grouping_col_values
grouping_col_values <- row[, .SD, .SDcols=grouping_cols]
grouping_var_list <- vector(mode="list", length(grouping_col_values))
names(grouping_var_list) <- grouping_col_values

if(row[["only_strata_with_events"]]){
dat <- row[,get(data_col)][[1]][row[["event_index"]]]
dat <- row[,get(data_col)][[1]][row[["event_index"]]]
}else{
dat <- row[,get(data_col)][[1]]
}
Expand Down
Loading

0 comments on commit cc49b01

Please sign in to comment.