Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing bug that appears when no grouping is present in the endpoint specifications #20

Merged
merged 8 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ export(format_stats_results)
export(group_ep_for_targets)
export(index_expanded_ep_groups)
export(index_non_null_group_level)
export(join_adam)
export(llist)
export(load_debug_session)
export(mk_endpoint_str)
Expand Down
2 changes: 2 additions & 0 deletions R/add_event_index.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ create_flag <- function(dat, var_value_pairs=NULL, singletons=NULL) {
#' of the columns `pop_var`, `pop_value`, `period_var`, `period_value`,
#' `endpoint_filter`, `endpoint_group_filter`, and `custom_pop_filter`, which
#' are used to define the conditions for event indexing.
#'
#' @param analysis_data_container A data.table containing the analysis data.
#'
#' @return A `data.table` similar to the input but with an additional
#' `event_index` column, which contains the indices of events as determined by
Expand Down
2 changes: 1 addition & 1 deletion R/add_id.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Add ID to user-defined endpoint groups
#'
#' @param ep
#' @param ep A `data.table` containing endpoint definitions.
#'
#' @return data.table
#' @export
Expand Down
2 changes: 2 additions & 0 deletions R/apply_criterion.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#'
#' @param ep A `data.table` containing expanded endpoint definitions and
#' associated data, typically the output from `add_event_index`.
#' @param analysis_data_container data.table containing the analysis data.
#' @param fn_map A `data.table` mapping endpoint definitions to criterion
#' functions.
#'
Expand Down Expand Up @@ -60,6 +61,7 @@ apply_criterion_endpoint <- function(ep, analysis_data_container, fn_map) {
#'
#' @param ep A `data.table` containing endpoint data with applied endpoint
#' criteria, typically the output from `apply_criterion_endpoint`.
#' @param analysis_data_container data.table containing the analysis data.
#' @param fn_map A `data.table` mapping endpoint definitions to by-strata
#' criteria functions.
#' @param type The type of criterion to apply, either
Expand Down
2 changes: 1 addition & 1 deletion R/apply_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#'
#' @param ep A `data.table` containing prepared endpoint data for statistical
#' analysis, typically the output from `prepare_for_stats`.
#' @param fn_map A `data_table` mapping endpoint definitions to statistical
#' @param analysis_data_container data.table containing the analysis data.
#' functions.
#' @param type The type of statistical function. Can be one of
#' "stat_by_strata_by_trt", "stat_by_strata_across_trt", or "stat_across_strata_across_trt"
Expand Down
52 changes: 21 additions & 31 deletions R/expand_endpoints.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @param ep A `data.table` containing endpoint definitions, where each row
#' corresponds to a different endpoint and contains relevant attributes such
#' as the endpoint name, type, and criteria.
#' @param analysis_data_container data.table containing the analysis data.
#' @return A `data.table` where each row corresponds to an expanded endpoint
#' definition
#' @export
Expand All @@ -20,16 +21,21 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
ep_with_data[, expand_specification := llist(define_expanded_ep(dat[[1]], group_by[[1]])),
by = 1:nrow(ep_with_data)]
ep_with_data[["dat"]] <- NULL
ep_expanded <-
ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT()

ep_expanded_2 <- add_missing_columns(ep_expanded)
ep_expanded_2[, endpoint_id := add_ep_id(.SD, .BY), by =
endpoint_spec_id]
# Expand by groups. If no grouping is present, then add empty group related columns
if(any(!is.na(ep_with_data$expand_specification))){
ep_exp <- ep_with_data %>% tidyr::unnest(col = expand_specification) %>% setDT()
}else{
ep_exp <- ep_with_data[, .SD, .SDcols = setdiff(names(ep_with_data), "expand_specification")]
ep_exp[, endpoint_group_filter := NA]
ep_exp[, endpoint_group_metadata := list()]
}

ep_exp[, endpoint_id := add_ep_id(.SD, .BY), by = endpoint_spec_id]

# Complete endpoint labels by replacing keywords with values
nm_set <- names(ep_expanded_2)
ep_expanded_2[,endpoint_label_evaluated := apply(ep_expanded_2, 1, function(x){
nm_set <- names(ep_exp)
ep_exp[,endpoint_label_evaluated := apply(ep_exp, 1, function(x){

xlab <- x[["endpoint_label"]]

Expand Down Expand Up @@ -61,12 +67,12 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
}
return(xlab)
})]
ep_expanded_2[["endpoint_label"]] <- NULL
setnames(ep_expanded_2, "endpoint_label_evaluated", "endpoint_label")
ep_exp[["endpoint_label"]] <- NULL
setnames(ep_exp, "endpoint_label_evaluated", "endpoint_label")

keep <-
setdiff(
names(ep_expanded_2),
names(ep_exp),
c(
"data_prepare",
"stat_by_strata_by_trt",
Expand All @@ -76,11 +82,12 @@ expand_over_endpoints <- function(ep, analysis_data_container) {
"crit_by_strata_by_trt",
"crit_by_strata_across_trt",
"fn_type",
"fn_hash"
"fn_hash",
"expand_specification"
)
)

out <- ep_expanded_2[, .SD, .SDcols=keep]
out <- ep_exp[, .SD, .SDcols=keep]
setkey(out, key_analysis_data)
out[]
}
Expand All @@ -98,6 +105,8 @@ out[]
#' @param group_by A list specifying the grouping for endpoints, where
#' each element corresponds to a variable used for grouping endpoints and
#' contains the levels for that grouping variable.
#' @param forced_group_levels data.table (optional). Table with group levels
#' that must be included in the expansion, regardless of `group_by`.
#' @param col_prefix A prefix used to create the names of the metadata and
#' filter columns in the output `data.table`. Defaults to "endpoint_group".
#'
Expand Down Expand Up @@ -223,25 +232,6 @@ add_ep_id <- function(x, grp) {
))]
}


add_missing_columns <- function(x){
if(length(intersect(c("endpoint_group_filter", "empty", "endpoint_group_metadata"), names(x)))==2){
return(x)
}
x1 <- copy(x)
if(length(intersect(c("endpoint_group_filter"), names(x)))==0){
x1[, endpoint_group_filter:=NA]
}
if(length(intersect(c("empty"), names(x)))==0){
x1[, empty:=NA]
}
if(length(intersect(c("endpoint_group_metadata"), names(x)))==0){
x1[, endpoint_group_metadata:=list()]
}

x1
}

#' Add forced group levels
#'
#' @description Expand the set of unique group levels of one grouping variables in a table containing all combinations of one or more grouping variables.
Expand Down
5 changes: 4 additions & 1 deletion R/mk_endpoint_str.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' @param study_metadata List. Metadata describing the clinical study.
#' @param pop_var Character.
#' @param pop_value Character.
#' @param custom_pop_filter Character.
#' @param treatment_var Character.
#' @param treatment_refval Character.
#' @param period_var Character.
Expand All @@ -14,18 +15,20 @@
#' @param data_prepare List.
#' @param stat_by_strata_by_trt List.
#' @param stat_by_strata_across_trt List.
#' @param stat_across_strata_across_trt List.
#' @param crit_endpoint List.
#' @param crit_by_strata_by_trt List.
#' @param crit_by_strata_across_trt List.
#' @param only_strata_with_events Boolean.
#' @param env Environment.
#'
#' @return A data.table containing the endpoint specification.
#' @export
#'
mk_endpoint_str <- function(study_metadata = NULL,
pop_var = NULL,
pop_value = NULL,
custom_pop_filter=NA_character_,
custom_pop_filter = NA_character_,
treatment_var = NULL,
treatment_refval = NULL,
period_var = NA_character_,
Expand Down
41 changes: 0 additions & 41 deletions R/mk_filtered_endpoint_dt.R
Original file line number Diff line number Diff line change
@@ -1,44 +1,3 @@
#' Filter adam data
#'
#' @param ep_unnest data.table. An unnested endpoint definition table
#' @param adam_db data.table. A table containing the adam datasets associated
#' with each data_prepare
#'
#' @return a data.table with the filtered adam data.
#' @export
#'
join_adam <-
function(ep,
ep_fn_map,
adam_db,
filter_pop = TRUE,
filter_period = TRUE,
filter_trt = TRUE,
filter_user_defined = TRUE) {
checkmate::assert_data_table(ep)
checkmate::assert_data_table(adam_db)


ep_adam <-
merge(ep, ep_fn_map[fn_type == "data_prepare"], by = "endpoint_spec_id")

ep_adam <-
merge(ep_adam,
adam_db[, .(fn_hash, dat)],
by = "fn_hash",
all.x = TRUE,
all.y = FALSE)
# We no longer need to track the data generating (aka ADaM) functions.
# Tracking it, in fact, might increase risk of triggering a unneccessary
# re-run in targets if the fn hash changes without any data change
keep_cols <-
setdiff(names(ep_adam),
c("fn_type", "fn", "fn_name", "fn_hash", "fn_callable"))
ep_adam[, .SD, .SDcols = keep_cols]


}

#' Filter applying to a data.table
#'
#' @param adam_dt data.table::data.table
Expand Down
22 changes: 12 additions & 10 deletions R/prepare_for_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' associated data, typically the output from `apply_criterion_by_strata`.
#' @param fn_map A `data.table` mapping endpoint definitions to statistical
#' functions.
#' @param analysis_data_container data.table containing the analysis data.
#' @param type A character string specifying the type of statistics for which
#' the data is being prepared. Valid types are "stat_by_strata_by_trt",
#' "stat_by_strata_across_trt", and "stat_across_strata_across_trt".
Expand Down Expand Up @@ -60,7 +61,7 @@ prepare_for_stats <- function(ep,
nrow(fn_map[fn_type == type]) == 0 |
(type == "stat_across_strata_across_trt" & !any(ep_accepted[[grouping_cols[[1]]]] != "TOTAL_"))
){
return(data.table::data.table(SKIP_=TRUE))
return(data.table::data.table(SKIP_ = TRUE))
}

if (type %in% c("stat_by_strata_by_trt", "stat_by_strata_across_trt")) {
Expand Down Expand Up @@ -155,8 +156,8 @@ list_group_and_levels <- function(
data,
grouping_col
){
l = list(data[, unique(get(grouping_col))])
names(l) = grouping_col
l <- list(data[, unique(get(grouping_col))])
names(l) <- grouping_col
return (l)
}

Expand All @@ -169,6 +170,7 @@ list_group_and_levels <- function(
#' @param ep A `data.table` containing endpoint data to be expanded.
#' @param grouping_cols A character vector specifying the columns used for
#' grouping in the expansion.
#' @param analysis_data_container data.table containing the analysis data.
#' @param data_col The name of the column in `ep` that contains the ADaM
#' dataset.
#' @param id_col The name of the column in `ep` that contains the unique
Expand All @@ -187,7 +189,7 @@ expand_ep_for_stats <- function(
col_prefix
){

name_expand_col = paste(col_prefix, "expand_spec", sep="_")
name_expand_col <- paste(col_prefix, "expand_spec", sep="_")

ep[,"_i_" := .I]
setkey(ep, key_analysis_data)
Expand All @@ -214,7 +216,7 @@ expand_ep_for_stats <- function(

ep_exp[,"_i_":= .I]
ep_exp_with_data <- ep_exp[analysis_data_container, nomatch = NULL]
filter_col_name = paste(col_prefix, "filter", sep="_")
filter_col_name <- paste(col_prefix, "filter", sep="_")
ep_exp_with_data[, cell_index := llist(create_flag(get(data_col)[[1]],
singletons = c(get(filter_col_name)[[1]]))),
by = "_i_"]
Expand Down Expand Up @@ -247,17 +249,17 @@ define_expansion_cell_from_data <- function(
col_prefix
){
if (is.character(grouping_cols)){
grouping_cols = c(grouping_cols)
grouping_cols <- c(grouping_cols)
}
stopifnot(all(grouping_cols %in% names(row)))

# Get the actual grouping variables
grouping_col_values = row[, .SD, .SDcols=grouping_cols]
grouping_var_list = vector(mode="list", length(grouping_col_values))
names(grouping_var_list) = grouping_col_values
grouping_col_values <- row[, .SD, .SDcols=grouping_cols]
grouping_var_list <- vector(mode="list", length(grouping_col_values))
names(grouping_var_list) <- grouping_col_values

if(row[["only_strata_with_events"]]){
dat <- row[,get(data_col)][[1]][row[["event_index"]]]
dat <- row[,get(data_col)][[1]][row[["event_index"]]]
}else{
dat <- row[,get(data_col)][[1]]
}
Expand Down
Loading
Loading