From 5f657d22612e8c7dd96eafc93adce75e854bfb12 Mon Sep 17 00:00:00 2001
From: Matthew Phelps <mewp@novonordisk.com>
Date: Tue, 2 Apr 2024 10:49:15 +0200
Subject: [PATCH 1/5] Feature/mod_fn (#14)

* Set up building-block helper functions that will be exported

* Rename building block functions

* Fix unit-test
---
 NAMESPACE                                     |   9 +-
 R/BreslowDayFunction.R                        |   3 +-
 R/across_strata_across_trt.R                  |   4 +-
 R/barnard_test.R                              |   4 +-
 R/building_blocks.R                           |  87 +++++
 R/by_strata_across_trt.R                      |  14 +-
 R/by_strata_by_trt.R                          | 225 +++++++-----
 R/relative_risk.R                             |   2 +-
 R/two_by_two_x.R                              |  10 +-
 R/validate_breslow_day.R                      |   2 +-
 _pkgdown.yml                                  |  17 +-
 man/barnard_test_.Rd                          |  24 ++
 man/breslowdaytest_.Rd                        |  23 ++
 man/count_set.Rd                              |  35 ++
 man/{mk_two_by_two.Rd => make_two_by_two_.Rd} |   6 +-
 ...y_two_by_k.Rd => make_two_by_two_by_k_.Rd} |   6 +-
 man/mean_value.Rd                             |   4 +-
 man/n_event_.Rd                               |  21 ++
 man/n_sub_.Rd                                 |  25 ++
 man/n_subj_event_.Rd                          |  24 ++
 man/p_subj_event_.Rd                          |  29 ++
 tests/testthat/helper-02-skip_on_devops.R     |   6 -
 tests/testthat/helper-setup.R                 |  39 +++
 .../testthat/test-across_strata_across_trt.R  |   2 +-
 tests/testthat/test-by_strata_across_trt.R    | 122 +++----
 tests/testthat/test-by_strata_by_trt.R        | 323 ++++++++++--------
 tests/testthat/test-core_stat_functions.R     |  14 +-
 tests/testthat/test-two_by_twos.R             |   4 +-
 vignettes/add_functions.Rmd                   | 210 ++++++++++++
 vignettes/function_types.Rmd                  | 173 ----------
 30 files changed, 952 insertions(+), 515 deletions(-)
 create mode 100644 R/building_blocks.R
 create mode 100644 man/barnard_test_.Rd
 create mode 100644 man/breslowdaytest_.Rd
 create mode 100644 man/count_set.Rd
 rename man/{mk_two_by_two.Rd => make_two_by_two_.Rd} (95%)
 rename man/{make_two_by_two_by_k.Rd => make_two_by_two_by_k_.Rd} (93%)
 create mode 100644 man/n_event_.Rd
 create mode 100644 man/n_sub_.Rd
 create mode 100644 man/n_subj_event_.Rd
 create mode 100644 man/p_subj_event_.Rd
 delete mode 100644 tests/testthat/helper-02-skip_on_devops.R
 create mode 100644 tests/testthat/helper-setup.R
 create mode 100644 vignettes/add_functions.Rmd

diff --git a/NAMESPACE b/NAMESPACE
index 8f35e32..c65874d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,18 +3,23 @@
 export(OR)
 export(RD)
 export(RR)
+export(count_set)
 export(demographics_continuous)
 export(demographics_counts)
 export(hedges_g)
-export(make_two_by_two_by_k)
+export(make_two_by_two_)
+export(make_two_by_two_by_k_)
 export(mean_value)
-export(mk_two_by_two)
 export(n_event)
+export(n_event_)
 export(n_event_100y)
+export(n_sub_)
 export(n_subj)
 export(n_subj_event)
+export(n_subj_event_)
 export(obs_time_by_trt)
 export(p_subj_event)
+export(p_subj_event_)
 export(p_subj_event_by_trt)
 export(p_val)
 export(p_val_interaction)
diff --git a/R/BreslowDayFunction.R b/R/BreslowDayFunction.R
index 0782840..63e7ab0 100644
--- a/R/BreslowDayFunction.R
+++ b/R/BreslowDayFunction.R
@@ -7,8 +7,7 @@
 #' @return A vector with three values statistic - Breslow and Day test
 #'   statistic pval - p value evtl. based on the Tarone test statistic using a
 #'    \eqn{\chi^2(K-1)} distribution
-#' @noRd
-breslowdaytest <- function(x, odds_ratio = NA, correct = FALSE) {
+breslowdaytest_ <- function(x, odds_ratio = NA, correct = FALSE) {
 
   # Function to perform the Breslow and Day (1980) test including the
   # corrected test by Tarone Uses the equations in Lachin (2000),
diff --git a/R/across_strata_across_trt.R b/R/across_strata_across_trt.R
index 88e8fc9..3ba2743 100644
--- a/R/across_strata_across_trt.R
+++ b/R/across_strata_across_trt.R
@@ -25,7 +25,7 @@ p_val_interaction <- function(dat,
                               ...) {
 
   two_by_two_by_k <-
-    make_two_by_two_by_k(
+    make_two_by_two_by_k_(
       dat = dat,
       event_index = event_index,
       strata_var = strata_var,
@@ -36,7 +36,7 @@ p_val_interaction <- function(dat,
 
   valid <- validate_breslow_day(two_by_two_by_k)
   if (valid) {
-    stat <- breslowdaytest(
+    stat <- breslowdaytest_(
       two_by_two_by_k,
       odds_ratio = odds_ratio,
       correct = correct
diff --git a/R/barnard_test.R b/R/barnard_test.R
index 614b818..4dc2c34 100644
--- a/R/barnard_test.R
+++ b/R/barnard_test.R
@@ -9,8 +9,8 @@
 #'
 #' @return 2-sided p-value for the Barnards Unconditional Exact test
 #' @importFrom Barnard barnard.test
-#' @noRd
-barnard_test <-
+
+barnard_test_ <-
   function(two_by_two,
            verbose = FALSE,
            safe_mode = TRUE) {
diff --git a/R/building_blocks.R b/R/building_blocks.R
new file mode 100644
index 0000000..4e79059
--- /dev/null
+++ b/R/building_blocks.R
@@ -0,0 +1,87 @@
+#' Building-block: Number of subjects
+#'
+#' @param cell_index A vector of integers referencing the rows of `dat` (as
+#'   specified by the `INDEX_` column in `dat`) that match the population to be
+#'   analyzed. See the "Endpoint Events" vignette in {ramnog} for more
+#'   information.
+#' @param subjectid_var character. Name of the subject identifier variable in
+#'   the data (default is "USUBJID").
+#' @param dat data.table. The analysis data set.
+#'
+#' @return An integer
+#' @export
+#' 
+n_sub_ <-
+  function(dat,
+           cell_index,
+           subjectid_var) {
+    dat[list(cell_index)] |>
+      data.table::uniqueN(by = c(subjectid_var))
+  }
+
+#' Building-block: Number of events (multiple events counted multiple times)
+#'
+#' @param dat data.table. The analysis data set.
+#' @param intersect_index A vector of intergers referencing the rows of `dat`
+#'   that match both (1) the population to be analyzed and (2) the
+#'   defenition of an event
+#'
+#' @return an integer value
+#' @export
+#'
+n_event_ <- function(dat, intersect_index) {
+  dat[list(intersect_index)] |>
+    NROW()
+}
+
+
+#' Building-block: Number of subjects with at least one event
+#'
+#' @param dat data.table. The analysis data set.
+#' @param intersect_index A vector of intergers referencing the rows of `dat`
+#'   that match both (1) the population to be analyzed and (2) the defenition of
+#'   an event
+#' @param subjectid_var character. Name of the subject identifier variable in
+#'   the data (default is "USUBJID").
+#'
+#' @return an interger value
+#' @export
+#'
+n_subj_event_ <- function(dat, intersect_index, subjectid_var) {
+  dat[list(intersect_index)] |>
+    data.table::uniqueN(by = subjectid_var)
+}
+
+#' Building-block: Proportion of subjects having at least one event
+#'
+#' @param dat data.table. The analysis data set.
+#' @param cell_index A vector of integers referencing the rows of `dat` (as
+#'   specified by the `INDEX_` column in `dat`) that match the population to be
+#'   analyzed. See the "Endpoint Events" vignette in {ramnog} for more
+#'   information.
+#' @param intersect_index A vector of intergers referencing the rows of `dat`
+#'   that match both (1) the population to be analyzed and (2) the defenition of
+#'   an event
+#' @param subjectid_var character. Name of the subject identifier variable in
+#'   the data (default is "USUBJID").
+#'
+#' @return an integer value
+#' @export 
+p_subj_event_ <-
+  function(dat,
+           cell_index,
+           intersect_index,
+           subjectid_var) {
+    n_sub <- n_sub_(dat, cell_index = cell_index,subjectid_var = subjectid_var)
+    
+    # If there are no subjects, no need for further calculations
+    if (n_sub == 0) {
+      return(NaN)
+    }
+    
+    # Filter analysis data to cell specific content
+    n_subj_event <-
+      n_subj_event_(dat, intersect_index = intersect_index, subjectid_var = subjectid_var)
+    
+    n_subj_event / n_sub * 100
+  }
diff --git a/R/by_strata_across_trt.R b/R/by_strata_across_trt.R
index 596476f..9a036a6 100644
--- a/R/by_strata_across_trt.R
+++ b/R/by_strata_across_trt.R
@@ -27,7 +27,7 @@ RR <- function(dat,
                ...) {
   # Test a 2x2 contingency table
   two_by_two <-
-    mk_two_by_two(
+    make_two_by_two_(
       dat = dat,
       event_index = event_index,
       cell_index = cell_index,
@@ -35,7 +35,7 @@ RR <- function(dat,
       treatment_refval = treatment_refval,
       subjectid_var = subjectid_var
     )
-  out <- relative_risk(two_by_two)
+  out <- relative_risk_(two_by_two)
   desc <-
     vapply(names(out), function(x) {
       switch(x,
@@ -43,7 +43,7 @@ RR <- function(dat,
         "RRLL" = "Relative Risk 95%-CI lower limit",
         "RRUL" = "Relative Risk 95%-CI upper limit",
         "SE" = "Relative Risk standard error",
-        error_name_mismatch(x, "relative_risk()")
+        error_name_mismatch(x, "relative_risk_()")
       )
     }, FUN.VALUE = character(1L))
 
@@ -84,7 +84,7 @@ OR <- function(dat,
                ...) {
   # Test a 2x2 contingency table
   two_by_two <-
-    mk_two_by_two(
+    make_two_by_two_(
       dat = dat,
       event_index = event_index,
       cell_index = cell_index,
@@ -144,7 +144,7 @@ RD <- function(dat,
                ...) {
   # Test a 2x2 contingency table
   two_by_two <-
-    mk_two_by_two(
+    make_two_by_two_(
       dat = dat,
       event_index = event_index,
       cell_index = cell_index,
@@ -208,7 +208,7 @@ p_val <-
            threshold_upper = 200,
            ...) {
     two_by_two <-
-      mk_two_by_two(
+      make_two_by_two_(
         dat = dat,
         event_index = event_index,
         cell_index = cell_index,
@@ -223,7 +223,7 @@ p_val <-
       message("----- Calculating Barnards test - this may take a minute \n")
     }
     pval <- switch(test_method,
-      "barnard" = barnard_test(two_by_two = two_by_two, safe_mode = safe_mode),
+      "barnard" = barnard_test_(two_by_two = two_by_two, safe_mode = safe_mode),
       "fisher" = stats::fisher.test(two_by_two)$p.val
     )
 
diff --git a/R/by_strata_by_trt.R b/R/by_strata_by_trt.R
index b2201a6..567c2fe 100644
--- a/R/by_strata_by_trt.R
+++ b/R/by_strata_by_trt.R
@@ -17,9 +17,10 @@ n_subj <- function(dat,
                    cell_index,
                    subjectid_var,
                    ...) {
-
-  stat <- dat[list(cell_index)] |>
-    data.table::uniqueN(by = c(subjectid_var))
+  stat <-
+    n_sub_(dat = dat,
+           cell_index = cell_index,
+           subjectid_var = subjectid_var)
   
   out <- data.table(
     description = "Number of subjects",
@@ -27,7 +28,6 @@ n_subj <- function(dat,
     label = "N",
     value = as.double(stat)
   )
-  
   out[]
 }
 
@@ -55,10 +55,8 @@ n_event <-
            cell_index,
            subjectid_var,
            ...) {
-    
     intersect_index <- intersect(event_index, cell_index)
-    stat <- dat[list(intersect_index)] |>
-      NROW()
+    stat <- n_event_(dat = dat, intersect_index = intersect_index)
     
     return(
       data.table::data.table(
@@ -95,18 +93,18 @@ n_subj_event <-
            cell_index,
            subjectid_var,
            ...) {
-    
     intersect_index <- intersect(event_index, cell_index)
-    stat <- dat[list(intersect_index)] |>
-      data.table::uniqueN(by = subjectid_var) |>
-      as.double()
-
+    stat <-
+      n_subj_event_(dat = dat,
+                    intersect_index = intersect_index,
+                    subjectid_var = subjectid_var)
+    
     return(
       data.table::data.table(
         description = "Number of subjects with events",
         qualifiers = NA_character_,
         label = "n",
-        value = stat
+        value = as.double(stat)
       )
     )
   }
@@ -126,7 +124,7 @@ n_subj_event <-
 #'   for more information.
 #' @param subjectid_var character. Name of the subject identifier variable in the data (default is "USUBJID").
 #' @param ... Optional parameters.
-#'  
+#'
 #' @return A data.table containing the percentage of subjects with events for the given combination of treatment and stratum
 #' @export
 p_subj_event <-
@@ -135,36 +133,90 @@ p_subj_event <-
            cell_index,
            subjectid_var,
            ...) {
+    intersect_index <- intersect(event_index, cell_index)
+    stat <-
+      p_subj_event_(
+        dat = dat,
+        cell_index = cell_index,
+        intersect_index = intersect_index,
+        subjectid_var = subjectid_var
+      )
     
-    n_sub <- dat[list(cell_index)] |>
-      data.table::uniqueN(by = c(subjectid_var))
-
-    # If there are no subjects, no need for further calculations
-    if (n_sub == 0) {
-      return(
-        data.table::data.table(
-          description = "Proportion of subjects with events",
-          qualifiers = NA_character_,
-          label = "(%)",
-          value = NaN
-        )
+    return(
+      data.table::data.table(
+        description = "Proportion of subjects with events",
+        qualifiers = NA_character_,
+        label = "(%)",
+        value = stat
       )
-    }
+    )
+  }
+
 
-    # Filter analysis data to cell specific content
-    intersect_index <- intersect(cell_index, event_index)
-    n_subev <- dat[list(intersect_index)] |>
-      data.table::uniqueN(by = c(subjectid_var))
 
-    return(data.table::data.table(
-      description = "Proportion of subjects with events",
+#' @title Produce counts of Number of subjects, number of events, number of
+#'   subjects with events, and proportion of subjects with events
+#'
+#' @description A short cut - instead of calling the individual function
+#'   (`n_sub`, `n_event`, `n_subj_event`, `p_subj_event`), one call to this
+#'   function will produce all the described functions. This can be useful to
+#'   save compute time as inside the chef pipeline there will be fewer
+#'   iterations.
+#'
+#'
+#' @param dat data.table. The analysis data set.
+#' @param event_index vector of integers that index the rows in `dat` that match
+#'   the definition of an 'event'. Matching is done via the `INDEX_` column in
+#'   `dat`.
+#' @param cell_index A vector of integers referencing the rows of `dat` (as
+#'   specified by the `INDEX_` column in `dat`) that match the population to be
+#'   analyzed. See the "Endpoint Events" vignette in {ramnog}
+#'   for more information.
+#' @param subjectid_var character. Name of the subject identifier variable in the data (default is "USUBJID").
+#' @param ... Optional parameters.
+#' @return a data.table containing all statistical outputs
+#' @export
+#'
+#' @examples
+count_set <- function(dat,
+                   event_index,
+                   cell_index,
+                   subjectid_var,
+                   ...) {
+  intersect_index <- intersect(event_index, cell_index)
+  n_subjects <-
+    n_sub_(dat = dat,
+           cell_index = cell_index,
+           subjectid_var = subjectid_var)
+  n_events <- n_event_(dat = dat, intersect_index = intersect_index)
+  n_subjects_with_event <-
+    n_subj_event_(dat = dat,
+                  intersect_index = intersect_index,
+                  subjectid_var = subjectid_var)
+  proportion_subjects_with_event <-
+    p_subj_event_(
+      dat = dat,
+      cell_index = cell_index,
+      intersect_index = intersect_index,
+      subjectid_var = subjectid_var
+    )
+  
+  description_vec <- c("Number of subjects", "Number of events", "Number of subjects with events", "Proportion of subjects with events")
+  label_vec <- c("N","E", "n", "(%)")
+  stat_vec <- c(n_subjects, n_events, n_subjects_with_event, proportion_subjects_with_event)
+  
+  return(
+    data.table::data.table(
+      description = description_vec,
       qualifiers = NA_character_,
-      label = "(%)",
-      value = n_subev / n_sub * 100
-    ))
-  }
+      label = label_vec,
+      value = stat_vec
+    )
+  )
+}
 
-#' Calculate summary statistics for demographics on a continuous variable 
+
+#' Calculate summary statistics for demographics on a continuous variable
 #'
 #'@description Calculate a set of summary statistics (mean, median, sd, min, max, n_non_missing, n_missing) on a continuous variable for demographics endpoints.
 #'
@@ -188,7 +240,6 @@ demographics_continuous <- function(dat,
                                     subjectid_var,
                                     var,
                                     ...) {
-
   # Filter analysis data to cell specific content
   intersect_index <- intersect(cell_index, event_index)
   dat_cell <- dat[list(intersect_index)] |>
@@ -197,11 +248,11 @@ demographics_continuous <- function(dat,
   # Return statistics depending on the type of variable (continuous or categorical)
   stat <- dat_cell[,
                    .(
-                     mean = mean(get(var),na.rm=TRUE),
-                     median = median(get(var), na.rm=TRUE),
+                     mean = mean(get(var), na.rm = TRUE),
+                     median = median(get(var), na.rm = TRUE),
                      sd = sd(get(var), na.rm = TRUE),
-                     min = min(get(var),na.rm = TRUE),
-                     max = max(get(var),na.rm = TRUE),
+                     min = min(get(var), na.rm = TRUE),
+                     max = max(get(var), na.rm = TRUE),
                      n_non_missing = sum(!is.na(get(var))),
                      n_missing = sum(is.na(get(var)))
                    )]
@@ -213,7 +264,7 @@ demographics_continuous <- function(dat,
       value = as.double(unlist(stat[1, .SD]))
     )
   )
-
+  
 }
 
 
@@ -240,7 +291,6 @@ demographics_counts <- function(dat,
                                 stratify_by,
                                 strata_var,
                                 ...) {
-  
   # Filter analysis data to cell specific content
   dat_cell <- dat[list(cell_index)] |>
     unique(by = c(subjectid_var))
@@ -277,20 +327,21 @@ total_missing_counts <- function(dat_cell, stratify_by) {
     stat <- dat_cell[, .(n_non_missing = sum(!is.na(get(strata_i))),
                          n_missing = sum(is.na(get(strata_i))))]
   })
-
-  desc <- paste0("Demographics")
+  value <- NULL
+  
   out <- data.table::rbindlist(stat) |>
     data.table::transpose(keep.names = "label") |>
     data.table::setnames(new = c("label", stratify_by_subset)) |>
     data.table::melt.data.table(measure.vars = stratify_by_subset,
                                 variable.name = "qualifiers")
   out[, `:=`(value = as.double(value), description = "Demographics")]
+  out[]
 }
 
 
-#' Calculate percentage of subjects with events
+#' Calculate mean value
 #'
-#' @description Calculate the percentage of subjects with events by treatment and strata.
+#' @description Calculate the mean value of a variable
 #'
 #' @param dat data.table. The analysis data set.
 #' @param event_index vector of integers that index the rows in `dat` that match
@@ -303,7 +354,7 @@ total_missing_counts <- function(dat_cell, stratify_by) {
 #' @param subjectid_var character. Name of the subject identifier variable in the data (default is "USUBJID").
 #' @param var character. Name of the variable in the analysis data that is subject to the statistics.
 #' @param ... Optional parameters.
-#' 
+#'
 #' @return A data.table containing the percentage of subjects with events by treatment.
 #' @export
 #'
@@ -316,10 +367,10 @@ mean_value <- function(dat,
   intersect_index <- intersect(cell_index, event_index)
   dat_cell <- dat[J(intersect_index)] |>
     unique(by = c(subjectid_var))
-
+  
   stat <- dat_cell[[var]] |>
     mean()
-
+  
   return(
     data.table(
       label = "mean",
@@ -346,7 +397,7 @@ mean_value <- function(dat,
 #' @param treatment_var character. Name of the treatment variable in the data.
 #' @param treatment_value character. Value of the treatment variable in the data.
 #' @param ... Optional parameters.
-#' 
+#'
 #' @return A data.table containing the percentage of subjects with events by treatment.
 #' @export
 p_subj_event_by_trt <-
@@ -357,11 +408,9 @@ p_subj_event_by_trt <-
            treatment_var,
            treatment_value,
            ...) {
-
     n_sub <- dat[dat[[treatment_var]] == treatment_value] |>
-      unique(by = c(subjectid_var)) |>
-      NROW()
-
+      uniqueN(by = c(subjectid_var))
+    
     if (n_sub == 0) {
       return(
         data.table(
@@ -375,15 +424,16 @@ p_subj_event_by_trt <-
     
     intersect_index <- intersect(cell_index, event_index)
     n_subev <- dat[list(intersect_index)] |>
-      unique(by = c(subjectid_var)) |>
-      NROW()
-
+      uniqueN(by = c(subjectid_var))
+    
     out <-
-      data.table(description = "Proportion of subjects with events",
-                 qualifiers = NA_character_,
-                 label = "(%)",
-                 value = n_subev / n_sub * 100)
-
+      data.table(
+        description = "Proportion of subjects with events",
+        qualifiers = NA_character_,
+        label = "(%)",
+        value = n_subev / n_sub * 100
+      )
+    
     return(out)
   }
 
@@ -404,11 +454,10 @@ obs_time_by_trt <- function(dat,
                             cell_index,
                             subjectid_var,
                             ...) {
-
   obs_time <- dat[J(cell_index)] |>
     unique(by = c(subjectid_var)) |>
     with(sum(INTRDURY, na.rm = TRUE))
-
+  
   out <-
     data.table(
       description = "Observation time (years)",
@@ -416,7 +465,7 @@ obs_time_by_trt <- function(dat,
       label = "Obs. time",
       value = round(obs_time)
     )
-
+  
   return(out)
 }
 
@@ -435,7 +484,7 @@ obs_time_by_trt <- function(dat,
 #' @param treatment_var character. Name of the treatment variable in the data.
 #' @param treatment_value character. Value of the treatment variable in the data.
 #' @param ... Optional parameters.
-#' 
+#'
 #' @return A data.table containing the number of events per 100 years of exposure.
 #' @export
 n_event_100y <- function(dat,
@@ -444,24 +493,25 @@ n_event_100y <- function(dat,
                          subjectid_var,
                          treatment_var,
                          treatment_value,
-                         ...){
-
+                         ...) {
   # Observation time (years) in treatment arm
   obs_time <- obs_time_by_trt(dat = dat,
                               cell_index = dat[["INDEX_"]][which(dat[[treatment_var]] == treatment_value)],
                               subjectid_var = subjectid_var,)[["value"]]
-
+  
   # Number of events
   intersect_index <- intersect(event_index, cell_index)
   n_event <- dat[list(intersect_index)] |>
     NROW()
-
+  
   out <-
-    data.table(description = "Events per 100 years of exposure",
-               qualifiers = NA_character_,
-               label = "R",
-               value = round(n_event / obs_time * 100))
-
+    data.table(
+      description = "Events per 100 years of exposure",
+      qualifiers = NA_character_,
+      label = "R",
+      value = round(n_event / obs_time * 100)
+    )
+  
   return(out)
 }
 
@@ -488,7 +538,6 @@ mean_value <- function(dat,
                        subjectid_var,
                        var,
                        ...) {
-  
   # Filter analysis data to cell specific content
   intersect_index <- intersect(cell_index, event_index)
   dat_cell <- dat[list(intersect_index)] |>
@@ -497,14 +546,12 @@ mean_value <- function(dat,
   stat <- dat_cell[[var]] |>
     mean()
   
-  return(
-    data.table(
-      label = "mean",
-      description = "Mean value",
-      qualifiers = var,
-      value = stat
-    )
-  )
+  return(data.table(
+    label = "mean",
+    description = "Mean value",
+    qualifiers = var,
+    value = stat
+  ))
 }
 
 #' Standard deviation
@@ -533,10 +580,10 @@ sd_value <- function(dat,
   intersect_index <- intersect(cell_index, event_index)
   dat_cell <- dat[J(intersect_index)] |>
     unique(by = c(subjectid_var))
-
+  
   stat <- dat_cell[[var]] |>
     sd()
-
+  
   return(
     data.table(
       label = "SD",
diff --git a/R/relative_risk.R b/R/relative_risk.R
index 5b9ae13..46b94db 100644
--- a/R/relative_risk.R
+++ b/R/relative_risk.R
@@ -19,7 +19,7 @@
 #'   the lower 95%CI
 #' @noRd
 
-relative_risk <- function(two_by_two) {
+relative_risk_ <- function(two_by_two) {
   #AMNOG rule - if any cells are zero, add 0.5 to each cell
 
   if (prod(two_by_two) == 0) {
diff --git a/R/two_by_two_x.R b/R/two_by_two_x.R
index 353240c..f50f75e 100644
--- a/R/two_by_two_x.R
+++ b/R/two_by_two_x.R
@@ -26,7 +26,7 @@
 #'@return A matrix
 #' @export
 #'
-mk_two_by_two <-
+make_two_by_two_ <-
   function(dat,
            event_index,
            cell_index,
@@ -39,7 +39,7 @@ mk_two_by_two <-
       length()
     if (n_trt_levels != 2) {
       stop(
-        "mk_two_by_two only supports copmarison between two treatment levels. ",
+        "make_two_by_two_ only supports copmarison between two treatment levels. ",
         "This dataset has ",
         n_trt_levels,
         " treatment levels"
@@ -133,7 +133,7 @@ ensure_complete_two_by_two <- function(two_by_two_long, treatment_var) {
 #' @return A two-by-two-by-k array where k represents the number of subgroups
 #'   (strata).
 #' @export
-make_two_by_two_by_k <-
+make_two_by_two_by_k_ <-
   function(dat,
            event_index,
            strata_var,
@@ -141,12 +141,12 @@ make_two_by_two_by_k <-
            treatment_refval,
            subjectid_var) {
     # Cell index is not relevant for `across_strata_across_trt`, but to simplify
-    # the code we make a dummy cell_index to pass to mk_two_by_two
+    # the code we make a dummy cell_index to pass to make_two_by_two_
     cell_index <- dat$INDEX_
 
     x <- lapply(
       split(dat, by = strata_var),
-      mk_two_by_two,
+      make_two_by_two_,
       event_index,
       cell_index,
       treatment_var,
diff --git a/R/validate_breslow_day.R b/R/validate_breslow_day.R
index 9ddbb78..98ec4f5 100644
--- a/R/validate_breslow_day.R
+++ b/R/validate_breslow_day.R
@@ -1,7 +1,7 @@
 #' Validate inputs to breslow-day function
 #'
 #' @param two_by_two_by_k A two-by-two-by-k table, usually produced
-#' by `make_two_by_two_by_k()`.
+#' by `make_two_by_two_by_k_()`.
 #'
 #' @return A Boolean TRUE/FALSE
 #' @noRd
diff --git a/_pkgdown.yml b/_pkgdown.yml
index f7ba2e3..d6e6c4b 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -10,6 +10,7 @@ reference:
   - n_event
   - n_subj_event
   - p_subj_event
+  - count_set
 - title: By strata, across treatment levels
   desc: |
     Function that operate by strata level but across treatment levels
@@ -22,6 +23,19 @@ reference:
   desc: |
     Function that operate across strata levels and across treatment levels
   contents: p_val_interaction
+- title: Building-block functions
+  desc: |
+    These are used as building blocks when making the chef-facing functions. They are exported so that they can also be use in chefCriteria but they cannot be called directly in a chef pipeline
+  contents:
+  - breslowdaytest_
+  - barnard_test_
+  - make_two_by_two_by_k_
+  - make_two_by_two_
+  - n_sub_
+  - n_event_
+  - n_subj_event_
+  - p_subj_event_
+  - use_chefStats
 - title: Misc
   desc: |
     Non-assigned
@@ -29,11 +43,8 @@ reference:
   - demographics_continuous
   - demographics_counts
   - hedges_g
-  - make_two_by_two_by_k
   - mean_value
-  - mk_two_by_two
   - n_event_100y
   - obs_time_by_trt
   - p_subj_event_by_trt
   - sd_value
-  - use_chefStats
diff --git a/man/barnard_test_.Rd b/man/barnard_test_.Rd
new file mode 100644
index 0000000..c4e6f1c
--- /dev/null
+++ b/man/barnard_test_.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/barnard_test.R
+\name{barnard_test_}
+\alias{barnard_test_}
+\title{Barnards Unconditional Exact Test with trial data}
+\usage{
+barnard_test_(two_by_two, verbose = FALSE, safe_mode = TRUE)
+}
+\arguments{
+\item{two_by_two}{A two-by-two table where the first column contains counts
+of events.}
+
+\item{verbose}{Set to TRUE if you want the normal output from running
+barnard.test to print to the console. Default is FALSE}
+
+\item{safe_mode}{If invalid input is provided, should the function stop with
+error (default), or return an NA (safe_mode = FALSE)}
+}
+\value{
+2-sided p-value for the Barnards Unconditional Exact test
+}
+\description{
+Barnards Unconditional Exact Test with trial data
+}
diff --git a/man/breslowdaytest_.Rd b/man/breslowdaytest_.Rd
new file mode 100644
index 0000000..2e281be
--- /dev/null
+++ b/man/breslowdaytest_.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/BreslowDayFunction.R
+\name{breslowdaytest_}
+\alias{breslowdaytest_}
+\title{Breslow-Day test}
+\usage{
+breslowdaytest_(x, odds_ratio = NA, correct = FALSE)
+}
+\arguments{
+\item{x}{a 2x2xK contingency table}
+
+\item{odds_ratio}{Odds Ration (default = NA)}
+
+\item{correct}{if TRUE Tarones correction is returned. Default = FALSE.}
+}
+\value{
+A vector with three values statistic - Breslow and Day test
+statistic pval - p value evtl. based on the Tarone test statistic using a
+\eqn{\chi^2(K-1)} distribution
+}
+\description{
+Breslow-Day test
+}
diff --git a/man/count_set.Rd b/man/count_set.Rd
new file mode 100644
index 0000000..56f32a4
--- /dev/null
+++ b/man/count_set.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/by_strata_by_trt.R
+\name{count_set}
+\alias{count_set}
+\title{Produce counts of Number of subjects, number of events, number of
+subjects with events, and proportion of subjects with events}
+\usage{
+count_set(dat, event_index, cell_index, subjectid_var, ...)
+}
+\arguments{
+\item{dat}{data.table. The analysis data set.}
+
+\item{event_index}{vector of integers that index the rows in \code{dat} that match
+the definition of an 'event'. Matching is done via the \code{INDEX_} column in
+\code{dat}.}
+
+\item{cell_index}{A vector of integers referencing the rows of \code{dat} (as
+specified by the \code{INDEX_} column in \code{dat}) that match the population to be
+analyzed. See the "Endpoint Events" vignette in {ramnog}
+for more information.}
+
+\item{subjectid_var}{character. Name of the subject identifier variable in the data (default is "USUBJID").}
+
+\item{...}{Optional parameters.}
+}
+\value{
+a data.table containing all statistical outputs
+}
+\description{
+A short cut - instead of calling the individual function
+(\code{n_sub}, \code{n_event}, \code{n_subj_event}, \code{p_subj_event}), one call to this
+function will produce all the described functions. This can be useful to
+save compute time as inside the chef pipeline there will be fewer
+iterations.
+}
diff --git a/man/mk_two_by_two.Rd b/man/make_two_by_two_.Rd
similarity index 95%
rename from man/mk_two_by_two.Rd
rename to man/make_two_by_two_.Rd
index 97fa08d..1e5f072 100644
--- a/man/mk_two_by_two.Rd
+++ b/man/make_two_by_two_.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/two_by_two_x.R
-\name{mk_two_by_two}
-\alias{mk_two_by_two}
+\name{make_two_by_two_}
+\alias{make_two_by_two_}
 \title{Make a two-by-two table}
 \usage{
-mk_two_by_two(
+make_two_by_two_(
   dat,
   event_index,
   cell_index,
diff --git a/man/make_two_by_two_by_k.Rd b/man/make_two_by_two_by_k_.Rd
similarity index 93%
rename from man/make_two_by_two_by_k.Rd
rename to man/make_two_by_two_by_k_.Rd
index 00c06e0..8635196 100644
--- a/man/make_two_by_two_by_k.Rd
+++ b/man/make_two_by_two_by_k_.Rd
@@ -1,11 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/two_by_two_x.R
-\name{make_two_by_two_by_k}
-\alias{make_two_by_two_by_k}
+\name{make_two_by_two_by_k_}
+\alias{make_two_by_two_by_k_}
 \title{Make 2x2xk contingency tables from summarized adam data. This function is NOT
 generalized}
 \usage{
-make_two_by_two_by_k(
+make_two_by_two_by_k_(
   dat,
   event_index,
   strata_var,
diff --git a/man/mean_value.Rd b/man/mean_value.Rd
index 107190c..7b031c0 100644
--- a/man/mean_value.Rd
+++ b/man/mean_value.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/by_strata_by_trt.R
 \name{mean_value}
 \alias{mean_value}
-\title{Calculate percentage of subjects with events}
+\title{Calculate mean value}
 \usage{
 mean_value(dat, event_index, cell_index, subjectid_var, var, ...)
 
@@ -32,5 +32,5 @@ A data.table containing the percentage of subjects with events by treatment.
 A data.table containing the mean value.
 }
 \description{
-Calculate the percentage of subjects with events by treatment and strata.
+Calculate the mean value of a variable
 }
diff --git a/man/n_event_.Rd b/man/n_event_.Rd
new file mode 100644
index 0000000..0b32a71
--- /dev/null
+++ b/man/n_event_.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/building_blocks.R
+\name{n_event_}
+\alias{n_event_}
+\title{Building-block: Number of events (multiple events counted multiple times)}
+\usage{
+n_event_(dat, intersect_index)
+}
+\arguments{
+\item{dat}{data.table. The analysis data set.}
+
+\item{intersect_index}{A vector of intergers referencing the rows of \code{dat}
+that match both (1) the population to be analyzed and (2) the
+defenition of an event}
+}
+\value{
+an integer value
+}
+\description{
+Building-block: Number of events (multiple events counted multiple times)
+}
diff --git a/man/n_sub_.Rd b/man/n_sub_.Rd
new file mode 100644
index 0000000..0a6a257
--- /dev/null
+++ b/man/n_sub_.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/building_blocks.R
+\name{n_sub_}
+\alias{n_sub_}
+\title{Building-block: Number of subjects}
+\usage{
+n_sub_(dat, cell_index, subjectid_var)
+}
+\arguments{
+\item{dat}{data.table. The analysis data set.}
+
+\item{cell_index}{A vector of integers referencing the rows of \code{dat} (as
+specified by the \code{INDEX_} column in \code{dat}) that match the population to be
+analyzed. See the "Endpoint Events" vignette in {ramnog} for more
+information.}
+
+\item{subjectid_var}{character. Name of the subject identifier variable in
+the data (default is "USUBJID").}
+}
+\value{
+An integer
+}
+\description{
+Building-block: Number of subjects
+}
diff --git a/man/n_subj_event_.Rd b/man/n_subj_event_.Rd
new file mode 100644
index 0000000..1a69d87
--- /dev/null
+++ b/man/n_subj_event_.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/building_blocks.R
+\name{n_subj_event_}
+\alias{n_subj_event_}
+\title{Building-block: Number of subjects with at least one event}
+\usage{
+n_subj_event_(dat, intersect_index, subjectid_var)
+}
+\arguments{
+\item{dat}{data.table. The analysis data set.}
+
+\item{intersect_index}{A vector of intergers referencing the rows of \code{dat}
+that match both (1) the population to be analyzed and (2) the defenition of
+an event}
+
+\item{subjectid_var}{character. Name of the subject identifier variable in
+the data (default is "USUBJID").}
+}
+\value{
+an interger value
+}
+\description{
+Building-block: Number of subjects with at least one event
+}
diff --git a/man/p_subj_event_.Rd b/man/p_subj_event_.Rd
new file mode 100644
index 0000000..7153b2a
--- /dev/null
+++ b/man/p_subj_event_.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/building_blocks.R
+\name{p_subj_event_}
+\alias{p_subj_event_}
+\title{Building-block: Proportion of subjects having at least one event}
+\usage{
+p_subj_event_(dat, cell_index, intersect_index, subjectid_var)
+}
+\arguments{
+\item{dat}{data.table. The analysis data set.}
+
+\item{cell_index}{A vector of integers referencing the rows of \code{dat} (as
+specified by the \code{INDEX_} column in \code{dat}) that match the population to be
+analyzed. See the "Endpoint Events" vignette in {ramnog} for more
+information.}
+
+\item{intersect_index}{A vector of intergers referencing the rows of \code{dat}
+that match both (1) the population to be analyzed and (2) the defenition of
+an event}
+
+\item{subjectid_var}{character. Name of the subject identifier variable in
+the data (default is "USUBJID").}
+}
+\value{
+an integer value
+}
+\description{
+Building-block: Proportion of subjects having at least one event
+}
diff --git a/tests/testthat/helper-02-skip_on_devops.R b/tests/testthat/helper-02-skip_on_devops.R
deleted file mode 100644
index 6cdce80..0000000
--- a/tests/testthat/helper-02-skip_on_devops.R
+++ /dev/null
@@ -1,6 +0,0 @@
-skip_on_devops <- function() {
-  if (!identical(Sys.getenv("ON_DEVOPS"), "TRUE")) {
-    return(invisible(TRUE))
-  }
-  testthat::skip("On DevOps")
-}
diff --git a/tests/testthat/helper-setup.R b/tests/testthat/helper-setup.R
new file mode 100644
index 0000000..ed556fa
--- /dev/null
+++ b/tests/testthat/helper-setup.R
@@ -0,0 +1,39 @@
+setup_basic_counts <- function() {
+  SAFFL <- SEX <- TRT01A <- AEDECOD <- NULL
+  input <- mk_adae(study_metadata = NULL)
+  input[, INDEX_ := .I] |> setkey(INDEX_)
+  cell_index_f <-
+    input[SAFFL == "Y" &
+            TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
+  cell_index_total <-
+    input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
+  event_index <-
+    input[AEDECOD == "ERYTHEMA"][["INDEX_"]]
+  return(list(
+    input = input,
+    cell_index_f = cell_index_f,
+    cell_index_total = cell_index_total,
+    event_index = event_index
+  ))
+}
+
+
+setup_by_strata_across_trt <- function(){
+  SAFFL <- SEX <- TRT01A <- NULL
+  input <- mk_adae(study_metadata = NULL)
+  input[, INDEX_ := .I] |> data.table::setkey(INDEX_)
+  cell_index_f <-
+    input[SAFFL == "Y" &
+            (TRT01A == "Placebo" | TRT01A == "Xanomeline High Dose") & SEX == "F"][["INDEX_"]]
+  cell_index_total <-
+    input[SAFFL == "Y" & (TRT01A == "Placebo" | TRT01A == "Xanomeline High Dose")][["INDEX_"]]
+  event_index <-
+    input[AEDECOD == "ERYTHEMA"][["INDEX_"]]
+  
+  return(list(
+    input = input,
+    cell_index_f = cell_index_f,
+    cell_index_total = cell_index_total,
+    event_index = event_index
+  ))
+}
diff --git a/tests/testthat/test-across_strata_across_trt.R b/tests/testthat/test-across_strata_across_trt.R
index a725384..a4e6aaa 100644
--- a/tests/testthat/test-across_strata_across_trt.R
+++ b/tests/testthat/test-across_strata_across_trt.R
@@ -104,7 +104,7 @@ test_that("p-val interaction work (breslow-day)", {
 
   # Combine male and female 2x2 tables
   arr <- simplify2array(list(two_by_two_m, two_by_two_f))
-  expected <- breslowdaytest(arr,
+  expected <- breslowdaytest_(arr,
     odds_ratio = NA,
     correct = FALSE
   )
diff --git a/tests/testthat/test-by_strata_across_trt.R b/tests/testthat/test-by_strata_across_trt.R
index 9c1fb4b..a54455a 100644
--- a/tests/testthat/test-by_strata_across_trt.R
+++ b/tests/testthat/test-by_strata_across_trt.R
@@ -1,22 +1,17 @@
 test_that("RR works", {
   # SETUP -------------------------------------------------------------------
-  ep <- ep_base(
-    stratify_by = list(c("SEX")),
-    data_prepare = mk_adae,
-    custom_pop_filter = "TRT01A != 'Xanomeline Low Dose'",
-    endpoint_filter = "AEDECOD == \"ERYTHEMA\"",
-    stat_by_strata_across_trt = list(RR)
-  ) |>
-    helper_pipeline_by_strata_across_trt()
+  setup <- setup_by_strata_across_trt()
   # ACT ---------------------------------------------------------------------
-  actual <-
-    chef::apply_stats(ep$ep,
-      ep$analysis_data_container,
-      type = "stat_by_strata_across_trt"
-    ) |>
-    tidyr::unnest(cols = stat_result) |>
-    setDT()
-
+  AEDECOD <- TRT01A <- SAFFL <- USUBJID <- label <- NULL
+  actual_total <- RR(
+    dat = setup$input,
+    event_index = setup$event_index,
+    cell_index = setup$cell_index_total,
+    treatment_var = "TRT01A",
+    treatment_refval = "Xanomeline High Dose",
+    subjectid_var = "USUBJID"
+  )
+  
   # EXPECT ------------------------------------------------------------------
   x <- mk_adae()
   x[, has_event := FALSE]
@@ -54,36 +49,43 @@ test_that("RR works", {
     NROW()
 
   rr <- (a / sum(a, b)) / (c_ / sum(c_, d))
-  expect_equal(actual[label == "RR" &
-    strata_var == "TOTAL_"]$value, rr)
-  expect_type(actual$value, "double")
+  expect_equal(actual_total[label == "RR"]$value, rr)
+  expect_type(actual_total$value, "double")
 })
 
 
 test_that("RR works when 0 events", {
   # SETUP -------------------------------------------------------------------
-  ep <- ep_base(
-    data_prepare = mk_adae,
-    stratify_by = list(c("SEX")),
-    custom_pop_filter = "TRT01A != 'Xanomeline Low Dose'",
-    endpoint_filter = "AESEV == \"SEVERE\"",
-    group_by = list(list(AESOC = "GASTROINTESTINAL DISORDERS")),
-    stat_by_strata_across_trt = list(RR)
-  ) |>
-    helper_pipeline_by_strata_across_trt()
+  AESEV <- TRT01A <- SAFFL <- USUBJID <- label <-  AESOC <- SEX <- NULL
+  input <- mk_adae(study_metadata = NULL)
+  input[, INDEX_ := .I] |> data.table::setkey(INDEX_)
+  cell_index_f <-
+    input[SAFFL == "Y" &
+            (TRT01A == "Placebo" |
+               TRT01A == "Xanomeline High Dose")
+             & SEX == "F"][["INDEX_"]]
+  cell_index_total <-
+    input[SAFFL == "Y" &
+            (TRT01A == "Placebo" |
+               TRT01A == "Xanomeline High Dose")][["INDEX_"]]
+  event_index <-
+    input[AESEV == "SEVERE" & AESOC == "GASTROINTESTINAL DISORDERS"][["INDEX_"]]
+  
+  
   # ACT ---------------------------------------------------------------------
-  actual <-
-    chef::apply_stats(ep$ep,
-      ep$analysis_data_container,
-      type = "stat_by_strata_across_trt"
-    ) |>
-    tidyr::unnest(stat_result) |>
-    setDT()
-  actual <- actual[strata_var == "SEX"]
-
+  actual_female <- RR(
+    dat = input,
+    event_index = event_index,
+    cell_index = cell_index_f,
+    treatment_var = "TRT01A",
+    treatment_refval = "Xanomeline High Dose",
+    subjectid_var = "USUBJID"
+  )
+  
+  
   # EXPECT ------------------------------------------------------------------
-  x <- mk_adae()[TRT01A != "Xanomeline Low Dose"]
-  x <- x[SAFFL == "Y" & SEX == "M"]
+  x <- mk_adae(study_metadata = NULL)[TRT01A != "Xanomeline Low Dose"]
+  x <- x[SAFFL == "Y" & SEX == "F"]
   x[, event := FALSE]
   x[AESEV == "SEVERE" &
     AESOC == "GASTROINTESTINAL DISORDERS", event := TRUE]
@@ -111,34 +113,33 @@ test_that("RR works when 0 events", {
     x[!(event) & TRT01A == "Placebo"] |> NROW() + 0.5
 
   rr <- (a / sum(a, b)) / (c / sum(c, d))
-  expect_equal(actual[label == "RR" &
-    strata_var == "SEX" &
-    grepl("M", stat_filter)]$value, rr)
-  expect_type(actual$value, "double")
+  se <-
+    sqrt(+1 / a + 1 / c - 1 / sum(a,b) - 1 /
+           sum(c,d))
+  
+  expect_equal(actual_female[label == "RR"]$value, rr)
+  expect_equal(actual_female[label == "SE"]$value, se)
+  
+  
 })
 
 
 test_that("OR works", {
   # SETUP -------------------------------------------------------------------
-  ep <- ep_base(
-    stratify_by = list(c("SEX")),
-    data_prepare = mk_adae,
-    custom_pop_filter = "TRT01A != 'Xanomeline Low Dose'",
-    endpoint_filter = "AEDECOD == \"ERYTHEMA\"",
-    stat_by_strata_across_trt = list(OR)
-  ) |>
-    helper_pipeline_by_strata_across_trt()
+  setup <- setup_by_strata_across_trt()
   # ACT ---------------------------------------------------------------------
   actual <-
-    chef::apply_stats(ep$ep,
-      ep$analysis_data_container,
-      type = "stat_by_strata_across_trt"
-    ) |>
-    tidyr::unnest(cols = stat_result) |>
-    setDT()
-
+    OR(
+      dat = setup$input,
+      event_index = setup$event_index,
+      cell_index = setup$cell_index_total,
+      treatment_var = "TRT01A",
+      treatment_refval = "Xanomeline High Dose",
+      subjectid_var = "USUBJID"
+    )
+  
   # EXPECT ------------------------------------------------------------------
-  x <- mk_adae()
+  x <- mk_adae(study_metadata = NULL)
   x[, has_event := FALSE]
   x[AEDECOD == "ERYTHEMA", has_event := TRUE]
   x <- x |>
@@ -174,8 +175,7 @@ test_that("OR works", {
     NROW()
 
   or <- prod(a, d) / prod(b, c_)
-  expect_equal(actual[label == "OR" &
-    strata_var == "TOTAL_"]$value, or)
+  expect_equal(actual[label == "OR"]$value, or)
   expect_type(actual$value, "double")
 })
 
diff --git a/tests/testthat/test-by_strata_by_trt.R b/tests/testthat/test-by_strata_by_trt.R
index e6136f9..b466d4f 100644
--- a/tests/testthat/test-by_strata_by_trt.R
+++ b/tests/testthat/test-by_strata_by_trt.R
@@ -1,27 +1,25 @@
 test_that("n_subj works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
+            setup <- setup_basic_counts()
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
-              n_subj(dat = input,
-                     cell_index = cell_index_f,
-                     subjectid_var = "USUBJID")
+              n_subj(
+                dat = setup$input,
+                cell_index = setup$cell_index_f,
+                subjectid_var = "USUBJID"
+              )
             
             
             actual_total <-
-              n_subj(dat = input,
-                     cell_index = cell_index_total,
-                     subjectid_var = "USUBJID")
+              n_subj(
+                dat = setup$input,
+                cell_index = setup$cell_index_total,
+                subjectid_var = "USUBJID"
+              )
             # EXPECT ------------------------------------------------------------------
-            x <- mk_adae()
+            x <- mk_adae(study_metadata = NULL)
             expected_female <-
               x[TRT01A == "Placebo" &
                   SAFFL == "Y" &  SEX == "F"] |>
@@ -40,36 +38,27 @@ test_that("n_subj works",
 test_that("n_event works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            event_index <-
-              input[AEDECOD == "ERYTHEMA"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
-            
+            setup <- setup_basic_counts()
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
               n_event(
-                dat = input,
-                cell_index = cell_index_f,
-                event_index = event_index,
+                dat = setup$input,
+                cell_index = setup$cell_index_f,
+                event_index = setup$event_index,
                 subjectid_var = "USUBJID"
               )
             
             
             actual_total <- n_event(
-              dat = input,
-              cell_index = cell_index_total,
-              event_index = event_index,
+              dat = setup$input,
+              cell_index = setup$cell_index_total,
+              event_index = setup$event_index,
               subjectid_var = "USUBJID"
             )
             
             # EXPECT ------------------------------------------------------------------
-            x <- mk_adae()
+            x <- mk_adae(study_metadata = NULL)
             expected_total <-
               x[TRT01A == "Placebo" &
                   SAFFL == "Y" & AEDECOD == "ERYTHEMA"] |>
@@ -88,37 +77,26 @@ test_that("n_event works",
 test_that("p_subj_event works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
-            event_index <-
-              input[AEDECOD == "ERYTHEMA"][["INDEX_"]]
-            
-            
-            
+            setup <- setup_basic_counts()
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
               p_subj_event(
-                dat = input,
-                cell_index = cell_index_f,
-                event_index = event_index,
+                dat = setup$input,
+                cell_index = setup$cell_index_f,
+                event_index = setup$event_index,
                 subjectid_var = "USUBJID"
               )
             
             
             actual_total <- p_subj_event(
-              dat = input,
-              cell_index = cell_index_total,
-              event_index = event_index,
+              dat = setup$input,
+              cell_index = setup$cell_index_total,
+              event_index = setup$event_index,
               subjectid_var = "USUBJID"
             )
             # EXPECT ------------------------------------------------------------------
-            x <- mk_adae()
+            x <- mk_adae(study_metadata = NULL)
             expected_total_numerator <-
               x[TRT01A == "Placebo" &
                   SAFFL == "Y" &  AEDECOD == "ERYTHEMA"] |>
@@ -153,17 +131,10 @@ test_that("p_subj_event works",
 test_that("n_event, n_subj_event, p_subj_event return 0 when no events",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo"][["INDEX_"]]
-            event_index <-
-              input[AESEV == "SEVERE" &
-                      AESOC == "GASTROINTESTINAL DISORDERS"][["INDEX_"]]
+            setup <- setup_basic_counts()
+            setup$event_index <-
+              setup$input[AESEV == "SEVERE" &
+                            AESOC == "GASTROINTESTINAL DISORDERS"][["INDEX_"]]
             # ACT ---------------------------------------------------------------------
             
             
@@ -171,25 +142,45 @@ test_that("n_event, n_subj_event, p_subj_event return 0 when no events",
             actual_female <-  list(
               p_subj_event =
                 p_subj_event(
-                  input,
-                  cell_index = cell_index_f,
-                  event_index = event_index,
+                  setup$input,
+                  cell_index = setup$cell_index_f,
+                  event_index = setup$event_index,
                   "USUBJID"
                 )$value,
-              n_event = n_event(input, cell_index_f, event_index, "USUBJID")$value,
-              n_subj_event = n_subj_event(input, cell_index_f, event_index, "USUBJID")$value
+              n_event = n_event(
+                setup$input,
+                setup$cell_index_f,
+                setup$event_index,
+                "USUBJID"
+              )$value,
+              n_subj_event = n_subj_event(
+                setup$input,
+                setup$cell_index_f,
+                setup$event_index,
+                "USUBJID"
+              )$value
             )
             
             actual_total <-  list(
               p_subj_event =
                 p_subj_event(
-                  input,
-                  cell_index = cell_index_total,
-                  event_index = event_index,
+                  setup$input,
+                  cell_index = setup$cell_index_total,
+                  event_index = setup$event_index,
                   "USUBJID"
                 )$value,
-              n_event = n_event(input, cell_index_total, event_index, "USUBJID")$value,
-              n_subj_event = n_subj_event(input, cell_index_total, event_index, "USUBJID")$value
+              n_event = n_event(
+                setup$input,
+                setup$cell_index_total,
+                setup$event_index,
+                "USUBJID"
+              )$value,
+              n_subj_event = n_subj_event(
+                setup$input,
+                setup$cell_index_total,
+                setup$event_index,
+                "USUBJID"
+              )$value
             )
             # EXPECT ------------------------------------------------------------------
             
@@ -203,30 +194,24 @@ test_that("n_event, n_subj_event, p_subj_event return 0 when no events",
 test_that("mean_value works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
+            setup <- setup_basic_counts()
             
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
               mean_value(
-                dat = input,
-                event_index = input[["INDEX_"]],
-                cell_index = cell_index_f,
+                dat = setup$input,
+                event_index = setup$input[["INDEX_"]],
+                cell_index = setup$cell_index_f,
                 subjectid_var = "USUBJID",
                 var = "AGE"
               )
             
             actual_total <-
               mean_value(
-                dat = input,
-                event_index = input[["INDEX_"]],
-                cell_index = cell_index_total,
+                dat = setup$input,
+                event_index = setup$input[["INDEX_"]],
+                cell_index = setup$cell_index_total,
                 subjectid_var = "USUBJID",
                 var = "AGE"
               )
@@ -254,30 +239,24 @@ test_that("mean_value works",
 test_that("sd_value works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
+            setup <- setup_basic_counts()
             
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
               sd_value(
-                dat = input,
-                event_index = input[["INDEX_"]],
-                cell_index = cell_index_f,
+                dat = setup$input,
+                event_index = setup$input[["INDEX_"]],
+                cell_index = setup$cell_index_f,
                 subjectid_var = "USUBJID",
                 var = "AGE"
               )
             
             actual_total <-
               sd_value(
-                dat = input,
-                event_index = input[["INDEX_"]],
-                cell_index = cell_index_total,
+                dat = setup$input,
+                event_index = setup$input[["INDEX_"]],
+                cell_index = setup$cell_index_total,
                 subjectid_var = "USUBJID",
                 var = "AGE"
               )
@@ -306,22 +285,16 @@ test_that("sd_value works",
 test_that("p_subj_event_by_trt works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
-            event_index <- which(input$ASEV == "MILD")
+            setup <- setup_basic_counts()
+            setup$event_index <- which(setup$input$ASEV == "MILD")
             
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
               p_subj_event_by_trt(
-                dat = input,
-                event_index = event_index,
-                cell_index = cell_index_f,
+                dat = setup$input,
+                event_index = setup$event_index,
+                cell_index = setup$cell_index_f,
                 subjectid_var = "USUBJID",
                 treatment_var = "TRT01A",
                 treatment_value = "Placebo"
@@ -329,9 +302,9 @@ test_that("p_subj_event_by_trt works",
             
             actual_total <-
               p_subj_event_by_trt(
-                dat = input,
-                event_index = event_index,
-                cell_index = cell_index_total,
+                dat = setup$input,
+                event_index = setup$event_index,
+                cell_index = setup$cell_index_total,
                 subjectid_var = "USUBJID",
                 treatment_var = "TRT01A",
                 treatment_value = "Placebo"
@@ -366,30 +339,98 @@ test_that("p_subj_event_by_trt works",
             expect_type(actual_female$value, "double")
           })
 
+test_that("count_set works", {
+  # SETUP -------------------------------------------------------------------
+  setup <- setup_basic_counts()
+  # ACT ---------------------------------------------------------------------
+  actual_female <-
+    count_set(
+      dat = setup$input,
+      cell_index = setup$cell_index_f,
+      event_index = setup$event_index,
+      subjectid_var = "USUBJID"
+    )
+  
+  
+  actual_total <- count_set(
+    dat = setup$input,
+    cell_index = setup$cell_index_total,
+    event_index = setup$event_index,
+    subjectid_var = "USUBJID"
+  )
+  # EXPECT ------------------------------------------------------------------
+  x <- mk_adae(study_metadata = NULL)
+  
+  # Totals
+  exepected_total_E <- x[TRT01A == "Placebo" &
+                     SAFFL == "Y" &
+                     AEDECOD == "ERYTHEMA"] |>
+    NROW()
+  expected_total_numerator <-
+    x[TRT01A == "Placebo" &
+        SAFFL == "Y" &  AEDECOD == "ERYTHEMA"] |>
+    uniqueN(by = "USUBJID")
+  expected_total_denomenator <-
+    x[TRT01A == "Placebo" &
+        SAFFL == "Y"] |>
+    uniqueN(by = "USUBJID")
+  expected_total <-
+    expected_total_numerator / expected_total_denomenator * 100
+  
+  # Female
+  expected_female_E <- x[TRT01A == "Placebo" &
+                            SAFFL == "Y" &
+                            SEX == "F" &
+                            AEDECOD == "ERYTHEMA"] |>
+    NROW()
+  expected_female_numerator <-
+    x[TRT01A == "Placebo" &
+        SAFFL == "Y" &
+        SEX == "F" & AEDECOD == "ERYTHEMA"] |>
+    uniqueN(by = "USUBJID")
+  expected_female_denomenator <-
+    x[TRT01A == "Placebo" &
+        SAFFL == "Y" &
+        SEX == "F"] |>
+    uniqueN(by = "USUBJID")
+  expected_female <-
+    expected_female_numerator / expected_female_denomenator * 100
+  
+  
+  expect_equal(actual_total[label == "N"]$value, expected_total_denomenator)
+  expect_equal(actual_total[label == "n"]$value, expected_total_numerator)
+  expect_equal(actual_total[label == "E"]$value, exepected_total_E)
+  expect_equal(actual_total[label == "(%)"]$value, expected_total)
+  
+  expect_equal(actual_female[label == "N"]$value, expected_female_denomenator)
+  expect_equal(actual_female[label == "n"]$value, expected_female_numerator)
+  expect_equal(actual_female[label == "E"]$value, expected_female_E)
+  expect_equal(actual_female[label == "(%)"]$value,
+               expected_female)
+  
+})
+
 
 test_that("obs_time_by_trt works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            input[, INTRDURY := TRTDURD]
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
-            
+            setup <- setup_basic_counts()
+            setup$input[, INTRDURY := TRTDURD]
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
-              obs_time_by_trt(dat = input,
-                              cell_index = cell_index_f,
-                              subjectid_var = "USUBJID")
+              obs_time_by_trt(
+                dat = setup$input,
+                cell_index = setup$cell_index_f,
+                subjectid_var = "USUBJID"
+              )
             
             actual_total <-
-              obs_time_by_trt(dat = input,
-                              cell_index = cell_index_total,
-                              subjectid_var = "USUBJID")
+              obs_time_by_trt(
+                dat = setup$input,
+                cell_index = setup$cell_index_total,
+                subjectid_var = "USUBJID"
+              )
             
             # EXPECT ------------------------------------------------------------------
             
@@ -414,23 +455,17 @@ test_that("obs_time_by_trt works",
 test_that("n_event_100y works",
           {
             # SETUP -------------------------------------------------------------------
-            input <- mk_adae()
-            input[, INDEX_ := .I] |> setkey(INDEX_)
-            input[, INTRDURY := TRTDURD]
-            cell_index_f <-
-              input[SAFFL == "Y" &
-                      TRT01A == "Placebo" & SEX == "F"][["INDEX_"]]
-            cell_index_total <-
-              input[SAFFL == "Y" & TRT01A == "Placebo"][["INDEX_"]]
-            event_index <- which(input$ASEV == "MILD")
+            setup <- setup_basic_counts()
+            setup$input[, INTRDURY := TRTDURD]
+            setup$event_index <- which(setup$input$ASEV == "MILD")
             
             # ACT ---------------------------------------------------------------------
             
             actual_female <-
               n_event_100y(
-                dat = input,
-                event_index = event_index,
-                cell_index = cell_index_f,
+                dat = setup$input,
+                event_index = setup$event_index,
+                cell_index = setup$cell_index_f,
                 subjectid_var = "USUBJID",
                 treatment_var = "TRT01A",
                 treatment_value = "Placebo"
@@ -438,9 +473,9 @@ test_that("n_event_100y works",
             
             actual_total <-
               n_event_100y(
-                dat = input,
-                event_index = event_index,
-                cell_index = cell_index_total,
+                dat = setup$input,
+                event_index = setup$event_index,
+                cell_index = setup$cell_index_total,
                 subjectid_var = "USUBJID",
                 treatment_var = "TRT01A",
                 treatment_value = "Placebo"
@@ -458,13 +493,15 @@ test_that("n_event_100y works",
             
             n_event_f <-
               nrow(x[SAFFL == "Y" &
-                       TRT01A == "Placebo" & ASEV == "MILD" & SEX == "F"])
+                       TRT01A == "Placebo" &
+                       ASEV == "MILD" & SEX == "F"])
             expected_female <- round(n_event_f / obs_time * 100)
             expect_equal(actual_female$value, expected_female)
             
             
             n_event_t <-
-              nrow(x[SAFFL == "Y" & TRT01A == "Placebo" & ASEV == "MILD"])
+              nrow(x[SAFFL == "Y" &
+                       TRT01A == "Placebo" & ASEV == "MILD"])
             expected_total <- round(n_event_t / obs_time * 100)
             expect_equal(actual_total$value, expected_total)
             
diff --git a/tests/testthat/test-core_stat_functions.R b/tests/testthat/test-core_stat_functions.R
index 6079435..3d218ef 100644
--- a/tests/testthat/test-core_stat_functions.R
+++ b/tests/testthat/test-core_stat_functions.R
@@ -2,32 +2,32 @@
 test_that("bernard works", {
   two_by_two_1 <-
     data.frame(N_subev = c(14, 5), N_no_event = c(396, 404))
-  results <- barnard_test(two_by_two_1)
+  results <- barnard_test_(two_by_two_1)
   expect_equal(results, 0.03893186, tolerance = 1e-6)
 })
 
 # test_that("bernard works with 1 and 0 events", {
 # two_by_two_2<- data.frame(N_subev = c(1, 0), N_no_event = c(396, 404))
-#   results <- barnard_test(two_by_two_2)
+#   results <- barnard_test_(two_by_two_2)
 #   expect_equal(results, 0.3693807, tolerance = 1e-6)
 # })
 
 test_that("bernard fails when no events", {
   tbt_no_events <-
     data.frame(N_subev = c(0, 0), N_no_event = c(396, 404))
-  expect_error(barnard_test(tbt_no_events))
+  expect_error(barnard_test_(tbt_no_events))
 })
 
 test_that("bernard fails when no patients in one arm", {
   tbt_no_exposed <-
     data.frame(N_subev = c(0, 5), N_no_event = c(0, 404))
-  expect_error(barnard_test(tbt_no_exposed))
+  expect_error(barnard_test_(tbt_no_exposed))
 })
 
 test_that("relative risk works", {
   two_by_two_1 <-
     data.frame(N_subev = c(14, 5), N_no_event = c(396, 404))
-  results <- relative_risk(two_by_two_1)
+  results <- relative_risk_(two_by_two_1)
   x <- unlist(results)
   check_vals <- c(2.793171, 0.5162795, 7.683435, 1.015406)
   names(check_vals) <- c("RR", "SE", "RRUL", "RRLL")
@@ -64,7 +64,7 @@ test_that("Breslow-Day works", {
     array(c(unlist(x)),
           dim = c(2, 2, 2),
           dimnames = list(c("A", "B"), c("N_subev", "N_subjt"), c("F", "M")))
-  expect_equal(breslowdaytest(two_by_two_by_k)$p.value, 0.02737539, tolerance = 1e-6)
+  expect_equal(breslowdaytest_(two_by_two_by_k)$p.value, 0.02737539, tolerance = 1e-6)
 })
 
 test_that("Breslow-Day errors when no events in a subgourp", {
@@ -77,5 +77,5 @@ test_that("Breslow-Day errors when no events in a subgourp", {
           dim = c(2, 2, 2),
           dimnames = list(c("A", "B"), c("N_subev", "N_subjt"), c("F", "M")))
 
-  expect_error(breslowdaytest(two_by_two_by_k_2))
+  expect_error(breslowdaytest_(two_by_two_by_k_2))
 })
diff --git a/tests/testthat/test-two_by_twos.R b/tests/testthat/test-two_by_twos.R
index 35bc0d1..991dbf8 100644
--- a/tests/testthat/test-two_by_twos.R
+++ b/tests/testthat/test-two_by_twos.R
@@ -1,4 +1,4 @@
-test_that("mk_two_by_two adds empty levels in 2x2 table", {
+test_that("make_two_by_two_ adds empty levels in 2x2 table", {
 
 # SETUP -------------------------------------------------------------------
   input <- mk_adae()
@@ -10,7 +10,7 @@ test_that("mk_two_by_two adds empty levels in 2x2 table", {
 
 
 # ACT ---------------------------------------------------------------------
-  actual <- mk_two_by_two(
+  actual <- make_two_by_two_(
       dat = input,
       event_index = event_index,
       cell_index = cell_index,
diff --git a/vignettes/add_functions.Rmd b/vignettes/add_functions.Rmd
new file mode 100644
index 0000000..42681fe
--- /dev/null
+++ b/vignettes/add_functions.Rmd
@@ -0,0 +1,210 @@
+---
+title: "Adding new functions"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Adding new functions}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+
+```{r setup, echo=FALSE}
+library(chefStats)
+library(data.table)
+library(kableExtra)
+```
+
+To add new functions to chefStats, you follow three general steps:
+
+
+  1. Consider what type of function you need: `stat_by_strata_by_trt`, `stat_by_strata_across_trt` or `stat_across_strata_across_trt` (see article [Function types](https://hta-pharma.github.io/chefStats/articles/function_types.html).
+  2. Decide what information the function needs in order to compute the desired results. See section [Interface with chef](#interface-with-chef)
+  3. Write the function definition (use `use_chefStats()` to help create the template)
+  4. Write the appropriate unit-tests for the function (using [testthat](https://testthat.r-lib.org/) framework)
+<br>
+
+### Walk through example
+
+Here we show how we would add the function `n_subj_event()`, which counts the number of subjects experiencing the event by stratification level and treatment level, if it didn't already exist in chefStats.
+
+  1. Since the function produces a number by stratification level and by treatment level, this will be a `stat_by_strata_by_trt`
+  2. The function will need to know (see section [Interface with chef](#interface-with-chef) for more details on how to pass this information from chef to chefStats):
+      - Who was eligible to have the event
+      - Who has the event
+      - What the stratification "group" (e.g. SEX, AGE, etc) and level ("MALE", ">65", etc). is
+      - What the treatment level is (e.g. "TreatmentA")
+  3. The function definition might look like this:
+  
+```{r, eval=FALSE}
+  n_subj_event <-
+  function(dat,
+           event_index,
+           cell_index,
+           subjectid_var,
+           ... # the `...` are required for all chefStats functions to "collect" any unused arguments passed from chef
+           ) {
+    # Please see the "Interface with chef" section for details on what
+    # `event_index` and `cell_index`
+    
+    # `intersect()` provides us with a vector of rows in `dat` that match both
+    # `event_index` and `cell_index` - aka records that were BOTH eligible to
+    # have the event (`cell_index`) AND had the event (`event_index`)
+    index <- intersect(event_index, cell_index)
+    
+    # Return all matching rows in `dat` where `INDEX_`
+    # matches `index`.
+    event_rows <- dat[INDEX_ %in% index]
+    
+    # `dat` contains event data, meaning subjects can appear more than once if
+    # they have >1 event, so we need to remove these extra rows to get a proper
+    # count
+    event_rows_unique_by_subject <- unique(event_rows, by = subjectid_var)
+    
+    stat <-   NROW(event_rows_unique_by_subject)
+
+    # The return object has to be a data.table object with the following 3
+    # columns. The `value` column always has to be a double (not an integer)
+    return(
+      data.table(
+        description = "Number of subjects with events",
+        label = "n_subj_events",
+        value = as.double(stat)
+      )
+    )
+
+  }
+
+
+```
+  
+<style>
+/* Remove border around tab content */
+.tab-content {
+    border: none !important;
+}
+</style>
+
+<br>
+
+### Interface with chef {.tabset .tabset-pills}
+
+The statistical functions from chefStats will be called within the context of a chef pipeline. The category of the function determines what arguments are passed from chef to the function, however some arguments are passed to all categories.
+
+The following table describes arguments that are passed to **all** chefStats function:
+
+```{r, echo=FALSE}
+dt <-
+  data.table::data.table(
+    `Argument Names` = c(
+      "`dat`",
+      "`event_index`",
+      "`strata_var`",
+      "`treatment_var`",
+      "`subjectid_var`"
+    ),
+    Description = c(
+      "A `data.table` containing the analysis data set produced by the `prepare_data` function. To allow flexability for creative use of chefStats functions, this dataset is **not** filtered to the exact records needed for each analysis when passed to chefStats. Instead this filtering is done inside the chefStats functions. This is done using the `INDEX_` column from `dat` that serves as a row ID, and is used for filtering with, for example, `cell_index` or `event_index`",
+      "A `vector` of indicies indicating which rows (as specified in `INDEX_` column of `dat`) are considered to be events for the endpoint specification under evaluation",
+      "A `character` indicating which stratatification is being used (e.g. SEX, AGE, etc)",
+      "A `character` indicating the name of the column in `dat` containing the treatment information used for the endpoint",
+      "A `character` specifying the name of the column in `dat` containing the subject ID. Defaults to \"USUBJID\""
+    )
+  )
+dt |>
+  kable(format = "html",
+        table.attr = "class='table table-bordered'",
+        caption = "Arguments always passed to chefStats functions") |>
+  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
+  row_spec(0, background = "lightblue")
+```
+
+<br><br>
+
+Additionally, each function type receives the following arguments
+
+#### stat_by_strata_by_trt
+
+```{r, echo=FALSE}
+dt <-
+  data.table::data.table(
+    `Argument Names` = c(
+      "`cell_index`",
+      "`strata_val`",
+      "`treatment_val`"
+    ),
+    Description = c(
+      "A `vector` of indicies specifying which rows in `dat` are considered to be part of the analysis for the given strata level and treatment level under evaluation. For example, if the current instance of the function was analysis \"Number of Events\" for SEX==\"M\" and TRT01A == \"Placebo\", then `cell_index` would be a vector of records in `dat$INDEX_` that match those parameters. You can thus obtain the analysis set by filtering `dat` via: `dat[cell_index %in% INDEX_]`",
+      "A `character` specifying the stratification level under evaluation. For example if `strat_var ==\"SEX\"`, then `strat_val` could be either `\"M\"` or `\"F\"`",
+      "A `character` specifying the treatment level (or treatment arm). *Not to be confused with the `treatment_refval` that specifies the reference treatment value."
+    )
+  )
+dt |>
+  kable(format = "html",
+        table.attr = "class='table table-bordered'",
+        caption = "Additional arguments passed to by_strata_by_trt functions") |>
+  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
+  row_spec(0, background = "lightblue")
+```
+
+
+#### stat_by_strata_across_trt
+
+
+```{r, echo=FALSE}
+dt <-
+  data.table::data.table(
+    `Argument Names` = c("`cell_index`",
+                         "`strata_val`",
+                         "`treatment_refval`"),
+    Description = c(
+      "A `vector` of indicies specifying which rows in `dat` are considered to be part of the analysis for the given strata level and treatment level under evaluation. For example, if the current instance of the function was analysis \"Number of Events\" for SEX==\"M\" and TRT01A == \"Placebo\", then `cell_index` would be a vector of records in `dat$INDEX_` that match those parameters. You can thus obtain the analysis set by filtering `dat` via: `dat[cell_index %in% INDEX_]`",
+      "A `character` specifying the stratification level under evaluation. For example if `strat_var ==\"SEX\"`, then `strat_val` could be either `\"M\"` or `\"F\"`",
+      "A `character` specifying the treatment reference level. *Not to be confused with the `treatment_val` that specifies the treatment value for `by_strata_by_trt` functions"
+    )
+  )
+dt |>
+  kable(format = "html",
+        table.attr = "class='table table-bordered'",
+        caption = "Additional arguments passed to by_strata_across_trt functions") |>
+  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
+  row_spec(0, background = "lightblue")
+```
+#### stat_by_strata_across_trt
+
+
+```{r, echo=FALSE}
+dt <-
+  data.table::data.table(
+    `Argument Names` = c("`strata_val`",
+                         "`treatment_refval`"),
+    Description = c(
+      "A `character` specifying the stratification level under evaluation. For example if `strat_var ==\"SEX\"`, then `strat_val` could be either `\"M\"` or `\"F\"`",
+      "A `character` specifying the treatment reference level. *Not to be confused with the `treatment_val` that specifies the treatment value for `by_strata_by_trt` functions"
+    )
+  )
+dt |>
+  kable(format = "html",
+        table.attr = "class='table table-bordered'",
+        caption = "Additional arguments passed to across_strata_across_trt functions") |>
+  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
+  row_spec(0, background = "lightblue")
+```
+
+
+
+<br>
+
+### Using building-blocks
+
+When possible, utilize building-block functions when making new statistical functions. For example, if the new functions requires a 2x2 table, use the `make_two_by_two_()` function instead of writing a new one.
+
+This also allows you to easily write functions that collapse several chefStats functions into one function call. For example, on call to `count_set()` is the same as one call each to `n_sub()`, `n_event()`, `n_subj_event()` and `p_subj_event()`. The only rational for combining functions like this is to save compute time, due to the way chef pipelines are constructed. 
+
+Building block function names are always suffixed with an underscore `_` to indicate they cannot be called from inside a chef pipeline. For example, `n_event_()` is a building block that is used to make `n_event()`, but `n_event_()` can also be use to build other functions, such as `count_set()`, because it does not format it's output for a chef pipeline. Conversely, `n_event()` does format the output, so it can be use in a chef pipeline, but not as a building block.
diff --git a/vignettes/function_types.Rmd b/vignettes/function_types.Rmd
index 93f7a76..8c4163d 100644
--- a/vignettes/function_types.Rmd
+++ b/vignettes/function_types.Rmd
@@ -95,176 +95,3 @@ Whereas a `stat_across_strata_across_trt` will only be called once per stratific
   
   1. strata_var == "SEX"
   1. strata_var == "AGE"
-
-
-## Adding new functions
-
-To add new functions to chefStats, you follow three general steps:
-
-
-  1. Consider what type of function you need: `stat_by_strata_by_trt`, `stat_by_strata_across_trt` or `stat_across_strata_across_trt`.
-  2. Decide what information the function needs in order to compute the desired results. See section [Interface with chef](#interface-with-chef)
-  3. Write the function definition (use `use_chefStats()` to help create the template)
-  4. Write the appropriate unit-tests for the function (using [testthat](https://testthat.r-lib.org/) framework)
-
-### Walk through example
-
-Here we show how we would add the function `n_subj_event()`, which counts the number of subjects experiencing the event by stratification level and treatment level, if it didn't already exist in chefStats.
-
-  1. Since the function produces a number by stratification level and by treatment level, this will be a `stat_by_strata_by_trt`
-  2. The function will need to know (see section [Interface with chef](#interface-with-chef) for more details on how to pass this information from chef to chefStats):
-      - Who was eligible to have the event
-      - Who has the event
-      - What the stratification "group" (e.g. SEX, AGE, etc) and level ("MALE", ">65", etc). is
-      - What the treatment level is (e.g. "TreatmentA")
-  3. The function definition might look like this:
-  
-```{r, eval=FALSE}
-  n_subj_event <-
-  function(dat,
-           event_index,
-           cell_index,
-           subjectid_var,
-           ... # the `...` are required for all chefStats functions to "collect" any unused arguments passed from chef
-           ) {
-    # Please see the "Interface with chef" section for details on what
-    # `event_index` and `cell_index`
-    
-    # `intersect()` provides us with a vector of rows in `dat` that match both
-    # `event_index` and `cell_index` - aka records that were BOTH eligible to
-    # have the event (`cell_index`) AND had the event (`event_index`)
-    index <- intersect(event_index, cell_index)
-    
-    # Return all matching rows in `dat` where `INDEX_`
-    # matches `index`.
-    event_rows <- dat[INDEX_ %in% index]
-    
-    # `dat` contains event data, meaning subjects can appear more than once if
-    # they have >1 event, so we need to remove these extra rows to get a proper
-    # count
-    event_rows_unique_by_subject <- unique(event_rows, by = subjectid_var)
-    
-    stat <-   NROW(event_rows_unique_by_subject)
-
-    # The return object has to be a data.table object with the following 3
-    # columns. The `value` column always has to be a double (not an integer)
-    return(
-      data.table(
-        description = "Number of subjects with events",
-        label = "n_subj_events",
-        value = as.double(stat)
-      )
-    )
-
-  }
-
-
-```
-  
-<style>
-/* Remove border around tab content */
-.tab-content {
-    border: none !important;
-}
-</style>
-### Interface with chef {.tabset .tabset-pills}
-
-The statistical functions from chefStats will be called within the context of a chef pipeline. The category of the function determines what arguments are passed from chef to the function, however some arguments are passed to all categories.
-
-The following table describes arguments that are passed to **all** chefStats function:
-
-```{r, echo=FALSE}
-dt <-
-  data.table::data.table(
-    `Argument Names` = c(
-      "`dat`",
-      "`event_index`",
-      "`strata_var`",
-      "`treatment_var`",
-      "`subjectid_var`"
-    ),
-    Description = c(
-      "A `data.table` containing the analysis data set produced by the `prepare_data` function. To allow flexability for creative use of chefStats functions, this dataset is **not** filtered to the exact records needed for each analysis when passed to chefStats. Instead this filtering is done inside the chefStats functions. This is done using the `INDEX_` column from `dat` that serves as a row ID, and is used for filtering with, for example, `cell_index` or `event_index`",
-      "A `vector` of indicies indicating which rows (as specified in `INDEX_` column of `dat`) are considered to be events for the endpoint specification under evaluation",
-      "A `character` indicating which stratatification is being used (e.g. SEX, AGE, etc)",
-      "A `character` indicating the name of the column in `dat` containing the treatment information used for the endpoint",
-      "A `character` specifying the name of the column in `dat` containing the subject ID. Defaults to \"USUBJID\""
-    )
-  )
-dt |>
-  kable(format = "html",
-        table.attr = "class='table table-bordered'",
-        caption = "Arguments always passed to chefStats functions") |>
-  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
-  row_spec(0, background = "lightblue")
-```
-
-Additionally, each function type receives the following arguments
-
-#### stat_by_strata_by_trt
-
-```{r, echo=FALSE}
-dt <-
-  data.table::data.table(
-    `Argument Names` = c(
-      "`cell_index`",
-      "`strata_val`",
-      "`treatment_val`"
-    ),
-    Description = c(
-      "A `vector` of indicies specifying which rows in `dat` are considered to be part of the analysis for the given strata level and treatment level under evaluation. For example, if the current instance of the function was analysis \"Number of Events\" for SEX==\"M\" and TRT01A == \"Placebo\", then `cell_index` would be a vector of records in `dat$INDEX_` that match those parameters. You can thus obtain the analysis set by filtering `dat` via: `dat[cell_index %in% INDEX_]`",
-      "A `character` specifying the stratification level under evaluation. For example if `strat_var ==\"SEX\"`, then `strat_val` could be either `\"M\"` or `\"F\"`",
-      "A `character` specifying the treatment level (or treatment arm). *Not to be confused with the `treatment_refval` that specifies the reference treatment value."
-    )
-  )
-dt |>
-  kable(format = "html",
-        table.attr = "class='table table-bordered'",
-        caption = "Additional arguments passed to by_strata_by_trt functions") |>
-  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
-  row_spec(0, background = "lightblue")
-```
-
-
-#### stat_by_strata_across_trt
-
-
-```{r, echo=FALSE}
-dt <-
-  data.table::data.table(
-    `Argument Names` = c("`cell_index`",
-                         "`strata_val`",
-                         "`treatment_refval`"),
-    Description = c(
-      "A `vector` of indicies specifying which rows in `dat` are considered to be part of the analysis for the given strata level and treatment level under evaluation. For example, if the current instance of the function was analysis \"Number of Events\" for SEX==\"M\" and TRT01A == \"Placebo\", then `cell_index` would be a vector of records in `dat$INDEX_` that match those parameters. You can thus obtain the analysis set by filtering `dat` via: `dat[cell_index %in% INDEX_]`",
-      "A `character` specifying the stratification level under evaluation. For example if `strat_var ==\"SEX\"`, then `strat_val` could be either `\"M\"` or `\"F\"`",
-      "A `character` specifying the treatment reference level. *Not to be confused with the `treatment_val` that specifies the treatment value for `by_strata_by_trt` functions"
-    )
-  )
-dt |>
-  kable(format = "html",
-        table.attr = "class='table table-bordered'",
-        caption = "Additional arguments passed to by_strata_across_trt functions") |>
-  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
-  row_spec(0, background = "lightblue")
-```
-#### stat_by_strata_across_trt
-
-
-```{r, echo=FALSE}
-dt <-
-  data.table::data.table(
-    `Argument Names` = c("`strata_val`",
-                         "`treatment_refval`"),
-    Description = c(
-      "A `character` specifying the stratification level under evaluation. For example if `strat_var ==\"SEX\"`, then `strat_val` could be either `\"M\"` or `\"F\"`",
-      "A `character` specifying the treatment reference level. *Not to be confused with the `treatment_val` that specifies the treatment value for `by_strata_by_trt` functions"
-    )
-  )
-dt |>
-  kable(format = "html",
-        table.attr = "class='table table-bordered'",
-        caption = "Additional arguments passed to across_strata_across_trt functions") |>
-  kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
-  row_spec(0, background = "lightblue")
-```

From fadbaa8f9f18e19a75d8979b6fe67aa0be988520 Mon Sep 17 00:00:00 2001
From: nsjohnsen <99595467+nsjohnsen@users.noreply.github.com>
Date: Tue, 2 Apr 2024 11:11:39 +0200
Subject: [PATCH 2/5] Adding author (#15)

Added missing and very important author
---
 DESCRIPTION          | 3 +++
 R/by_strata_by_trt.R | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 1da2504..269aeba 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,6 +8,9 @@ Authors@R:
       person(given = "NOSJ (Nicolai Skov Johnsen)",
              role = "aut",
              email = "nosj@novonordisk.com"),
+      person(given = "HSPU (Henrik Sparre Spiegelhauer)",
+             role = "aut",
+             email = "hspu@novonordisk.com"),       
       person(given = "ABIU (Anders Bilgrau)",
              role = "aut",
              email = "abiu@novonordisk.com"),
diff --git a/R/by_strata_by_trt.R b/R/by_strata_by_trt.R
index 567c2fe..c364e78 100644
--- a/R/by_strata_by_trt.R
+++ b/R/by_strata_by_trt.R
@@ -177,7 +177,6 @@ p_subj_event <-
 #' @return a data.table containing all statistical outputs
 #' @export
 #'
-#' @examples
 count_set <- function(dat,
                    event_index,
                    cell_index,

From 9fcb8e6a94e7f76cf3345c212deeae4abbbb3370 Mon Sep 17 00:00:00 2001
From: nsjohnsen <99595467+nsjohnsen@users.noreply.github.com>
Date: Tue, 2 Apr 2024 13:11:30 +0200
Subject: [PATCH 3/5] Minor updates in documentation (#16)

* Minor updates in documentation

* Minor adjustment in readme
---
 README.Rmd                  |  3 +--
 README.md                   |  5 ++---
 vignettes/add_functions.Rmd | 34 +++++++++++++++++-----------------
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 6442840..f2ce755 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -17,5 +17,4 @@ As the functions found in chefStats are designed to be used with chef, it may be
 
 # Developer Documentation
 
-Please refer to {ramnog} for general developer documentation.
-[Ramnog Developer Documentation](https://hta-pharma.github.io/ramnog/articles/#:~:text=Debugging-,Development,-Git%20Workflow)
+Please refer to {ramnog} for general [developer documentation](https://hta-pharma.github.io/ramnog/articles/#:~:text=Debugging-,Development,-Git%20Workflow).
diff --git a/README.md b/README.md
index a682b6e..abdca04 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,5 @@ it may be unwieldy to use these functions independently.
 
 # Developer Documentation
 
-Please refer to {ramnog} for general developer documentation. [Ramnog
-Developer
-Documentation](https://hta-pharma.github.io/ramnog/articles/#:~:text=Debugging-,Development,-Git%20Workflow)
+Please refer to {ramnog} for general [developer
+documentation](https://hta-pharma.github.io/ramnog/articles/#:~:text=Debugging-,Development,-Git%20Workflow).
diff --git a/vignettes/add_functions.Rmd b/vignettes/add_functions.Rmd
index 42681fe..653adc3 100644
--- a/vignettes/add_functions.Rmd
+++ b/vignettes/add_functions.Rmd
@@ -21,24 +21,24 @@ library(data.table)
 library(kableExtra)
 ```
 
-To add new functions to chefStats, you follow three general steps:
+To add new functions to {chefStats}, you must follow these four general steps:
 
 
-  1. Consider what type of function you need: `stat_by_strata_by_trt`, `stat_by_strata_across_trt` or `stat_across_strata_across_trt` (see article [Function types](https://hta-pharma.github.io/chefStats/articles/function_types.html).
-  2. Decide what information the function needs in order to compute the desired results. See section [Interface with chef](#interface-with-chef)
+  1. Consider what type of function you need: `stat_by_strata_by_trt`, `stat_by_strata_across_trt` or `stat_across_strata_across_trt`. See article [Function types](https://hta-pharma.github.io/chefStats/articles/function_types.html.
+  2. Decide what information the function needs in order to compute the desired results. See section [Interface with {chef}](#interface-with-chef)
   3. Write the function definition (use `use_chefStats()` to help create the template)
   4. Write the appropriate unit-tests for the function (using [testthat](https://testthat.r-lib.org/) framework)
 <br>
 
-### Walk through example
+### Walkthrough example
 
-Here we show how we would add the function `n_subj_event()`, which counts the number of subjects experiencing the event by stratification level and treatment level, if it didn't already exist in chefStats.
+Here we show how we would add the function `n_subj_event()`, which counts the number of subjects experiencing the event by stratification level and treatment level, if it did not already exist in {chefStats}.
 
   1. Since the function produces a number by stratification level and by treatment level, this will be a `stat_by_strata_by_trt`
-  2. The function will need to know (see section [Interface with chef](#interface-with-chef) for more details on how to pass this information from chef to chefStats):
+  2. The function will need to know. See section [Interface with chef](#interface-with-chef) for more details on how to pass this information from {chef} to {chefStats}:
       - Who was eligible to have the event
       - Who has the event
-      - What the stratification "group" (e.g. SEX, AGE, etc) and level ("MALE", ">65", etc). is
+      - What the stratification "group" (e.g. SEX, AGE, etc) and level ("MALE", ">65", etc.) is
       - What the treatment level is (e.g. "TreatmentA")
   3. The function definition might look like this:
   
@@ -48,9 +48,9 @@ Here we show how we would add the function `n_subj_event()`, which counts the nu
            event_index,
            cell_index,
            subjectid_var,
-           ... # the `...` are required for all chefStats functions to "collect" any unused arguments passed from chef
+           ... # the `...` are required for all {chefStats} functions to "collect" any unused arguments passed from {chef}
            ) {
-    # Please see the "Interface with chef" section for details on what
+    # Please see the "Interface with {chef}" section for details on what
     # `event_index` and `cell_index`
     
     # `intersect()` provides us with a vector of rows in `dat` that match both
@@ -67,7 +67,7 @@ Here we show how we would add the function `n_subj_event()`, which counts the nu
     # count
     event_rows_unique_by_subject <- unique(event_rows, by = subjectid_var)
     
-    stat <-   NROW(event_rows_unique_by_subject)
+    stat <- NROW(event_rows_unique_by_subject)
 
     # The return object has to be a data.table object with the following 3
     # columns. The `value` column always has to be a double (not an integer)
@@ -93,11 +93,11 @@ Here we show how we would add the function `n_subj_event()`, which counts the nu
 
 <br>
 
-### Interface with chef {.tabset .tabset-pills}
+### Interface with {chef} {.tabset .tabset-pills}
 
-The statistical functions from chefStats will be called within the context of a chef pipeline. The category of the function determines what arguments are passed from chef to the function, however some arguments are passed to all categories.
+The statistical functions from {chefStats} will be called within the context of a {chef} pipeline. The category of the function determines what arguments are passed from {chef} to the function, however some arguments are passed to all categories.
 
-The following table describes arguments that are passed to **all** chefStats function:
+The following table describes arguments that are passed to **all** {chefStats} function:
 
 ```{r, echo=FALSE}
 dt <-
@@ -110,7 +110,7 @@ dt <-
       "`subjectid_var`"
     ),
     Description = c(
-      "A `data.table` containing the analysis data set produced by the `prepare_data` function. To allow flexability for creative use of chefStats functions, this dataset is **not** filtered to the exact records needed for each analysis when passed to chefStats. Instead this filtering is done inside the chefStats functions. This is done using the `INDEX_` column from `dat` that serves as a row ID, and is used for filtering with, for example, `cell_index` or `event_index`",
+      "A `data.table` containing the analysis data set produced by the `prepare_data` function. To allow flexability for creative use of {chefStats} functions, this dataset is **not** filtered to the exact records needed for each analysis when passed to {chefStats}. Instead this filtering is done inside the {chefStats} functions. This is done using the `INDEX_` column from `dat` that serves as a row ID, and is used for filtering with, for example, `cell_index` or `event_index`",
       "A `vector` of indicies indicating which rows (as specified in `INDEX_` column of `dat`) are considered to be events for the endpoint specification under evaluation",
       "A `character` indicating which stratatification is being used (e.g. SEX, AGE, etc)",
       "A `character` indicating the name of the column in `dat` containing the treatment information used for the endpoint",
@@ -120,7 +120,7 @@ dt <-
 dt |>
   kable(format = "html",
         table.attr = "class='table table-bordered'",
-        caption = "Arguments always passed to chefStats functions") |>
+        caption = "Arguments always passed to {chefStats} functions") |>
   kable_styling(bootstrap_options = c("hover", "condensed", "responsive")) |>
   row_spec(0, background = "lightblue")
 ```
@@ -205,6 +205,6 @@ dt |>
 
 When possible, utilize building-block functions when making new statistical functions. For example, if the new functions requires a 2x2 table, use the `make_two_by_two_()` function instead of writing a new one.
 
-This also allows you to easily write functions that collapse several chefStats functions into one function call. For example, on call to `count_set()` is the same as one call each to `n_sub()`, `n_event()`, `n_subj_event()` and `p_subj_event()`. The only rational for combining functions like this is to save compute time, due to the way chef pipelines are constructed. 
+This also allows you to easily write functions that collapse several {chefStats} functions into one function call. For example, on call to `count_set()` is the same as one call each to `n_sub()`, `n_event()`, `n_subj_event()` and `p_subj_event()`. The only rational for combining functions like this is to save compute time, due to the way {chef} pipelines are constructed. 
 
-Building block function names are always suffixed with an underscore `_` to indicate they cannot be called from inside a chef pipeline. For example, `n_event_()` is a building block that is used to make `n_event()`, but `n_event_()` can also be use to build other functions, such as `count_set()`, because it does not format it's output for a chef pipeline. Conversely, `n_event()` does format the output, so it can be use in a chef pipeline, but not as a building block.
+Building block function names are always suffixed with an underscore `_` to indicate they cannot be called from inside a {chef} pipeline. For example, `n_event_()` is a building block that is used to make `n_event()`, but `n_event_()` can also be use to build other functions, such as `count_set()`, because it does not format it's output for a {chef} pipeline. Conversely, `n_event()` does format the output, so it can be use in a {chef} pipeline, but not as a building block.

From afd9c4997b7c0b8f833af390b1fa5498beef7e71 Mon Sep 17 00:00:00 2001
From: nsjohnsen <99595467+nsjohnsen@users.noreply.github.com>
Date: Mon, 8 Apr 2024 09:26:14 +0200
Subject: [PATCH 4/5] Adding snapshot values (#17)

---
 tests/testthat/_snaps/demographics.md | 112 ++++++++++++++++++++++----
 tests/testthat/test-demographics.R    |  20 +++--
 2 files changed, 109 insertions(+), 23 deletions(-)

diff --git a/tests/testthat/_snaps/demographics.md b/tests/testthat/_snaps/demographics.md
index 9d0b477..3189855 100644
--- a/tests/testthat/_snaps/demographics.md
+++ b/tests/testthat/_snaps/demographics.md
@@ -1,21 +1,101 @@
-# Demographics (categorical) work when strata provided
+# Demographics (categorical) work when strata is provided
 
-    Code
-      actual_total
-    Output
-                 label qualifiers value  description
-                <char>     <fctr> <num>       <char>
-      1: n_non_missing     AGEGR1    85 Demographics
-      2:     n_missing     AGEGR1     1 Demographics
-      3: n_non_missing        SEX    82 Demographics
-      4:     n_missing        SEX     4 Demographics
+    {
+      "type": "list",
+      "attributes": {
+        "names": {
+          "type": "character",
+          "attributes": {},
+          "value": ["label", "qualifiers", "value", "description"]
+        },
+        "row.names": {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2, 3, 4]
+        },
+        "class": {
+          "type": "character",
+          "attributes": {},
+          "value": ["data.frame"]
+        }
+      },
+      "value": [
+        {
+          "type": "character",
+          "attributes": {},
+          "value": ["n_non_missing", "n_missing", "n_non_missing", "n_missing"]
+        },
+        {
+          "type": "integer",
+          "attributes": {
+            "levels": {
+              "type": "character",
+              "attributes": {},
+              "value": ["AGEGR1", "SEX"]
+            },
+            "class": {
+              "type": "character",
+              "attributes": {},
+              "value": ["factor"]
+            }
+          },
+          "value": [1, 1, 2, 2]
+        },
+        {
+          "type": "double",
+          "attributes": {},
+          "value": [85, 1, 82, 4]
+        },
+        {
+          "type": "character",
+          "attributes": {},
+          "value": ["Demographics", "Demographics", "Demographics", "Demographics"]
+        }
+      ]
+    }
 
 ---
 
-    Code
-      actual_f
-    Output
-                 label  description qualifiers value
-                <char>       <char>     <char> <num>
-      1: n_non_missing Demographics       <NA>    52
+    {
+      "type": "list",
+      "attributes": {
+        "names": {
+          "type": "character",
+          "attributes": {},
+          "value": ["label", "description", "qualifiers", "value"]
+        },
+        "row.names": {
+          "type": "integer",
+          "attributes": {},
+          "value": [1]
+        },
+        "class": {
+          "type": "character",
+          "attributes": {},
+          "value": ["data.frame"]
+        }
+      },
+      "value": [
+        {
+          "type": "character",
+          "attributes": {},
+          "value": ["n_non_missing"]
+        },
+        {
+          "type": "character",
+          "attributes": {},
+          "value": ["Demographics"]
+        },
+        {
+          "type": "character",
+          "attributes": {},
+          "value": [null]
+        },
+        {
+          "type": "double",
+          "attributes": {},
+          "value": [52]
+        }
+      ]
+    }
 
diff --git a/tests/testthat/test-demographics.R b/tests/testthat/test-demographics.R
index ae72c31..0937875 100644
--- a/tests/testthat/test-demographics.R
+++ b/tests/testthat/test-demographics.R
@@ -1,4 +1,5 @@
-test_that("Demographics (categorical) work when strata provided", {
+test_that("Demographics (categorical) work when strata is provided", {
+
   # SETUP -------------------------------------------------------------------
   input <- mk_advs()
   input[, INDEX_ := .I] |> setkey(INDEX_)
@@ -27,7 +28,6 @@ test_that("Demographics (categorical) work when strata provided", {
     strata_var = "SEX"
   )
 
-
   # EXPECT ------------------------------------------------------------------
 
   x <- mk_advs()
@@ -36,15 +36,21 @@ test_that("Demographics (categorical) work when strata provided", {
   x[, missing_sex := FALSE]
   x[is.na(SEX), missing_sex := TRUE]
   b <-
-    x[TRT01A=="Placebo", .N, by = .(missing_sex, SEX)] |> setorder(SEX)
+    x[TRT01A == "Placebo", .N, by = .(missing_sex, SEX)] |> setorder(SEX)
+
+  expect_equal(actual_total[qualifiers == "SEX" & label == "n_missing", value],
+               b[(missing_sex), N])
+
+  expect_snapshot_value(as.data.frame(actual_total),
+                        tolerance = 1e-6, style = "json2")
+
+  expect_snapshot_value(as.data.frame(actual_f),
+                        tolerance = 1e-6, style = "json2")
 
-  expect_equal(actual_total[qualifiers=="SEX" & label=="n_missing", value], b[(missing_sex), N])
-  expect_snapshot(actual_total)
-  expect_snapshot(actual_f)
 })
 
 
-test_that("Demographics (continuous) work when no strata level provided", {
+test_that("Demographics (continuous) work when no strata level is provided", {
   # SETUP -------------------------------------------------------------------
   ep <- chef::mk_endpoint_str(
     data_prepare = mk_advs,

From 60bfeea97925f3dba1c0fd59a265fbf5839e0544 Mon Sep 17 00:00:00 2001
From: nsjohnsen <99595467+nsjohnsen@users.noreply.github.com>
Date: Mon, 8 Apr 2024 10:34:30 +0200
Subject: [PATCH 5/5] Adjusting snapshot value tolerance (#18)

---
 tests/testthat/test-demographics.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-demographics.R b/tests/testthat/test-demographics.R
index 0937875..2776cdb 100644
--- a/tests/testthat/test-demographics.R
+++ b/tests/testthat/test-demographics.R
@@ -42,10 +42,10 @@ test_that("Demographics (categorical) work when strata is provided", {
                b[(missing_sex), N])
 
   expect_snapshot_value(as.data.frame(actual_total),
-                        tolerance = 1e-6, style = "json2")
+                        tolerance = 1e-8, style = "json2")
 
   expect_snapshot_value(as.data.frame(actual_f),
-                        tolerance = 1e-6, style = "json2")
+                        tolerance = 1e-8, style = "json2")
 
 })