From e10c0f1c2229b14993ec9b693edf069e73d75427 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Wed, 28 Dec 2022 14:52:55 -0500 Subject: [PATCH 01/48] Initial introduction of framework for hug formatting --- R/format.R | 256 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 207 insertions(+), 49 deletions(-) diff --git a/R/format.R b/R/format.R index 92308000..be8c978b 100644 --- a/R/format.R +++ b/R/format.R @@ -125,32 +125,33 @@ f_str <- function(format_string, ..., empty=c(.overall='')) { # Check format string class assert_has_class(format_string, "character") - # Capture the format groups + # Do a pre-check of the format string to catch invalid auto specifications + if (str_detect(format_string, "AA|aa")) { + stop(paste0("In f_str(), only use a single 'A' or 'a' on the integer or", + " decimal side to trigger auto precision."), call.=TRUE) + } - # This regex does a few things so let's break it into pieces - # (a(\\+\\d)?|x+) -> a, possibly followed by + and a digit, or 1 or more x's - # This captures the integer, with either the auto formats or x's - # (\\.(a(\\+\\d)?|x+)?)? -> a period, then possibly the same a <+digit>, or multiple x's - # This captures the decimal places, but they don't have to exist - rx <- "(a(\\+\\d+)?|x+)(\\.(a(\\+\\d+)?|x+)?)?" - formats <- str_extract_all(format_string, regex(rx))[[1]] + # Parse out the format string sections + rx <- get_format_string_regex() + formats <- str_extract_all(format_string, rx)[[1]] # Duplicate any '%' to escape them format_string_1 <- str_replace_all(format_string, "%", "%%") # Make the sprintf ready string - repl_str <- str_replace_all(format_string_1, regex(rx), "%s") + repl_str <- str_replace_all(format_string_1, rx, "%s") - # Make sure that if two formats were found, two varaibles exist + # Make sure that if two formats were found, two variables exist assert_that(length(formats) == length(vars), msg = paste0("In `f_str` ", length(formats), " formats were entered in the format string ", format_string, "but ", length(vars), " variables were assigned.")) # Pull out the integer and decimal - settings <- map(formats, separate_int_dig) + settings <- map(formats, gather_settings) # A value in settings will be <0 if it's an auto format - auto_precision <- any(map_lgl(settings, ~any(attr(.x, 'auto')))) + auto_precision <- any(map_lgl(settings, ~ any(as.logical(.[c('auto_int', 'auto_dec')])))) + hug_formatting <- any(map_lgl(settings, ~ !is.na(.['hug_char']))) # All ellipsis variables are names assert_that(all(map_lgl(vars, function(x) class(x) == "name")), @@ -164,40 +165,145 @@ f_str <- function(format_string, ..., empty=c(.overall='')) { size = nchar(format_string), repl_str = repl_str, auto_precision = auto_precision, + hug_formatting = hug_formatting, empty=empty ), class="f_str" ) } -#' Evaluate a portion of a format string to check the integer and digit lengths +#' Generate the format string parsing regular expression #' -#' @param x String to have sections counted -#' -#' @return A named vector with the names "int" and "dec", countaining numeric values +#' @return A regular expression object with the compiled expression #' #' @noRd -separate_int_dig <- function(x){ +get_format_string_regex <- function() { + + # On the integer side, find an a that may be followed by a + and a number + # so this could look like a or a+1, a+2, etc. + int_auto <- "a(\\+\\d+)?" + + # Same as above, but look for an A and a non-whitespace character preceding + # the A + int_auto_hug <- "(\\S+)A(\\+\\d+)?" + + # Look for one or more X's, with a non-whitespace character preceding + int_fixed_hug <- "(\\S+)X+" + + # Look for one ore more x's + int_fixed <- "x+" + + # Look for an A or a that may be followed by a + and a number + # so this could look like a or a+1, a+2, etc. + # A's will be invalid here but that will be caught by error checking + # in parse_hug_char() + dec_auto <- "[A|a](\\+\\d+)?" + + # One or more X or x - again X is invalid but caught later + dec_fixed <- "[X|x]+" + + # Now prepare to piece the chunks together - all of the int side pieces are + # combined with "or's". The decimal side comes after that, and this specifies + # that it will find them if they exist, but the integer side will be found + # even if they don't + joined_string <- "(%s|%s|%s|%s)(\\.(%s|%s)?)?" + + # Concatenate it all together and convert it to a regex + regex( + sprintf( + joined_string, + int_auto, + int_auto_hug, + int_fixed_hug, + int_fixed, + dec_auto, + dec_fixed + ) + ) +} - # Initialize a vector and name the elements - out <- numeric(2) - names(out) <- c('int', 'dec') - attr(out, 'auto') <- c('int'=FALSE, 'dec'=FALSE) +#' Gather the settings for a specific format string section +#' +#' This function will collect specific settings about a format string section, +#' including integer and decimal length, whether autos were turned on, and hug +#' character settings/ +#' +#' @param x A character string representing a format string section +#' +#' @return A named list of settings +gather_settings <- function(x) { - # Count the characters on each side of the decimal - fields <- str_split(x, "\\.")[[1]] + settings <- list( + int = 0, + dec = 0, + auto_int = FALSE, + auto_dec = FALSE, + hug_char = NA_character_ + ) - num_chars <- map(fields, parse_fmt) - auto <- map_lgl(num_chars, ~attr(.x, 'auto')) - num_chars <- as.numeric(num_chars) - attr(num_chars, 'auto') <- auto + settings <- parse_hug_char(x, settings) + settings <- separate_int_dig(x, settings) + + settings +} + +#' Find if a hug character exists and attach to settings +#' +#' @param x Format string section +#' @param settings A list of settings for a format string section +#' +#' @return List of settings +#' @noRd +parse_hug_char <- function(x, settings) { + + # Find hugging + if (str_detect(x, "X|A")) { + + # Look for characters preceding X or A that aren't X or A + hug_char_rx <- regex("([^XA]+)[XA]") + + # Search the hug character and pull out all matches + # x is guaranteed to be a single element vector so pull out first + # element of the list + hug_char_match <- str_match_all(x, hug_char_rx)[[1]] + + # If no rows, then X or A was used with no specified hug character + if (nrow(hug_char_match) == 0) { + stop( + paste0("In f_str(), an 'X' or 'A' was used but no hug character ", + "was specified, such as a parenthesis. Use 'X' or 'A' to bind ", + "a character within a format string."), + call.=FALSE + ) + } + + # The match matrix can't be more than one row. If it is, it was probably + # because X or A were placed before and after a decimal, so show the user + if (nrow(hug_char_match) > 1) { + err_msg <- paste0( + "In f_str(), invalid format string specification. The following section", + " failed to parse:\n\t'", x, + "'\nThe issue is present with a hug character. Was 'X' or 'A' used after", + " a decimal?" + ) + stop(err_msg, call.=FALSE) + } + + # If X or A was used after the decimal at all, that's also invalid so error + # out as well + if (str_detect(hug_char_match[1,1], fixed("."))) { + stop( + paste0("In f_str(), 'X' or 'A' can only be used on the left side of a", + " decimal within a format string."), + call.=FALSE + ) + } - # Insert the number of characters into the named vector - for (i in seq_along(num_chars)) { - out[i] <- num_chars[i] - attr(out, 'auto')[i] <- attr(num_chars, 'auto')[i] + # The hug char is in a capture group, so we pull it out of the match + settings$hug_char <- hug_char_match[1,2] } - out + + settings } #' Parse a portion of a string format @@ -211,7 +317,7 @@ separate_int_dig <- function(x){ #' @noRd parse_fmt <- function(x) { # If it's an auto format, grab the output value - if (grepl('a', x)) { + if (grepl('a|A', x)) { # Pick out the digit add <- replace_na(as.double(str_extract(x, '\\d+')), 0) # Auto formats will be -1 - the specified precision @@ -226,6 +332,36 @@ parse_fmt <- function(x) { val } +#' Evaluate a portion of a format string to check the integer and digit lengths +#' +#' @param x Format string section +#' @param settings A list of settings for a format string section +#' +#' @return List of settings +#' +#' @noRd +separate_int_dig <- function(x, settings){ + + # Count the characters on each side of the decimal + fields <- str_split(x, "\\.")[[1]] + # Label the split segments + names(fields) <- c('int', 'dec')[1:length(fields)] + + # Parse out length and auto info from each field and apply to settings + num_chars <- map(fields, parse_fmt) + auto <- map_lgl(num_chars, ~attr(.x, 'auto')) + + settings[names(num_chars)] <- as.numeric(num_chars) + settings[paste0("auto_", names(auto))] <- auto + + + # If a hug character is specified,subtract if from the integer length + # if (!is.na(settings$hug_char)) { + # settings$int <- settings$int - nchar(settings$hug_char) + # } + settings +} + #' Set the format strings and associated summaries to be performed in a layer #' #' 'Tplyr' gives you extensive control over how strings are presented. @@ -462,27 +598,35 @@ num_fmt <- function(val, i, fmt=NULL, autos=NULL) { assert_has_class(fmt, 'f_str') assert_that(i <= length(fmt$formats), msg="In `num_fmt` supplied ") - # Auto precision requires that integer and decimal are - # pulled from the row. If auto, settings will be the amount to add - # to max prec, so add those together. Otherwise pull the manually - # specified value - int_len <- ifelse(attr(fmt$settings[[i]],'auto')['int'], - fmt$settings[[i]]['int'] + autos['int'], - fmt$settings[[i]]['int']) + # Auto precision requires that integer and decimal are pulled from the row. If + # auto, settings will be the amount to add to max prec, so add those together. + # Otherwise pull the manually specified value + int_len <- ifelse(fmt$setting[[i]]$auto_int, + fmt$setting[[i]]$int + autos['int'], + fmt$setting[[i]]$int) - decimals <- ifelse(attr(fmt$settings[[i]],'auto')['dec'], - fmt$settings[[i]]['dec'] + autos['dec'], - fmt$settings[[i]]['dec']) + decimals <- ifelse(fmt$setting[[i]]$auto_dec, + fmt$setting[[i]]$dec + autos['dec'], + fmt$setting[[i]]$dec) # Set nsmall to input decimals - nsmall = decimals + nsmall <- decimals # Increment digits for to compensate for display if (decimals > 0) decimals <- decimals + 1 # Empty return string - if (is.na(val)) { - return(str_pad(fmt$empty[1], int_len+decimals, side="left")) + if (is.na(val)) {if (is.na(fmt$settings[[i]]$hug_char)) { + return(str_pad(fmt$empty[1], int_len+decimals, side="left")) + } else{ + return( + str_pad( + paste0(fmt$settings[[i]]$hug_char, fmt$empty[1]), + int_len+decimals, + side="left") + ) + } + } # Use two different rounding methods based on if someone is matching with IBM rounding @@ -496,8 +640,8 @@ num_fmt <- function(val, i, fmt=NULL, autos=NULL) { } # Form the string - return( - format( + if (is.na(fmt$settings[[i]]$hug_char)) { + fmt_num <- format( # Round rounded, # Set width of format string @@ -505,7 +649,21 @@ num_fmt <- function(val, i, fmt=NULL, autos=NULL) { # Decimals to display nsmall=nsmall ) - ) + } else { + fmt_num <- str_pad( + paste0( + #Paste the hug character + fmt$settings[[i]]$hug_char, + format( + rounded, + nsmall=nsmall + ) + ), + width=(int_len+decimals) + ) + } + + fmt_num } #' Check if format strings have been applied to a layer From 4db9d72d4c0c075950200ee05c7514b0be8587dd Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Wed, 28 Dec 2022 14:53:10 -0500 Subject: [PATCH 02/48] Update count layer specific formatting for hug formatting --- R/count.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/count.R b/R/count.R index 3bb0063f..fb00efcc 100644 --- a/R/count.R +++ b/R/count.R @@ -342,10 +342,13 @@ prepare_format_metadata.count_layer <- function(x) { # If a layer_width flag is present, edit the formatting string to display the maximum # character length - if (str_detect(format_strings[['n_counts']]$format_string, "a")) { - # Replace the flag with however many xs + if (str_detect(format_strings[['n_counts']]$format_string, "a|A")) { + # Replace 'a' with appropriate 'x' replaced_string <- str_replace(format_strings[['n_counts']]$format_string, "a", paste(rep("x", n_width), collapse = "")) + # Replace 'A' with appropriate 'X' + replaced_string <- str_replace(replaced_string, "A", + paste(rep("X", n_width), collapse = "")) # Make a new f_str and replace the old one format_strings[['n_counts']] <- f_str(replaced_string, !!!format_strings$n_counts$vars) From 30f8d0798eb90d10097dac7de9e438866b345b32 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 15:01:17 -0500 Subject: [PATCH 03/48] Fix spacing for autos with multiple hug chars --- R/format.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/format.R b/R/format.R index be8c978b..d9bcdc29 100644 --- a/R/format.R +++ b/R/format.R @@ -354,11 +354,11 @@ separate_int_dig <- function(x, settings){ settings[names(num_chars)] <- as.numeric(num_chars) settings[paste0("auto_", names(auto))] <- auto - # If a hug character is specified,subtract if from the integer length - # if (!is.na(settings$hug_char)) { - # settings$int <- settings$int - nchar(settings$hug_char) - # } + if (!is.na(settings$hug_char) && settings$auto_int) { + settings$int <- settings$int + (nchar(settings$hug_char) - 1) + } + settings } From 495864e5922773d39d395b14d4e45abb1e8c7711 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 15:02:21 -0500 Subject: [PATCH 04/48] These assertions aren't necessary because it's an internal method --- R/format.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/format.R b/R/format.R index d9bcdc29..21ca9ff2 100644 --- a/R/format.R +++ b/R/format.R @@ -594,10 +594,6 @@ name_translator <- function(fmt_strings) { #' @noRd num_fmt <- function(val, i, fmt=NULL, autos=NULL) { - assert_that(is.numeric(val)) - assert_has_class(fmt, 'f_str') - assert_that(i <= length(fmt$formats), msg="In `num_fmt` supplied ") - # Auto precision requires that integer and decimal are pulled from the row. If # auto, settings will be the amount to add to max prec, so add those together. # Otherwise pull the manually specified value From b0992043a307d4c717341419ce6193b179e4b13f Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 16:15:15 -0500 Subject: [PATCH 05/48] Update to interpret auto with hugging --- R/shift.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/shift.R b/R/shift.R index e46d9cc1..a6a38d7b 100644 --- a/R/shift.R +++ b/R/shift.R @@ -81,17 +81,19 @@ prepare_format_metadata.shift_layer <- function(x) { evalq({ - # Pull max character length from counts. Should be at least 1 n_width <- max(c(nchar(numeric_data$n), 1L)) # If a layer_width flag is present, edit the formatting string to display the maximum # character length - if(str_detect(format_strings$format_string, "a")) { + if(str_detect(format_strings$format_string, "a|A")) { # Replace the flag with however many xs replaced_string <- str_replace(format_strings$format_string, "a", paste(rep("x", n_width), collapse = "")) + replaced_string <- str_replace(replaced_string, "A", + paste(rep("X", n_width), collapse = "")) + # Make a new f_str and replace the old one format_strings <- f_str(replaced_string, !!!format_strings$vars) } From 2927e5607057252b5bdb9b33b2ed2f91852196d8 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 16:16:30 -0500 Subject: [PATCH 06/48] Initial updates for hug characters --- tests/testthat/test-format.R | 288 ++++++++++++++++++++++++++++------- 1 file changed, 235 insertions(+), 53 deletions(-) diff --git a/tests/testthat/test-format.R b/tests/testthat/test-format.R index e5b50f22..c321bfd4 100644 --- a/tests/testthat/test-format.R +++ b/tests/testthat/test-format.R @@ -10,6 +10,14 @@ test_that("Error is thrown when format doesn't match variables", { expect_snapshot_error(f_str("xx.x", a, b)) }) +# Hug character errors generate properly +test_that("Hug character format string errors generate properly in f_str()", { + expect_error(f_str("(AA.a+1)", a), "*only use a single 'A' or 'a'*") + expect_error(f_str("XX", a), "*an 'X' or 'A' was used but no hug character*") + expect_error(f_str("xx.X", a), "*'X' or 'A' can only be used*") + expect_error(f_str("(X.X)", a), "*The following section failed to parse*") +}) + # Variables are picked up appropriately test_that("Variables are properly captured", { fmt1 <- f_str('xx', a) @@ -42,6 +50,13 @@ fmt17 <- f_str("xx.a (xx.a+2)", a, b) fmt18 <- f_str("xx.a (xx.a+2%) [a.xx]", a, b, c) fmt19 <- f_str("xx.a --->>>> xx.xx <<<------ a%, xx.a+1%%", a, b, c, d) fmt20 <- f_str("xx.a+12 --->>>> xx.xx <<<------ a+123%, xx.a+1234%%", a, b, c, d) +fmt21 <- f_str("(X.x)", a) +fmt22 <- f_str("(A.x)", a) +fmt23 <- f_str("(A+1.x)", a) +fmt24 <- f_str("({[X.x)]}", a) +fmt25 <- f_str("({[A.x)]}", a) +fmt26 <- f_str("({[A+1.x)]}", a) + # Regex tests test_that("Format strings are parsed correctly", { @@ -65,6 +80,12 @@ test_that("Format strings are parsed correctly", { expect_equal(fmt18$formats, c("xx.a", "xx.a+2", "a.xx")) expect_equal(fmt19$formats, c("xx.a", "xx.xx", "a", "xx.a+1")) expect_equal(fmt20$formats, c("xx.a+12", "xx.xx", "a+123", "xx.a+1234")) + expect_equal(fmt21$formats, c("(X.x")) + expect_equal(fmt22$formats, c("(A.x")) + expect_equal(fmt23$formats, c("(A+1.x")) + expect_equal(fmt24$formats, c("({[X.x")) + expect_equal(fmt25$formats, c("({[A.x")) + expect_equal(fmt26$formats, c("({[A+1.x")) }) @@ -90,124 +111,285 @@ test_that("Replacement strings are parsed correctly", { expect_equal(fmt18$repl_str, "%s (%s%%) [%s]") expect_equal(fmt19$repl_str, "%s --->>>> %s <<<------ %s%%, %s%%%%") expect_equal(fmt20$repl_str, "%s --->>>> %s <<<------ %s%%, %s%%%%") + expect_equal(fmt21$repl_str, "%s)") + expect_equal(fmt22$repl_str, "%s)") + expect_equal(fmt23$repl_str, "%s)") + expect_equal(fmt24$repl_str, "%s)]}") + expect_equal(fmt25$repl_str, "%s)]}") + expect_equal(fmt26$repl_str, "%s)]}") }) # Auto precision is detected and precision formats are properly set test_that("Format string setting and autoprecision are detected appropriately", { #f_str("xx", a) - s1 <- list(c('int'=2,'dec'=0)) - attr(s1[[1]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) + s1 <- list(list(int=2, dec=0, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt1$settings, s1) #f_str("a", a) - s2 <- list(c('int'=0,'dec'=0)) - attr(s2[[1]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) + s2 <- list(list(int=0, dec=0, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt2$settings, s2) #f_str("a+1", a) - s3 <- list(c('int'=1,'dec'=0)) - attr(s3[[1]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) + s3 <- list(list(int=1, dec=0, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt3$settings, s3) #f_str("a+2", a) - s4 <- list(c('int'=2,'dec'=0)) - attr(s4[[1]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) + s4 <- list(list(int=2, dec=0, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt4$settings, s4) #f_str("xx.x", a) - s5 <- list(c('int'=2,'dec'=1)) - attr(s5[[1]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) + s5 <- list(list(int=2, dec=1, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt5$settings, s5) #f_str("xx.xx", a) - s6 <- list(c('int'=2,'dec'=2)) - attr(s6[[1]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) + s6 <- list(list(int=2, dec=2, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt6$settings, s6) #f_str("a.xx", a) - s7 <- list(c('int'=0,'dec'=2)) - attr(s7[[1]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) + s7 <- list(list(int=0, dec=2, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt7$settings, s7) #f_str("xx.a", a) - s8 <- list(c('int'=2,'dec'=0)) - attr(s8[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) + s8 <- list(list(int=2, dec=0, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_)) expect_equal(fmt8$settings, s8) #f_str("a+1.xx", a) - s9 <- list(c('int'=1,'dec'=2)) - attr(s9[[1]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) + s9 <- list(list(int=1, dec=2, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt9$settings, s9) #f_str("a+2.xx", a) - s10 <- list(c('int'=2,'dec'=2)) - attr(s10[[1]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) + s10 <- list(list(int=2, dec=2, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_)) expect_equal(fmt10$settings, s10) #f_str("xx.a+1", a) - s11 <- list(c('int'=2,'dec'=1)) - attr(s11[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) + s11 <- list(list(int=2, dec=1, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_)) expect_equal(fmt11$settings, s11) #f_str("xx.a+2", a) - s12 <- list(c('int'=2,'dec'=2)) - attr(s12[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) + s12 <- list(list(int=2, dec=2, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_)) expect_equal(fmt12$settings, s12) #f_str("xx xx", a, b) - s13 <- list(c('int'=2,'dec'=0), c('int'=2,'dec'=0)) - attr(s13[[1]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) - attr(s13[[2]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) + s13 <- list( + list(int=2, dec=0, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_), + list(int=2, dec=0, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_) + ) expect_equal(fmt13$settings, s13) #f_str("xx.x xx", a, b) - s14 <- list(c('int'=2,'dec'=1), c('int'=2,'dec'=0)) - attr(s14[[1]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) - attr(s14[[2]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) + s14 <- list( + list(int=2, dec=1, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_), + list(int=2, dec=0, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_) + ) expect_equal(fmt14$settings, s14) #f_str("xx.x xx.xx", a, b) - s15 <- list(c('int'=2,'dec'=1), c('int'=2,'dec'=2)) - attr(s15[[1]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) - attr(s15[[2]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) + s15 <- list( + list(int=2, dec=1, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_), + list(int=2, dec=2, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_) + ) expect_equal(fmt15$settings, s15) #f_str("xx.a xx.a+1", a, b) - s16 <- list(c('int'=2,'dec'=0), c('int'=2,'dec'=1)) - attr(s16[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) - attr(s16[[2]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) + s16 <- list( + list(int=2, dec=0, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_), + list(int=2, dec=1, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_) + ) expect_equal(fmt16$settings, s16) #f_str("xx.a (xx.a+2)", a, b) - s17 <- list(c('int'=2,'dec'=0), c('int'=2,'dec'=2)) - attr(s17[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) - attr(s17[[2]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) + s17 <- list( + list(int=2, dec=0, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_), + list(int=2, dec=2, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_) + ) expect_equal(fmt17$settings, s17) #f_str("xx.a (xx.a+2%) [a.xx]", a, b, c) - s18 <- list(c('int'=2,'dec'=0), c('int'=2,'dec'=2), c('int'=0,'dec'=2)) - attr(s18[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) - attr(s18[[2]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) - attr(s18[[3]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) + s18 <- list( + list(int=2, dec=0, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_), + list(int=2, dec=2, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_), + list(int=0, dec=2, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_) + ) expect_equal(fmt18$settings, s18) #f_str("xx.a --->>>> xx.xx <<<------ a%, xx.a+1%%", a, b, c, d) - s19 <- list(c('int'=2,'dec'=0), c('int'=2,'dec'=2), c('int'=0,'dec'=0), c('int'=2,'dec'=1)) - attr(s19[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) - attr(s19[[2]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) - attr(s19[[3]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) - attr(s19[[4]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) + s19 <- list( + list(int=2, dec=0, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_), + list(int=2, dec=2, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_), + list(int=0, dec=0, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_), + list(int=2, dec=1, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_) + ) expect_equal(fmt19$settings, s19) #f_str("xx.a+12 --->>>> xx.xx <<<------ a+123%, xx.a+1234%%", a, b, c, d) - s20 <- list(c('int'=2,'dec'=12), c('int'=2,'dec'=2), c('int'=123,'dec'=0), c('int'=2,'dec'=1234)) - attr(s20[[1]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) - attr(s20[[2]], 'auto') <- c('int'=FALSE, 'dec'=FALSE) - attr(s20[[3]], 'auto') <- c('int'=TRUE, 'dec'=FALSE) - attr(s20[[4]], 'auto') <- c('int'=FALSE, 'dec'=TRUE) + s20 <- list( + list(int=2, dec=12, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_), + list(int=2, dec=2, auto_int=FALSE, auto_dec=FALSE, hug_char=NA_character_), + list(int=123, dec=0, auto_int=TRUE, auto_dec=FALSE, hug_char=NA_character_), + list(int=2, dec=1234, auto_int=FALSE, auto_dec=TRUE, hug_char=NA_character_) + ) expect_equal(fmt20$settings, s20) + # f_str("(X.x)", a) + s21 <- list(list(int=2, dec=1, auto_int=FALSE, auto_dec=FALSE, hug_char="(")) + expect_equal(fmt21$settings, s21) + + # f_str("(A.x)", a) + s22 <- list(list(int=0, dec=1, auto_int=TRUE, auto_dec=FALSE, hug_char="(")) + expect_equal(fmt22$settings, s22) + + # f_str("(A+1.x)", a) + s23 <- list(list(int=1, dec=1, auto_int=TRUE, auto_dec=FALSE, hug_char="(")) + expect_equal(fmt23$settings, s23) + + # f_str("({[X.x)]}", a) + s24 <- list(list(int=4, dec=1, auto_int=FALSE, auto_dec=FALSE, hug_char="({[")) + expect_equal(fmt24$settings, s24) + + # f_str("({[A.x)]}", a) + s25 <- list(list(int=2, dec=1, auto_int=TRUE, auto_dec=FALSE, hug_char="({[")) + expect_equal(fmt25$settings, s25) + + # f_str("({[A+1.a+1)]}", a) + s26 <- list(list(int=3, dec=1, auto_int=TRUE, auto_dec=FALSE, hug_char="({[")) + expect_equal(fmt26$settings, s26) + +}) + +## Hug character formatting testing ---- +load(test_path('adsl.Rdata')) +load(test_path('adlb.Rdata')) + +y <- adlb %>% + mutate( + AVAL = if_else( + AVISIT == "End of Treatment", + 85.301, + AVAL + ) + ) + +# TODO: Fix to use the non-distinct count +test_that("Hug character formatting applies properly for count layers", { + # These tests are various applications of appropriate hug character formats + + # Single hug character, count layer, manual + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a (XXX.x%)", n, pct)) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)")) + + # Multi hug character, count layer, manual + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a {(XXX.x%)}", n, pct)) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 {(0.0%)}", " 8 {(9.3%)}", "78 {(90.7%)}")) + + # Single hug character, count layer, auto + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a (A.x%)", n, pct)) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)")) + + # Multi hug character, count layer, auto + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a {(A.x%)}", n, pct)) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 {(0.0%)}", " 8 {(9.3%)}", "78 {(90.7%)}")) }) +test_that("Hug character formatting applies properly for desc layers", { + + # Single hug character, desc layer, manual + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x (XX.x)", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 (NA)", "323.4 (85.7)", " 85.3 (0.0)")) + + # Multi hug character, desc layer, manual + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x {(XX.x)}", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 {(NA)}", "323.4 {(85.7)}", " 85.3 {(0.0)}")) + + # Single hug character, desc layer, auto + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x (A.x)", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 (NA)", "323.4 (85.7)", " 85.3 (0.0)")) + + # Multi hug character, desc layer, auto + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x {(A.x)}", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 {(NA)}", "323.4 {(85.7)}", " 85.3 {(0.0)}")) +}) +test_that("Hug character formatting applies properly for shift layers", { + tplyr_table(adlb, TRTA, where=VISIT %in% c("SCREENING 1", "UNSCHEDULED 1.1")) %>% + add_layer( + group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, VISIT)) %>% + set_format_strings(f_str("xx (XXX.x%)", n, pct)) + ) %>% + build() %>% select(var1_Placebo_N) + + tplyr_table(adlb, TRTA, where=VISIT %in% c("SCREENING 1", "UNSCHEDULED 1.1")) %>% + add_layer( + group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, VISIT)) %>% + set_format_strings(f_str("(A) (xxx.x%)", n, pct)) + ) %>% + build() %>% select(var1_Placebo_N) +}) From 3a8d96f92ffa685d1e3e96e3dfb5955e6796a25a Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:19:02 -0500 Subject: [PATCH 07/48] Unit testing updates for hugging updates and split tests into separate files --- man/set_format_strings.Rd | 2 +- tests/testthat/test-format.R | 137 ------------------------------- tests/testthat/test-num_fmt.R | 147 ++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 138 deletions(-) create mode 100644 tests/testthat/test-num_fmt.R diff --git a/man/set_format_strings.Rd b/man/set_format_strings.Rd index b1828bde..90fac1c0 100644 --- a/man/set_format_strings.Rd +++ b/man/set_format_strings.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/format.R +% Please edit documentation in R/set_format_strings.R \name{set_format_strings} \alias{set_format_strings} \alias{set_format_strings.desc_layer} diff --git a/tests/testthat/test-format.R b/tests/testthat/test-format.R index c321bfd4..b618ba08 100644 --- a/tests/testthat/test-format.R +++ b/tests/testthat/test-format.R @@ -256,140 +256,3 @@ test_that("Format string setting and autoprecision are detected appropriately", expect_equal(fmt26$settings, s26) }) - -## Hug character formatting testing ---- -load(test_path('adsl.Rdata')) -load(test_path('adlb.Rdata')) - -y <- adlb %>% - mutate( - AVAL = if_else( - AVISIT == "End of Treatment", - 85.301, - AVAL - ) - ) - -# TODO: Fix to use the non-distinct count -test_that("Hug character formatting applies properly for count layers", { - # These tests are various applications of appropriate hug character formats - - # Single hug character, count layer, manual - x <- tplyr_table(adsl, TRT01P) %>% - add_layer( - group_count(RACE) %>% - set_format_strings(f_str("a (XXX.x%)", n, pct)) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x, c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)")) - - # Multi hug character, count layer, manual - x <- tplyr_table(adsl, TRT01P) %>% - add_layer( - group_count(RACE) %>% - set_format_strings(f_str("a {(XXX.x%)}", n, pct)) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x, c(" 0 {(0.0%)}", " 8 {(9.3%)}", "78 {(90.7%)}")) - - # Single hug character, count layer, auto - x <- tplyr_table(adsl, TRT01P) %>% - add_layer( - group_count(RACE) %>% - set_format_strings(f_str("a (A.x%)", n, pct)) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x, c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)")) - - # Multi hug character, count layer, auto - x <- tplyr_table(adsl, TRT01P) %>% - add_layer( - group_count(RACE) %>% - set_format_strings(f_str("a {(A.x%)}", n, pct)) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x, c(" 0 {(0.0%)}", " 8 {(9.3%)}", "78 {(90.7%)}")) -}) - -test_that("Hug character formatting applies properly for desc layers", { - - # Single hug character, desc layer, manual - x <- tplyr_table(y, TRTA) %>% - add_layer( - group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% - set_format_strings( - TEST = f_str("xxx.x (XX.x)", mean, sd, empty="NA") - ) %>% - set_precision_by(PARAMCD) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x[1:3], c("279.6 (NA)", "323.4 (85.7)", " 85.3 (0.0)")) - - # Multi hug character, desc layer, manual - x <- tplyr_table(y, TRTA) %>% - add_layer( - group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% - set_format_strings( - TEST = f_str("xxx.x {(XX.x)}", mean, sd, empty="NA") - ) %>% - set_precision_by(PARAMCD) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x[1:3], c("279.6 {(NA)}", "323.4 {(85.7)}", " 85.3 {(0.0)}")) - - # Single hug character, desc layer, auto - x <- tplyr_table(y, TRTA) %>% - add_layer( - group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% - set_format_strings( - TEST = f_str("xxx.x (A.x)", mean, sd, empty="NA") - ) %>% - set_precision_by(PARAMCD) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x[1:3], c("279.6 (NA)", "323.4 (85.7)", " 85.3 (0.0)")) - - # Multi hug character, desc layer, auto - x <- tplyr_table(y, TRTA) %>% - add_layer( - group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% - set_format_strings( - TEST = f_str("xxx.x {(A.x)}", mean, sd, empty="NA") - ) %>% - set_precision_by(PARAMCD) - ) %>% - build() %>% - pull(var1_Placebo) - - expect_equal(x[1:3], c("279.6 {(NA)}", "323.4 {(85.7)}", " 85.3 {(0.0)}")) -}) - -test_that("Hug character formatting applies properly for shift layers", { - tplyr_table(adlb, TRTA, where=VISIT %in% c("SCREENING 1", "UNSCHEDULED 1.1")) %>% - add_layer( - group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, VISIT)) %>% - set_format_strings(f_str("xx (XXX.x%)", n, pct)) - ) %>% - build() %>% select(var1_Placebo_N) - - tplyr_table(adlb, TRTA, where=VISIT %in% c("SCREENING 1", "UNSCHEDULED 1.1")) %>% - add_layer( - group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, VISIT)) %>% - set_format_strings(f_str("(A) (xxx.x%)", n, pct)) - ) %>% - build() %>% select(var1_Placebo_N) -}) diff --git a/tests/testthat/test-num_fmt.R b/tests/testthat/test-num_fmt.R new file mode 100644 index 00000000..a6d0b075 --- /dev/null +++ b/tests/testthat/test-num_fmt.R @@ -0,0 +1,147 @@ +## Hug character formatting testing ---- +load(test_path('adsl.Rdata')) +load(test_path('adlb.Rdata')) + +y <- adlb %>% + mutate( + AVAL = if_else( + AVISIT == "End of Treatment", + 85.301, + AVAL + ) + ) + +test_that("Hug character formatting applies properly for count layers", { + # These tests are various applications of appropriate hug character formats + + # Single hug character, count layer, manual + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a (XXX.x%)", n, pct)) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)")) + + # Multi hug character, count layer, manual, single hug auto + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a {(XXX.x%)} [A]", distinct_n, distinct_pct, n)) %>% + set_distinct_by(USUBJID) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 {(0.0%)} [0]", " 8 {(9.3%)} [8]", "78 {(90.7%)} [78]")) + + # Single hug character, count layer, auto + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a (XX.x%) [A]", distinct_n, distinct_pct, n)) %>% + set_distinct_by(USUBJID) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 (0.0%) [0]", " 8 (9.3%) [8]", "78 (90.7%) [78]")) + + # Multi hug character, count layer, auto + x <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("a {(XX.x%)} [[A]]", distinct_n, distinct_pct, n)) %>% + set_distinct_by(USUBJID) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x, c(" 0 {(0.0%)} [[0]]", " 8 {(9.3%)} [[8]]", "78 {(90.7%)} [[78]]")) +}) + +test_that("Hug character formatting applies properly for desc layers", { + + # Single hug character, desc layer, manual + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x (XX.x)", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 (NA)", "323.4 (85.7)", " 85.3 (0.0)")) + + # Multi hug character, desc layer, manual + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x {(XX.x)}", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 {(NA)}", "323.4 {(85.7)}", " 85.3 {(0.0)}")) + + # Single hug character, desc layer, auto + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x (A.x)", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 (NA)", "323.4 (85.7)", " 85.3 (0.0)")) + + # Multi hug character, desc layer, auto + x <- tplyr_table(y, TRTA) %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x {(A.x)}", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + pull(var1_Placebo) + + expect_equal(x[1:3], c("279.6 {(NA)}", "323.4 {(85.7)}", " 85.3 {(0.0)}")) +}) + +test_that("Hug character formatting applies properly for shift layers", { + + # Shift layer, single hug char, manual and auto + x <- tplyr_table(adlb, TRTA, where=VISIT %in% c("SCREENING 1", "UNSCHEDULED 1.1")) %>% + add_layer( + group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, VISIT)) %>% + set_format_strings(f_str("(A) (XXX.x%)", n, pct)) + ) %>% + build() %>% + pull(var1_Placebo_N) + + expect_equal(x, c(" (8) (100.0%)", " (0) (0.0%)")) + + # Shift layer, multi hug char, manual and auto + x <- tplyr_table(adlb, TRTA, where=VISIT %in% c("SCREENING 1", "UNSCHEDULED 1.1")) %>% + add_layer( + group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, VISIT)) %>% + set_format_strings(f_str("((A)) {(XXX.x%)}", n, pct)) + ) %>% + build() %>% + pull(var1_Placebo_N) + + expect_equal(x, c(" ((8)) {(100.0%)}", " ((0)) {(0.0%)}")) +}) From 9024f5977ed69963e09092cdc04bd7f3abab570c Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:19:29 -0500 Subject: [PATCH 08/48] Split out format code into separate files --- R/num_fmt.R | 115 +++++++++++++++++++++ R/set_format_strings.R | 225 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 340 insertions(+) create mode 100644 R/num_fmt.R create mode 100644 R/set_format_strings.R diff --git a/R/num_fmt.R b/R/num_fmt.R new file mode 100644 index 00000000..de2aeee2 --- /dev/null +++ b/R/num_fmt.R @@ -0,0 +1,115 @@ +#' Format a numeric value using an \code{f_str} object +#' +#' Using the \code{f_str} object, information about the integer length and +#' significant digits are extracted. Proper round is applied and the formatted numeric value is returned. +#' +#' +#' @param val Numeric value to be formatted +#' @param fmt \code{f_str} object with formatting information related to numeric value to be formatted +#' @param i Index of the format within the \code{f_str} object +#' @param autos A named numeric vector containing the 'auto' formatting values +#' for the integer length(int) and decimal length(dec). +#' +#' @return String formatted numeric value +#' @noRd +num_fmt <- function(val, i, fmt=NULL, autos=NULL) { + + # Auto precision requires that integer and decimal are pulled from the row. If + # auto, settings will be the amount to add to max prec, so add those together. + # Otherwise pull the manually specified value + int_len <- ifelse(fmt$setting[[i]]$auto_int, + fmt$setting[[i]]$int + autos['int'], + fmt$setting[[i]]$int) + + decimals <- ifelse(fmt$setting[[i]]$auto_dec, + fmt$setting[[i]]$dec + autos['dec'], + fmt$setting[[i]]$dec) + + # Set nsmall to input decimals + nsmall <- decimals + + # Increment digits for to compensate for display + if (decimals > 0) decimals <- decimals + 1 + + # Empty return string + if (is.na(val)) {if (is.na(fmt$settings[[i]]$hug_char)) { + return(str_pad(fmt$empty[1], int_len+decimals, side="left")) + } else{ + return( + str_pad( + paste0(fmt$settings[[i]]$hug_char, fmt$empty[1]), + int_len+decimals, + side="left") + ) + } + + } + + # Use two different rounding methods based on if someone is matching with IBM rounding + if(getOption("tplyr.IBMRounding", FALSE)) { + warn(paste0(c("You have enabled IBM Rounding. This is an experimental feature.", + " If you have feedback please get in touch with the maintainers!")), + .frequency = "regularly", .frequency_id = "tplyr.ibm", immediate. = TRUE) + rounded <- ut_round(val, nsmall) + } else { + rounded <- round(val, nsmall) + } + + # Form the string + if (is.na(fmt$settings[[i]]$hug_char)) { + fmt_num <- format( + # Round + rounded, + # Set width of format string + width=(int_len+decimals), + # Decimals to display + nsmall=nsmall + ) + } else { + fmt_num <- str_pad( + paste0( + #Paste the hug character + fmt$settings[[i]]$hug_char, + format( + rounded, + nsmall=nsmall + ) + ), + width=(int_len+decimals) + ) + } + + fmt_num +} + +#' Pad Numeric Values +#' +#' This is generally used with a count layer +#' +#' @param string_ The current values of the numeric data +#' @param right_pad The total string length, done after the left pad +#' @param left_pad The length of the left_pad +#' +#' @return Modified string +#' +#' @noRd +pad_formatted_data <- function(x, right_pad, left_pad) { + # Pad the left with difference between left_pad and nchar(string_) + if(nchar(x)[1] < left_pad) { + # The double pasting looks weird but the inner one is meant to create single character + # that is the needed number of spaces and the outer pastes that to the value + x <- map_chr(x, + ~ paste0( + paste0(rep(" ", left_pad - nchar(.x)), collapse = ""), + .x)) + } + + #Padd the right with the difference of the max layer length + if(right_pad > max(nchar(x))) { + x <- map_chr(x, + paste0, paste0(rep(" ", right_pad - max(nchar(x))), + collapse = "")) + } + + x +} diff --git a/R/set_format_strings.R b/R/set_format_strings.R new file mode 100644 index 00000000..651dd4d2 --- /dev/null +++ b/R/set_format_strings.R @@ -0,0 +1,225 @@ +#' Set the format strings and associated summaries to be performed in a layer +#' +#' 'Tplyr' gives you extensive control over how strings are presented. +#' \code{set_format_strings} allows you to apply these string formats to your +#' layer. This behaves slightly differently between layers. +#' +#' @details Format strings are one of the most powerful components of 'Tplyr'. +#' Traditionally, converting numeric values into strings for presentation can +#' consume a good deal of time. Values and decimals need to align between +#' rows, rounding before trimming is sometimes forgotten - it can become a +#' tedious mess that, in the grand scheme of things, is not an important part +#' of the analysis being performed. 'Tplyr' makes this process as simple as we +#' can, while still allowing flexibility to the user. +#' +#' In a count layer, you can simply provide a single \code{\link{f_str}} +#' object to specify how you want your n's, percentages, and denominators formatted. +#' If you are additionally supplying a statistic, like risk difference using +#' \code{\link{add_risk_diff}}, you specify the count formats using the name +#' 'n_counts'. The risk difference formats would then be specified using the +#' name 'riskdiff'. In a descriptive statistic layer, +#' \code{set_format_strings} allows you to do a couple more things: +#' \itemize{ +#' \item{By naming parameters with character strings, those character strings +#' become a row label in the resulting data frame} +#' \item{The actual summaries that are performed come from the variable names +#' used within the \code{\link{f_str}} calls} +#' \item{Using multiple summaries (declared by your \code{\link{f_str}} +#' calls), multiple summary values can appear within the same line. For +#' example, to present "Mean (SD)" like displays.} +#' \item{Format strings in the desc layer also allow you to configure how +#' empty values should be presented. In the \code{f_str} call, use the +#' \code{empty} parameter to specify how missing values should present. A +#' single element character vector should be provided. If the vector is +#' unnamed, that value will be used in the format string and fill the space +#' similar to how the numbers will display. Meaning - if your empty string is +#' 'NA' and your format string is 'xx (xxx)', the empty values will populate +#' as 'NA ( NA)'. If you name the character vector in the 'empty' parameter +#' '.overall', like \code{empty = c(.overall='')}, then that exact string will +#' fill the value instead. For example, providing 'NA' will instead create the +#' formatted string as 'NA' exactly.} +#' } +#' +#' See the \code{\link{f_str}} documentation for more details about how this +#' implementation works. +#' +#' @param e Layer on which string formats will be bound +#' @param ... Named parameters containing calls to \code{f_str} to set the +#' format strings +#' +#' @return The layer environment with the format string binding added +#' @export +#' @rdname set_format_strings +#' +#' @examples +#' # Load in pipe +#' library(magrittr) +#' +#' # In a count layer +#' tplyr_table(mtcars, gear) %>% +#' add_layer( +#' group_count(cyl) %>% +#' set_format_strings(f_str('xx (xx%)', n, pct)) +#' ) %>% +#' build() +#' +#' # In a descriptive statistics layer +#' tplyr_table(mtcars, gear) %>% +#' add_layer( +#' group_desc(mpg) %>% +#' set_format_strings( +#' "n" = f_str("xx", n), +#' "Mean (SD)" = f_str("xx.x", mean, empty='NA'), +#' "SD" = f_str("xx.xx", sd), +#' "Median" = f_str("xx.x", median), +#' "Q1, Q3" = f_str("xx, xx", q1, q3, empty=c(.overall='NA')), +#' "Min, Max" = f_str("xx, xx", min, max), +#' "Missing" = f_str("xx", missing) +#' ) +#' ) %>% +#' build() +#' +#' # In a shift layer +#' tplyr_table(mtcars, am) %>% +#' add_layer( +#' group_shift(vars(row=gear, column=carb), by=cyl) %>% +#' set_format_strings(f_str("xxx (xx.xx%)", n, pct)) +#' ) %>% +#' build() +#' +set_format_strings <- function(e, ...) { + UseMethod("set_format_strings") +} + + +#' Desc layer S3 method for set_format_strings +#' +#' @param e Layer on which to bind format strings +#' @param ... Named parameters containing calls to \code{f_str} to set the +#' format strings +#' @param cap A named character vector containing an 'int' element for the cap +#' on integer precision, and a 'dec' element for the cap on decimal precision. +#' +#' @return tplyr_layer object with formats attached +#' @export +#' +#' @rdname set_format_strings +set_format_strings.desc_layer <- function(e, ..., cap=getOption('tplyr.precision_cap')) { + + # Catch the arguments from the function call so useful errors can be thrown + check <- enquos(...) + + # Make sure that all of the attachments were `f_str` objects + for (i in seq_along(check)) { + + if (is_named(check)) { + msg = paste0("In `set_format_string` entry `",names(check)[[i]],"` is not an `f_str` object. All assignmentes made within", + " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.") + } else { + msg = paste0("In `set_format_string` entry ",i," is not an `f_str` object. All assignmentes made within", + " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.") + } + + assert_that(class(quo_get_expr(check[[i]])) == "f_str" || (is_call(quo_get_expr(check[[i]])) && call_name(check[[i]]) == "f_str"), + msg = msg) + } + + # Row labels are pulled from names - so make sure that everything is named + assert_that(is_named(check), + msg = "In `set_format_string` all parameters must be named in order to create row labels.") + + + # Pick off the ellipsis + format_strings <- list(...) + + + # Get the list of variable names that need to be transposed + summary_vars <- flatten(map(format_strings, ~ .x$vars)) + + # Get the list of transpose transpose variables needed + trans_vars <- map(format_strings, ~ .x$vars[[1]]) + + # Get the variable names that need to be kept on the same row + keep_vars <- flatten(map(format_strings, ~ tail(.x$vars, n=length(.x$vars) -1))) + + # Get the max format length + max_format_length <- max(map_int(format_strings, ~ .x$size)) + + # Pick off the row labels + row_labels <- names(format_strings) + + # Identify if auto precision is needed + need_prec_table <- any(map_lgl(format_strings, ~ .x$auto_precision)) + + # Fill in defaults if cap hasn't fully been provided + if (!('int' %in% names(cap))) cap['int'] <- getOption('tplyr.precision_cap')['int'] + if (!('dec' %in% names(cap))) cap['dec'] <- getOption('tplyr.precision_cap')['dec'] + + env_bind(e, + format_strings = format_strings, + summary_vars = vars(!!!summary_vars), + keep_vars = vars(!!!keep_vars), + trans_vars = vars(!!!trans_vars), + row_labels = row_labels, + max_length = max_format_length, + need_prec_table = need_prec_table, + cap = cap + ) + e +} + +#' Set Count Layer String Format +#' +#' @param e Layer on which to bind format strings +#' @param ... Named parameters containing calls to \code{f_str} to set the format strings +#' +#' @return Returns the modified layer object. +#' @export +#' @rdname set_format_strings +set_format_strings.count_layer <- function(e, ...) { + + params <- count_f_str_check(...) + + env_bind(e, format_strings = params) + + e +} + +set_format_strings.shift_layer <- function(e, ...) { + + dots <- list(...) + + assert_that(all(dots$vars %in% c("n", "pct")), + msg = "formats in shift layers can only be n") + + env_bind(e, format_strings = dots[[1]]) + + e +} + +#' Extract a translation vector for f_str objects +#' +#' The names of the format_strings list should be row labels in the output. The first +#' element of the \code{vars} object are the transpose variables, so make the names of +#' those variables the vector names, and the names of the format_strings elements the +#' values to allow easy creation of a \code{row_labels} variable in the data +#' +#' @param fmt_strings The \code{format_strings} varaible in a layer +#' +#' @return A named character vector with the flipping applied +#' @noRd +name_translator <- function(fmt_strings) { + out <- names(fmt_strings) + names(out) <- map_chr(fmt_strings, ~ as_name(.x$vars[[1]])) + out +} + +#' Check if format strings have been applied to a layer +#' +#' @param e Layer environment +#' +#' @return Boolean +#' @noRd +has_format_strings <- function(e) { + 'format_strings' %in% ls(envir=e) +} From 99d0ef11a661626be665a81059acc90c6afe0244 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:19:44 -0500 Subject: [PATCH 09/48] Doc updates, move code out into new files --- R/format.R | 372 ++++------------------------------------------------- 1 file changed, 26 insertions(+), 346 deletions(-) diff --git a/R/format.R b/R/format.R index 21ca9ff2..ec690736 100644 --- a/R/format.R +++ b/R/format.R @@ -40,18 +40,36 @@ #' If you want two numbers on the same line, you provide two sets of x's. For #' example, if you're presenting a value like "mean (sd)" - you could provide #' the string 'xx.xx (xx.xxx)', or perhaps 'a.a+1 (a.a+2). Note that you're -#' able to provide different integer lengths and different decimal precision +#' able to provide different integer lengths and different decimal precision #' for the two values. Each format string is independent and relates only to #' the format specified. #' +#' As described above, when using 'x' or 'a', any other character within the +#' format string will stay stationary. So for example, if your format string +#' is 'xx (xxx.x)', your number may format as '12 ( 34.5)'. So the left side +#' parenthesis stays fixed. In some displays, you may want the parenthesis to +#' 'hug' your number. Following this example, when allotting 3 spaces for the +#' integer within parentheses, the parentehsis should shift to the right, +#' making the numbers appear '12 (34.5)'. Using `f_str()` you can achieve +#' this by using a capital 'X' or 'A'. For this example, the format string +#' would be 'xx (XXX.x)'. +#' +#' There are a two rules when using 'parenthesis hugging': +#' +#' - Capital letters should only be used on the integer side of a number +#' - A character must precede the capital letter, otherwise there's no +#' character to 'hug' +#' #' The other parameters of the `f_str` call specify what values should fill #' the x's. `f_str` objects are used slightly differently between different #' layers. When declaring a format string within a count layer, `f_str()` #' expects to see the values `n` or `distinct_n` for event or distinct counts, #' `pct` or `distinct_pct` for event or distinct percentages, or `total` or -#' `distinct_total` for denominator calculations. But in descriptive statistic -#' layers, `f_str` parameters refer to the names of the summaries being -#' performed, either by built in defaults, or custom summaries declared using +#' `distinct_total` for denominator calculations. Note that in an `f_str()` +#' for a count layer 'A' or 'a' are based on n counts, and therefore don't +#' make sense to use in percentages. But in descriptive statistic layers, +#' `f_str` parameters refer to the names of the summaries being performed, +#' either by built in defaults, or custom summaries declared using #' [set_custom_summaries()]. See [set_format_strings()] for some more notes #' about layers specific implementation. #' @@ -115,6 +133,10 @@ #' #' f_str("xx.x, xx.x, xx.x", q1, median, q3) #' +#' f_str("xx (XXX.x%)", n, pct) +#' +#' f_str("a.a+1 (A.a+2)", mean, sd) +#' f_str <- function(format_string, ..., empty=c(.overall='')) { # Capture the variables off of the ellipsis @@ -362,348 +384,6 @@ separate_int_dig <- function(x, settings){ settings } -#' Set the format strings and associated summaries to be performed in a layer -#' -#' 'Tplyr' gives you extensive control over how strings are presented. -#' \code{set_format_strings} allows you to apply these string formats to your -#' layer. This behaves slightly differently between layers. -#' -#' @details Format strings are one of the most powerful components of 'Tplyr'. -#' Traditionally, converting numeric values into strings for presentation can -#' consume a good deal of time. Values and decimals need to align between -#' rows, rounding before trimming is sometimes forgotten - it can become a -#' tedious mess that, in the grand scheme of things, is not an important part -#' of the analysis being performed. 'Tplyr' makes this process as simple as we -#' can, while still allowing flexibility to the user. -#' -#' In a count layer, you can simply provide a single \code{\link{f_str}} -#' object to specify how you want your n's, percentages, and denominators formatted. -#' If you are additionally supplying a statistic, like risk difference using -#' \code{\link{add_risk_diff}}, you specify the count formats using the name -#' 'n_counts'. The risk difference formats would then be specified using the -#' name 'riskdiff'. In a descriptive statistic layer, -#' \code{set_format_strings} allows you to do a couple more things: -#' \itemize{ -#' \item{By naming parameters with character strings, those character strings -#' become a row label in the resulting data frame} -#' \item{The actual summaries that are performed come from the variable names -#' used within the \code{\link{f_str}} calls} -#' \item{Using multiple summaries (declared by your \code{\link{f_str}} -#' calls), multiple summary values can appear within the same line. For -#' example, to present "Mean (SD)" like displays.} -#' \item{Format strings in the desc layer also allow you to configure how -#' empty values should be presented. In the \code{f_str} call, use the -#' \code{empty} parameter to specify how missing values should present. A -#' single element character vector should be provided. If the vector is -#' unnamed, that value will be used in the format string and fill the space -#' similar to how the numbers will display. Meaning - if your empty string is -#' 'NA' and your format string is 'xx (xxx)', the empty values will populate -#' as 'NA ( NA)'. If you name the character vector in the 'empty' parameter -#' '.overall', like \code{empty = c(.overall='')}, then that exact string will -#' fill the value instead. For example, providing 'NA' will instead create the -#' formatted string as 'NA' exactly.} -#' } -#' -#' See the \code{\link{f_str}} documentation for more details about how this -#' implementation works. -#' -#' @param e Layer on which string formats will be bound -#' @param ... Named parameters containing calls to \code{f_str} to set the -#' format strings -#' -#' @return The layer environment with the format string binding added -#' @export -#' @rdname set_format_strings -#' -#' @examples -#' # Load in pipe -#' library(magrittr) -#' -#' # In a count layer -#' tplyr_table(mtcars, gear) %>% -#' add_layer( -#' group_count(cyl) %>% -#' set_format_strings(f_str('xx (xx%)', n, pct)) -#' ) %>% -#' build() -#' -#' # In a descriptive statistics layer -#' tplyr_table(mtcars, gear) %>% -#' add_layer( -#' group_desc(mpg) %>% -#' set_format_strings( -#' "n" = f_str("xx", n), -#' "Mean (SD)" = f_str("xx.x", mean, empty='NA'), -#' "SD" = f_str("xx.xx", sd), -#' "Median" = f_str("xx.x", median), -#' "Q1, Q3" = f_str("xx, xx", q1, q3, empty=c(.overall='NA')), -#' "Min, Max" = f_str("xx, xx", min, max), -#' "Missing" = f_str("xx", missing) -#' ) -#' ) %>% -#' build() -#' -#' # In a shift layer -#' tplyr_table(mtcars, am) %>% -#' add_layer( -#' group_shift(vars(row=gear, column=carb), by=cyl) %>% -#' set_format_strings(f_str("xxx (xx.xx%)", n, pct)) -#' ) %>% -#' build() -#' -set_format_strings <- function(e, ...) { - UseMethod("set_format_strings") -} - - -#' Desc layer S3 method for set_format_strings -#' -#' @param e Layer on which to bind format strings -#' @param ... Named parameters containing calls to \code{f_str} to set the -#' format strings -#' @param cap A named character vector containing an 'int' element for the cap -#' on integer precision, and a 'dec' element for the cap on decimal precision. -#' -#' @return tplyr_layer object with formats attached -#' @export -#' -#' @rdname set_format_strings -set_format_strings.desc_layer <- function(e, ..., cap=getOption('tplyr.precision_cap')) { - - # Catch the arguments from the function call so useful errors can be thrown - check <- enquos(...) - - # Make sure that all of the attachments were `f_str` objects - for (i in seq_along(check)) { - - if (is_named(check)) { - msg = paste0("In `set_format_string` entry `",names(check)[[i]],"` is not an `f_str` object. All assignmentes made within", - " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.") - } else { - msg = paste0("In `set_format_string` entry ",i," is not an `f_str` object. All assignmentes made within", - " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.") - } - - assert_that(class(quo_get_expr(check[[i]])) == "f_str" || (is_call(quo_get_expr(check[[i]])) && call_name(check[[i]]) == "f_str"), - msg = msg) - } - - # Row labels are pulled from names - so make sure that everything is named - assert_that(is_named(check), - msg = "In `set_format_string` all parameters must be named in order to create row labels.") - - - # Pick off the ellipsis - format_strings <- list(...) - - - # Get the list of variable names that need to be transposed - summary_vars <- flatten(map(format_strings, ~ .x$vars)) - - # Get the list of transpose transpose variables needed - trans_vars <- map(format_strings, ~ .x$vars[[1]]) - - # Get the variable names that need to be kept on the same row - keep_vars <- flatten(map(format_strings, ~ tail(.x$vars, n=length(.x$vars) -1))) - - # Get the max format length - max_format_length <- max(map_int(format_strings, ~ .x$size)) - - # Pick off the row labels - row_labels <- names(format_strings) - - # Identify if auto precision is needed - need_prec_table <- any(map_lgl(format_strings, ~ .x$auto_precision)) - - # Fill in defaults if cap hasn't fully been provided - if (!('int' %in% names(cap))) cap['int'] <- getOption('tplyr.precision_cap')['int'] - if (!('dec' %in% names(cap))) cap['dec'] <- getOption('tplyr.precision_cap')['dec'] - - env_bind(e, - format_strings = format_strings, - summary_vars = vars(!!!summary_vars), - keep_vars = vars(!!!keep_vars), - trans_vars = vars(!!!trans_vars), - row_labels = row_labels, - max_length = max_format_length, - need_prec_table = need_prec_table, - cap = cap - ) - e -} - -#' Set Count Layer String Format -#' -#' @param e Layer on which to bind format strings -#' @param ... Named parameters containing calls to \code{f_str} to set the format strings -#' -#' @return Returns the modified layer object. -#' @export -#' @rdname set_format_strings -set_format_strings.count_layer <- function(e, ...) { - - params <- count_f_str_check(...) - - env_bind(e, format_strings = params) - - e -} - -set_format_strings.shift_layer <- function(e, ...) { - - dots <- list(...) - - assert_that(all(dots$vars %in% c("n", "pct")), - msg = "formats in shift layers can only be n") - - env_bind(e, format_strings = dots[[1]]) - - e -} - -#' Extract a translation vector for f_str objects -#' -#' The names of the format_strings list should be row labels in the output. The first -#' element of the \code{vars} object are the transpose variables, so make the names of -#' those variables the vector names, and the names of the format_strings elements the -#' values to allow easy creation of a \code{row_labels} variable in the data -#' -#' @param fmt_strings The \code{format_strings} varaible in a layer -#' -#' @return A named character vector with the flipping applied -#' @noRd -name_translator <- function(fmt_strings) { - out <- names(fmt_strings) - names(out) <- map_chr(fmt_strings, ~ as_name(.x$vars[[1]])) - out -} - -#' Format a numeric value using an \code{f_str} object -#' -#' Using the \code{f_str} object, information about the integer length and -#' significant digits are extracted. Proper round is applied and the formatted numeric value is returned. -#' -#' -#' @param val Numeric value to be formatted -#' @param fmt \code{f_str} object with formatting information related to numeric value to be formatted -#' @param i Index of the format within the \code{f_str} object -#' @param autos A named numeric vector containing the 'auto' formatting values -#' for the integer length(int) and decimal length(dec). -#' -#' @return String formatted numeric value -#' @noRd -num_fmt <- function(val, i, fmt=NULL, autos=NULL) { - - # Auto precision requires that integer and decimal are pulled from the row. If - # auto, settings will be the amount to add to max prec, so add those together. - # Otherwise pull the manually specified value - int_len <- ifelse(fmt$setting[[i]]$auto_int, - fmt$setting[[i]]$int + autos['int'], - fmt$setting[[i]]$int) - - decimals <- ifelse(fmt$setting[[i]]$auto_dec, - fmt$setting[[i]]$dec + autos['dec'], - fmt$setting[[i]]$dec) - - # Set nsmall to input decimals - nsmall <- decimals - - # Increment digits for to compensate for display - if (decimals > 0) decimals <- decimals + 1 - - # Empty return string - if (is.na(val)) {if (is.na(fmt$settings[[i]]$hug_char)) { - return(str_pad(fmt$empty[1], int_len+decimals, side="left")) - } else{ - return( - str_pad( - paste0(fmt$settings[[i]]$hug_char, fmt$empty[1]), - int_len+decimals, - side="left") - ) - } - - } - - # Use two different rounding methods based on if someone is matching with IBM rounding - if(getOption("tplyr.IBMRounding", FALSE)) { - warn(paste0(c("You have enabled IBM Rounding. This is an experimental feature.", - " If you have feedback please get in touch with the maintainers!")), - .frequency = "regularly", .frequency_id = "tplyr.ibm", immediate. = TRUE) - rounded <- ut_round(val, nsmall) - } else { - rounded <- round(val, nsmall) - } - - # Form the string - if (is.na(fmt$settings[[i]]$hug_char)) { - fmt_num <- format( - # Round - rounded, - # Set width of format string - width=(int_len+decimals), - # Decimals to display - nsmall=nsmall - ) - } else { - fmt_num <- str_pad( - paste0( - #Paste the hug character - fmt$settings[[i]]$hug_char, - format( - rounded, - nsmall=nsmall - ) - ), - width=(int_len+decimals) - ) - } - - fmt_num -} - -#' Check if format strings have been applied to a layer -#' -#' @param e Layer environment -#' -#' @return Boolean -#' @noRd -has_format_strings <- function(e) { - 'format_strings' %in% ls(envir=e) -} - -#' Pad Numeric Values -#' -#' This is generally used with a count layer -#' -#' @param string_ The current values of the numeric data -#' @param right_pad The total string length, done after the left pad -#' @param left_pad The length of the left_pad -#' -#' @return Modified string -#' -#' @noRd -pad_formatted_data <- function(x, right_pad, left_pad) { - # Pad the left with difference between left_pad and nchar(string_) - if(nchar(x)[1] < left_pad) { - # The double pasting looks weird but the inner one is meant to create single character - # that is the needed number of spaces and the outer pastes that to the value - x <- map_chr(x, - ~ paste0( - paste0(rep(" ", left_pad - nchar(.x)), collapse = ""), - .x)) - } - - #Padd the right with the difference of the max layer length - if(right_pad > max(nchar(x))) { - x <- map_chr(x, - paste0, paste0(rep(" ", right_pad - max(nchar(x))), - collapse = "")) - } - - x -} - #' Helper for changing values on count f_str #' #' @param ... The object passed to `set_format_strings` or `set_count_layer_formats` From 6f90d829c3fd5dd40a9a379c8af309a7dc3d29cf Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:19:54 -0500 Subject: [PATCH 10/48] doc updates --- man/f_str.Rd | 31 +++++++++++++++++++++++++++---- man/gather_settings.Rd | 19 +++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 man/gather_settings.Rd diff --git a/man/f_str.Rd b/man/f_str.Rd index 4260c4b7..10cd8c4e 100644 --- a/man/f_str.Rd +++ b/man/f_str.Rd @@ -64,18 +64,37 @@ as well, making format strings such as 'xx.a+1'. If you want two numbers on the same line, you provide two sets of x's. For example, if you're presenting a value like "mean (sd)" - you could provide the string 'xx.xx (xx.xxx)', or perhaps 'a.a+1 (a.a+2). Note that you're -able to provide different integer lengths and different decimal precision +able to provide different integer lengths and different decimal precision for the two values. Each format string is independent and relates only to the format specified. +As described above, when using 'x' or 'a', any other character within the +format string will stay stationary. So for example, if your format string +is 'xx (xxx.x)', your number may format as '12 ( 34.5)'. So the left side +parenthesis stays fixed. In some displays, you may want the parenthesis to +'hug' your number. Following this example, when allotting 3 spaces for the +integer within parentheses, the parentehsis should shift to the right, +making the numbers appear '12 (34.5)'. Using \code{f_str()} you can achieve +this by using a capital 'X' or 'A'. For this example, the format string +would be 'xx (XXX.x)'. + +There are a two rules when using 'parenthesis hugging': +\itemize{ +\item Capital letters should only be used on the integer side of a number +\item A character must precede the capital letter, otherwise there's no +character to 'hug' +} + The other parameters of the \code{f_str} call specify what values should fill the x's. \code{f_str} objects are used slightly differently between different layers. When declaring a format string within a count layer, \code{f_str()} expects to see the values \code{n} or \code{distinct_n} for event or distinct counts, \code{pct} or \code{distinct_pct} for event or distinct percentages, or \code{total} or -\code{distinct_total} for denominator calculations. But in descriptive statistic -layers, \code{f_str} parameters refer to the names of the summaries being -performed, either by built in defaults, or custom summaries declared using +\code{distinct_total} for denominator calculations. Note that in an \code{f_str()} +for a count layer 'A' or 'a' are based on n counts, and therefore don't +make sense to use in percentages. But in descriptive statistic layers, +\code{f_str} parameters refer to the names of the summaries being performed, +either by built in defaults, or custom summaries declared using \code{\link[=set_custom_summaries]{set_custom_summaries()}}. See \code{\link[=set_format_strings]{set_format_strings()}} for some more notes about layers specific implementation. @@ -132,4 +151,8 @@ f_str("xx.a (xx.a+1)", mean, sd) f_str("xx.x, xx.x, xx.x", q1, median, q3) +f_str("xx (XXX.x\%)", n, pct) + +f_str("a.a+1 (A.a+2)", mean, sd) + } diff --git a/man/gather_settings.Rd b/man/gather_settings.Rd new file mode 100644 index 00000000..79ede97c --- /dev/null +++ b/man/gather_settings.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/format.R +\name{gather_settings} +\alias{gather_settings} +\title{Gather the settings for a specific format string section} +\usage{ +gather_settings(x) +} +\arguments{ +\item{x}{A character string representing a format string section} +} +\value{ +A named list of settings +} +\description{ +This function will collect specific settings about a format string section, +including integer and decimal length, whether autos were turned on, and hug +character settings/ +} From acdf729b183e3b5f18a4493c0fe01c3f59758da4 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:20:19 -0500 Subject: [PATCH 11/48] Add text about parenthesis hugging to docs --- vignettes/count.Rmd | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vignettes/count.Rmd b/vignettes/count.Rmd index 3ceebb4a..ca77ee72 100644 --- a/vignettes/count.Rmd +++ b/vignettes/count.Rmd @@ -69,6 +69,24 @@ kable(t) You may have seen tables before like the one above. This display shows the number of subjects who experienced an adverse event, the percentage of subjects within the given treatment group who experienced that event, and then the total number of occurrences of that event. Using `set_distinct_by()` triggered the derivation of `distinct_n` and `distinct_pct` in addition to the `n` and `pct` created within `group_count`. The display of the values is then controlled by the `f_str()` call in `set_format_strings()`. +An additional option for formatting the numbers above would be using 'parenthesis hugging'. To trigger this, on the integer side of a number use a capital 'X' or a capital 'A'. For example: + +```{r} +t <- tplyr_table(adae, TRTA) %>% + add_layer( + group_count(AEDECOD) %>% + set_distinct_by(USUBJID) %>% + set_format_strings(f_str("xxx (XXX.xx%) [A]", distinct_n, distinct_pct, n)) + ) %>% + build() %>% + head() %>% + select(row_label1, `var1_Xanomeline Low Dose`) + +t +``` + +As can be seen above, when using parenthesis hugging, the width of a specified format group is preserved, but the preceding character (or characters) to the left of the 'X' or 'A' is pulled to the right to 'hug' the specified number. + ## Nested Count Summaries Certain summary tables present counts within groups. One example could be in a disposition table where a disposition reason of "Other" summarizes what those other reasons were. A very common example is an Adverse Event table that displays counts for body systems, and then the events within those body systems. This is again a nuanced situation - there are two variables being summarized: The body system counts, and the advert event counts. From 4597007352a0cfe717a657f7b2656a46458b12c7 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:21:12 -0500 Subject: [PATCH 12/48] Move formatting text out into it's own vignette for desc layers --- vignettes/desc.Rmd | 187 +----------------- vignettes/desc_layer_formatting.Rmd | 282 ++++++++++++++++++++++++++++ 2 files changed, 283 insertions(+), 186 deletions(-) create mode 100644 vignettes/desc_layer_formatting.Rmd diff --git a/vignettes/desc.Rmd b/vignettes/desc.Rmd index 6a401a86..3e7b0a52 100644 --- a/vignettes/desc.Rmd +++ b/vignettes/desc.Rmd @@ -48,7 +48,7 @@ Let's walk through this call to `set_format_strings` to understand in detail wha 1) The quoted strings on the left of the '=' within `set_format_strings()` become the row label in the output. This allows you to define some custom text in `set_format_strings()` to explain the summary that is presented on the associated row. This text is fully in your control. 2) On the right side of each equals is a call to `f_str()`. As explained in the `vignette("Tplyr")`, this is an object that captures a lot of metadata to understand how the strings should be presented. -3) Within the `f_str()` call, you see x's in quotes. This defines how you'd like the numbers formatted from the resulting summaries. The number of x's you use on the left side of a decimal control the space allotted for an integer, and the right side controls the decimal precision. Decimals are rounded prior to string formatting - so no need to worry about that. Note that this forcefully sets the decimal and integer precision - **Tplyr** can automatically determine this for you as well, but more on that later. +3) Within the `f_str()` call, you see x's in quotes. This defines how you'd like the numbers formatted from the resulting summaries. The number of x's you use on the left side of a decimal control the space allotted for an integer, and the right side controls the decimal precision. Decimals are rounded prior to string formatting - so no need to worry about that. Note that this forcefully sets the decimal and integer precision - **Tplyr** can automatically determine this for you as well, but more on that later. 4) After the x's there are unquoted variable names. This is where you specify the actual summaries that will be performed. Notice that some `f_str()` calls have two summaries specified. This allows you to put two summaries in the same string and present them on the same line. But where do these summary names come from? And which ones does **Tplyr** have? @@ -191,188 +191,3 @@ tplyr_table(adsl, TRT01P) %>% ``` Note that the table code used to produce the output is the same. Now **Tplyr** used the custom summary function for `mean` as specified in the `tplyr.custom_summaries` option. Also note the use of `rlang::quos()`. We've done our best to mask this from the user everywhere possible and make the interfaces clean and intuitive, but a great deal of **Tplyr** is built using 'rlang' and non-standard evaluation. Within this option is one of the very few instances where a user needs to concern themselves with the use of quosures. If you'd like to learn more about non-standard evaluation and quosures, we recommend [Section IV](https://adv-r.hadley.nz/metaprogramming.html) in Advanced R. - -## Formatting - -A lot of the nuance to formatting descriptive statistics layers has already been covered above, but there are a couple more tricks to getting the most out of **Tplyr**. One of these tricks is filling empty values. - -By default, if there is no available value for a summary in a particular observation, the result being presented will be blanked out. - -_Note: **Tplyr**generally respects factor levels - so in instances of a missing row or column group, if the factor level is present, then the variable or row will still generate)_ - -```{r missing} -adsl$TRT01P <- as.factor(adsl$TRT01P) -adlb$TRTA <- as.factor(adlb$TRTA) - -adlb_2 <- adlb %>% - filter(TRTA != "Placebo") - -tplyr_table(adlb_2, TRTA) %>% - set_pop_data(adsl) %>% - set_pop_treat_var(TRT01P) %>% - add_layer( - group_desc(AVAL, by=PARAMCD) %>% - set_format_strings('Mean (SD)' = f_str('xxx (xxx)', mean, sd)) - ) %>% - build() %>% - head() %>% - select(-starts_with("ord")) %>% - kable() -``` - -Note how the entire example above has all records in `var1_Placebo` missing. **Tplyr** gives you control over how you fill this space. Let's say that we wanted instead to make that space say "Missing". You can control this with the `f_str()` object using the `empty` parameter. - -```{r missing1} -tplyr_table(adlb_2, TRTA) %>% - set_pop_data(adsl) %>% - set_pop_treat_var(TRT01P) %>% - add_layer( - group_desc(AVAL, by=PARAMCD) %>% - set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c(.overall="MISSING"))) - ) %>% - build() %>% - head() %>% - select(-starts_with("ord")) %>% - kable() -``` - -Look at the `empty` parameter above. Here, we use a named character vector, where the name is `.overall`. When this name is used, if all elements within the cell are missing, they will be filled with the specified text. Otherwise, the provided string will fill just the missing parameter. In some cases, this may not be what you'd like to see. Perhaps we want a string that fills each missing space. - -```{r missing2} -tplyr_table(adlb_2, TRTA) %>% - set_pop_data(adsl) %>% - set_pop_treat_var(TRT01P) %>% - add_layer( - group_desc(AVAL, by=PARAMCD) %>% - set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c("NA"))) - ) %>% - build() %>% - head() %>% - select(-starts_with("ord")) %>% - kable() -``` - -In the example above, instead of filling the whole space, the `empty` text of "NA" replaces the empty value for each element. So for 'Mean (SD)', we now have 'NA ( NA)'. Note that the proper padding was still used for 'NA' to make sure the parentheses still align with populated records. - -### Auto Precision - -You may have noticed that the approach to formatting covered so far leaves a lot to be desired. Consider analyzing lab results, where you may want precision to vary based on the collected precision of the tests. Furthermore, depending on the summary being presented, you may wish to increase the precision further. For example, you may want the mean to be at collected precision +1 decimal place, and for standard deviation +2. - -**Tplyr** has this covered using auto-precision. Auto-precision allows you to format your numeric summaries based on the precision of the data collected. This has all been built into the format strings, because a natural place to specify your desired format is where you specify how you want your data presented. If you wish to use auto-precision, use `a` instead of `x` when creating your summaries. Note that only one `a` is needed on each side of a decimal. To use increased precision, use `a+n` where `n` is the number of additional spaces you wish to add. - -```{r autoprecision1} -tplyr_table(adlb, TRTA) %>% - add_layer( - group_desc(AVAL, by = PARAMCD) %>% - set_format_strings( - 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd) - ) - ) %>% - build() %>% - head(20) %>% - select(-starts_with("ord")) %>% - kable() -``` - -As you can see, the decimal precision is now varying depending on the test being performed. Notice that both the integer and the decimal side of each number fluctuate as well. **Tplyr** collects both the integer and decimal precision, and you can specify both separately. For example, you could use `x`'s to specify a default number of spaces for your integers that are used consistently across by variables, but vary the decimal precision based on collected data. You can also increment the number of spaces for both integer and decimal separately. - -But - this is kind of ugly, isn't it? Do we really need all 6 decimal places collected for CA? For this reason, you're able to set a cap on the precision that's displayed: - -```{r autoprecision2} -tplyr_table(adlb, TRTA) %>% - add_layer( - group_desc(AVAL, by = PARAMCD) %>% - set_format_strings( - 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd), - cap = c(int=3, dec=2) - ) - ) %>% - build() %>% - head(20) %>% - select(-starts_with("ord")) %>% - kable() -``` - -Now that looks better. The `cap` argument is part of `set_format_strings()`. You need to specify the integer and decimal caps separately. Note that integer precision works slightly differently than decimal precision. Integer precision relates to the length allotted for the left side of a decimal, but integers will not truncate. When using 'x' formatting, if an integer exceeds the set length, it will push the number over. If the integer side of auto-precision is not capped, the necessary length for an integer in the associated by group will be as long as necessary. Decimals, on the other hand, round to the specified length. These caps apply to the length allotted for the "a" on either the integer or the decimal. So for example, if the decimal length is capped at 2 and the selected precision is "a+1", then 3 decimal places will be allotted. - -This was a basic situation, but if you're paying close attention, you may have some questions. What if you have more by variables, like by visit AND test. Do we then calculate precision by visit and test? What if collected precision is different per visit and we don't want that? What about multiple summary variables? How do we determine precision then? We have modifier functions for this: - -```{r precision3} -tplyr_table(adlb, TRTA) %>% - add_layer( - group_desc(vars(AVAL, CHG, BASE), by = PARAMCD) %>% - set_format_strings( - 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA"), - cap = c(int=3, dec=2) - ) %>% - set_precision_on(AVAL) %>% - set_precision_by(PARAMCD) - ) %>% - build() %>% - head() %>% - select(-starts_with("ord")) %>% - kable() -``` - -Three variables are being summarized here - AVAL, CHG, and BASE. So which should be used for precision? `set_precision_on()` allows you to specify this, where the `precision_on()` variable must be one of the variables within `target_var`. Similarly, `set_precision_by()` changes the `by` variables used to determine collected precision. If no `precision_on()` variable is specified, the first variable in `target_var` is used. If no `precision_by` variables are specified, then the default `by` variables are used. - -### External Precision - -Lastly, while dynamic precision might be what you're looking for, you may not want precision driven by the data. Perhaps there's a company standard that dictates what decimal precision should be used for each separate lab test. Maybe even deeper down to the lab test and category. New in **Tplyr** 1.0.0 we've added the ability to take decimal precision from an external source. - -The principal of external precision is exactly the same as auto-precision. The only difference is that you - the user - provide the precision table that **Tplyr** was automatically calculating in the background. This is done using the new function `set_precision_data()`. In the output below, Notice how the precision by PARAMCD varies depending on what was specified in the data frame `prec_data`. - - -```{r external-precision} -prec_data <- tibble::tribble( - ~PARAMCD, ~max_int, ~max_dec, - "BUN", 1, 0, - "CA", 2, 4, - "CK", 3, 1, - "GGT", 3, 0, - "URATE", 3, 1, -) - -tplyr_table(adlb, TRTA) %>% - add_layer( - group_desc(AVAL, by = PARAMCD) %>% - set_format_strings( - 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA") - ) %>% - set_precision_on(AVAL) %>% - set_precision_by(PARAMCD) %>% - set_precision_data(prec_data) - ) %>% - build() %>% - head() %>% - select(-starts_with("ord")) %>% - kable() - -``` - -If one of your by variable groups are missing in the precision data, **Tplyr** can default back to using auto-precision by using the option `default=auto`. - -```{r external-precision2} -prec_data <- tibble::tribble( - ~PARAMCD, ~max_int, ~max_dec, - "BUN", 1, 0, - "CA", 2, 4, - "CK", 3, 1, - "GGT", 3, 0, -) - -tplyr_table(adlb, TRTA) %>% - add_layer( - group_desc(AVAL, by = PARAMCD) %>% - set_format_strings( - 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA") - ) %>% - set_precision_on(AVAL) %>% - set_precision_by(PARAMCD) %>% - set_precision_data(prec_data, default="auto") - ) %>% - build() %>% - head() %>% - select(-starts_with("ord")) %>% - kable() -``` diff --git a/vignettes/desc_layer_formatting.Rmd b/vignettes/desc_layer_formatting.Rmd new file mode 100644 index 00000000..29e2dcd2 --- /dev/null +++ b/vignettes/desc_layer_formatting.Rmd @@ -0,0 +1,282 @@ +--- +title: "Advanced Descriptive Statistic Layer Formatting" +output: + rmarkdown::html_vignette: + toc: true +vignette: > + %\VignetteIndexEntry{desc_layer_formatting} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup, include=FALSE} +library(tidyverse) +library(magrittr) +library(Tplyr) +library(knitr) +load("adlb.Rdata") +load("adsl.Rdata") +load('adlb.Rdata') +``` + +# NA Formatting +A lot of the nuance to formatting descriptive statistics layers has already been covered above, but there are a couple more tricks to getting the most out of **Tplyr**. One of these tricks is filling empty values. + +By default, if there is no available value for a summary in a particular observation, the result being presented will be blanked out. + +_Note: **Tplyr** generally respects factor levels - so in instances of a missing row or column group, if the factor level is present, then the variable or row will still generate)_ + +```{r missing} +adsl$TRT01P <- as.factor(adsl$TRT01P) +adlb$TRTA <- as.factor(adlb$TRTA) + +adlb_2 <- adlb %>% + filter(TRTA != "Placebo") + +tplyr_table(adlb_2, TRTA) %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01P) %>% + add_layer( + group_desc(AVAL, by=PARAMCD) %>% + set_format_strings('Mean (SD)' = f_str('xxx (xxx)', mean, sd)) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() +``` + +Note how the entire example above has all records in `var1_Placebo` missing. **Tplyr** gives you control over how you fill this space. Let's say that we wanted instead to make that space say "Missing". You can control this with the `f_str()` object using the `empty` parameter. + +```{r missing1} +tplyr_table(adlb_2, TRTA) %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01P) %>% + add_layer( + group_desc(AVAL, by=PARAMCD) %>% + set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c(.overall="MISSING"))) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() +``` + +Look at the `empty` parameter above. Here, we use a named character vector, where the name is `.overall`. When this name is used, if all elements within the cell are missing, they will be filled with the specified text. Otherwise, the provided string will fill just the missing parameter. In some cases, this may not be what you'd like to see. Perhaps we want a string that fills each missing space. + +```{r missing2} +tplyr_table(adlb_2, TRTA) %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01P) %>% + add_layer( + group_desc(AVAL, by=PARAMCD) %>% + set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c("NA"))) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() +``` + +In the example above, instead of filling the whole space, the `empty` text of "NA" replaces the empty value for each element. So for 'Mean (SD)', we now have 'NA ( NA)'. Note that the proper padding was still used for 'NA' to make sure the parentheses still align with populated records. + +# Auto Precision + +You may have noticed that the approach to formatting covered so far leaves a lot to be desired. Consider analyzing lab results, where you may want precision to vary based on the collected precision of the tests. Furthermore, depending on the summary being presented, you may wish to increase the precision further. For example, you may want the mean to be at collected precision +1 decimal place, and for standard deviation +2. + +**Tplyr** has this covered using auto-precision. Auto-precision allows you to format your numeric summaries based on the precision of the data collected. This has all been built into the format strings, because a natural place to specify your desired format is where you specify how you want your data presented. If you wish to use auto-precision, use `a` instead of `x` when creating your summaries. Note that only one `a` is needed on each side of a decimal. To use increased precision, use `a+n` where `n` is the number of additional spaces you wish to add. + +```{r autoprecision1} +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd) + ) + ) %>% + build() %>% + head(20) %>% + select(-starts_with("ord")) %>% + kable() +``` + +As you can see, the decimal precision is now varying depending on the test being performed. Notice that both the integer and the decimal side of each number fluctuate as well. **Tplyr** collects both the integer and decimal precision, and you can specify both separately. For example, you could use `x`'s to specify a default number of spaces for your integers that are used consistently across by variables, but vary the decimal precision based on collected data. You can also increment the number of spaces for both integer and decimal separately. + +But - this is kind of ugly, isn't it? Do we really need all 6 decimal places collected for CA? For this reason, you're able to set a cap on the precision that's displayed: + +```{r autoprecision2} +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd), + cap = c(int=3, dec=2) + ) + ) %>% + build() %>% + head(20) %>% + select(-starts_with("ord")) %>% + kable() +``` + +Now that looks better. The `cap` argument is part of `set_format_strings()`. You need to specify the integer and decimal caps separately. Note that integer precision works slightly differently than decimal precision. Integer precision relates to the length allotted for the left side of a decimal, but integers will not truncate. When using 'x' formatting, if an integer exceeds the set length, it will push the number over. If the integer side of auto-precision is not capped, the necessary length for an integer in the associated by group will be as long as necessary. Decimals, on the other hand, round to the specified length. These caps apply to the length allotted for the "a" on either the integer or the decimal. So for example, if the decimal length is capped at 2 and the selected precision is "a+1", then 3 decimal places will be allotted. + +This was a basic situation, but if you're paying close attention, you may have some questions. What if you have more by variables, like by visit AND test. Do we then calculate precision by visit and test? What if collected precision is different per visit and we don't want that? What about multiple summary variables? How do we determine precision then? We have modifier functions for this: + +```{r precision3} +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(vars(AVAL, CHG, BASE), by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA"), + cap = c(int=3, dec=2) + ) %>% + set_precision_on(AVAL) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() +``` + +Three variables are being summarized here - AVAL, CHG, and BASE. So which should be used for precision? `set_precision_on()` allows you to specify this, where the `precision_on()` variable must be one of the variables within `target_var`. Similarly, `set_precision_by()` changes the `by` variables used to determine collected precision. If no `precision_on()` variable is specified, the first variable in `target_var` is used. If no `precision_by` variables are specified, then the default `by` variables are used. + +# External Precision + +Lastly, while dynamic precision might be what you're looking for, you may not want precision driven by the data. Perhaps there's a company standard that dictates what decimal precision should be used for each separate lab test. Maybe even deeper down to the lab test and category. New in **Tplyr** 1.0.0 we've added the ability to take decimal precision from an external source. + +The principal of external precision is exactly the same as auto-precision. The only difference is that you - the user - provide the precision table that **Tplyr** was automatically calculating in the background. This is done using the new function `set_precision_data()`. In the output below, Notice how the precision by PARAMCD varies depending on what was specified in the data frame `prec_data`. + + +```{r external-precision} +prec_data <- tibble::tribble( + ~PARAMCD, ~max_int, ~max_dec, + "BUN", 1, 0, + "CA", 2, 4, + "CK", 3, 1, + "GGT", 3, 0, + "URATE", 3, 1, +) + +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA") + ) %>% + set_precision_on(AVAL) %>% + set_precision_by(PARAMCD) %>% + set_precision_data(prec_data) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() + +``` + +If one of your by variable groups are missing in the precision data, **Tplyr** can default back to using auto-precision by using the option `default=auto`. + +```{r external-precision2} +prec_data <- tibble::tribble( + ~PARAMCD, ~max_int, ~max_dec, + "BUN", 1, 0, + "CA", 2, 4, + "CK", 3, 1, + "GGT", 3, 0, +) + +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA") + ) %>% + set_precision_on(AVAL) %>% + set_precision_by(PARAMCD) %>% + set_precision_data(prec_data, default="auto") + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() +``` + +# Parenthesis Hugging + +By default, when using 'x' or 'a', any other character within a format string will stay stationary. Consider the standard example from the descriptive statistic layer vignette. + +```{r standard} +tplyr_table(adsl, TRT01P) %>% + add_layer( + group_desc(AGE, by = "Age (years)", where= SAFFL=="Y") %>% + set_format_strings( + "n" = f_str("xx", n), + "Mean (SD)"= f_str("xx.x (xx.xx)", mean, sd), + "Median" = f_str("xx.x", median), + "Q1, Q3" = f_str("xx, xx", q1, q3), + "Min, Max" = f_str("xx, xx", min, max), + "Missing" = f_str("xx", missing) + ) + ) %>% + build() %>% + select(-starts_with('ord')) +``` + +Note that if a certain number of integers are alotted, space will be left for the numbers that fill that space, but the position of the parenthesis stays fixed. In some displays, you may want the parenthesis to 'hug' your number - the "format group" width should stay fixed, the parenthesis should move to the right along with the numbers consuming less integer space. Within your `f_str()`, you can achieve this by using a capital 'X'. For example, focusing on the mean and standard deviation line: + +```{r manual_hugging} +# Mock up 0 standard deviation +example_adlb <- adlb %>% + mutate( + AVAL = if_else( + AVISIT == "End of Treatment", + 85.301, + AVAL + ) + ) + +tplyr_table(example_adlb, TRTA, PARAMCD == "CK") %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("xxx.x (XXX.x)", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + head() %>% + select(-starts_with('ord')) +``` + +Similarly, the same functionality works with auto precision by using a capital A. + +```{r auto_hugging} + +tplyr_table(example_adlb, TRTA, PARAMCD == "CK") %>% + add_layer( + group_desc(AVAL, by=vars(PARAMCD, AVISIT)) %>% + set_format_strings( + TEST = f_str("a.a (A.a)", mean, sd, empty="NA") + ) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + head() %>% + select(-starts_with('ord')) +``` + +There are a two rules when using 'parenthesis hugging': + +- Capital letters should only be used on the integer side of a number +- A character must precede the capital letter, otherwise there's no character to 'hug'. + +Aside from these rules, parenthesis hugging can be combined with all other valid format string capabilities. From c6cdf0af5180601f4bee4f10c6d94f895048f95b Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:29:35 -0500 Subject: [PATCH 13/48] Don't output docs for gather_settings() --- R/format.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/format.R b/R/format.R index ec690736..f4c1d992 100644 --- a/R/format.R +++ b/R/format.R @@ -253,6 +253,7 @@ get_format_string_regex <- function() { #' @param x A character string representing a format string section #' #' @return A named list of settings +#' @noRd gather_settings <- function(x) { settings <- list( From debcde61df6d0e0c7d3e9fd37d5967a1c8ead9fa Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:29:50 -0500 Subject: [PATCH 14/48] add desc layer advanced formatting to pkgdown --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index 58f54aee..99a6db44 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -127,6 +127,7 @@ articles: - title: Helper Vignettes navbar: Helpers contents: + - desc_layer_formatting - riskdiff - sort - options From 2ce289c4860a0f89ad22fe78b9e2ce3dda8f3302 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:30:05 -0500 Subject: [PATCH 15/48] Ignore gather_settings() --- man/gather_settings.Rd | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 man/gather_settings.Rd diff --git a/man/gather_settings.Rd b/man/gather_settings.Rd deleted file mode 100644 index 79ede97c..00000000 --- a/man/gather_settings.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/format.R -\name{gather_settings} -\alias{gather_settings} -\title{Gather the settings for a specific format string section} -\usage{ -gather_settings(x) -} -\arguments{ -\item{x}{A character string representing a format string section} -} -\value{ -A named list of settings -} -\description{ -This function will collect specific settings about a format string section, -including integer and decimal length, whether autos were turned on, and hug -character settings/ -} From 44d466f2d0a4e266f32136559a6e850cec4d603e Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:30:15 -0500 Subject: [PATCH 16/48] Update vignette formatting --- vignettes/desc_layer_formatting.Rmd | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vignettes/desc_layer_formatting.Rmd b/vignettes/desc_layer_formatting.Rmd index 29e2dcd2..49748ca1 100644 --- a/vignettes/desc_layer_formatting.Rmd +++ b/vignettes/desc_layer_formatting.Rmd @@ -26,8 +26,9 @@ load("adsl.Rdata") load('adlb.Rdata') ``` -# NA Formatting -A lot of the nuance to formatting descriptive statistics layers has already been covered above, but there are a couple more tricks to getting the most out of **Tplyr**. One of these tricks is filling empty values. +A lot of the nuance to formatting descriptive statistics layers was covered in the descriptive statistic layer vignette, but there are a couple more tricks to getting the most out of **Tplyr**. In this vignette, we'll cover some of the options in more detail. + +# Empty Value Formatting By default, if there is no available value for a summary in a particular observation, the result being presented will be blanked out. @@ -276,7 +277,7 @@ tplyr_table(example_adlb, TRTA, PARAMCD == "CK") %>% There are a two rules when using 'parenthesis hugging': -- Capital letters should only be used on the integer side of a number -- A character must precede the capital letter, otherwise there's no character to 'hug'. +- Capital letters should only be used on the integer side of a number +- A character must precede the capital letter, otherwise there's no character to 'hug' Aside from these rules, parenthesis hugging can be combined with all other valid format string capabilities. From 0d17ff67dafddb252ecbbb1f457e7056a794fdaf Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:33:24 -0500 Subject: [PATCH 17/48] News and description --- DESCRIPTION | 2 +- NEWS.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b78a7950..6f709b5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Tplyr Title: A Traceability Focused Grammar of Clinical Data Summary -Version: 1.0.2.9000 +Version: 1.1.0 Authors@R: c( person(given = "Eli", diff --git a/NEWS.md b/NEWS.md index bbf0ef94..51c63459 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# Tplyr 1.1.0 +- This release incorporate parenthesis hugging across all layers (#117) + # Tplyr 1.0.2 - Bug fixes - Resolve issue with `where` logic when using population data. From 3df49f3c7f7f203c7290964669a93a3f352f9f79 Mon Sep 17 00:00:00 2001 From: mstackhouse Date: Thu, 29 Dec 2022 21:35:39 -0500 Subject: [PATCH 18/48] build pkgdown --- docs/404.html | 11 +- docs/CONTRIBUTING.html | 7 +- docs/ISSUE_TEMPLATE.html | 7 +- docs/LICENSE-text.html | 7 +- docs/LICENSE.html | 7 +- docs/PULL_REQUEST_TEMPLATE.html | 34 +- docs/articles/count.html | 43 +- docs/articles/custom-metadata.html | 51 +- docs/articles/denom.html | 138 ++-- docs/articles/desc.html | 719 +----------------- docs/articles/index.html | 11 +- docs/articles/metadata.html | 11 +- docs/articles/options.html | 16 +- docs/articles/riskdiff.html | 35 +- docs/articles/shift.html | 17 +- docs/articles/sort.html | 13 +- docs/articles/styled-table.html | 11 +- docs/articles/table.html | 41 +- docs/authors.html | 11 +- docs/index.html | 13 +- docs/news/index.html | 17 +- docs/pkgdown.yml | 7 +- docs/reference/add_column_headers.html | 22 +- docs/reference/add_risk_diff.html | 64 +- docs/reference/add_total_row.html | 20 +- docs/reference/append_metadata.html | 24 +- docs/reference/apply_formats.html | 7 +- docs/reference/apply_row_masks.html | 7 +- docs/reference/build.html | 39 +- docs/reference/by.html | 7 +- docs/reference/f_str.html | 55 +- docs/reference/get_meta_result.html | 7 +- docs/reference/get_meta_subset.html | 7 +- docs/reference/get_metadata.html | 24 +- docs/reference/get_numeric_data.html | 7 +- docs/reference/get_stats_data.html | 7 +- docs/reference/header_n.html | 7 +- docs/reference/index.html | 7 +- docs/reference/keep_levels.html | 7 +- docs/reference/layer_attachment.html | 7 +- docs/reference/layer_constructors.html | 7 +- docs/reference/ordering.html | 18 +- docs/reference/pipe.html | 7 +- docs/reference/pop_data.html | 7 +- docs/reference/pop_treat_var.html | 7 +- docs/reference/precision_by.html | 7 +- docs/reference/precision_on.html | 7 +- docs/reference/process_formatting.html | 7 +- docs/reference/process_statistic_data.html | 7 +- .../process_statistic_formatting.html | 7 +- docs/reference/process_summaries.html | 7 +- docs/reference/set_custom_summaries.html | 17 +- docs/reference/set_denom_ignore.html | 16 +- docs/reference/set_denom_where.html | 7 +- docs/reference/set_denoms_by.html | 94 ++- docs/reference/set_distinct_by.html | 18 +- docs/reference/set_format_strings.html | 38 +- docs/reference/set_indentation.html | 7 +- docs/reference/set_missing_count.html | 18 +- docs/reference/set_nest_count.html | 7 +- docs/reference/set_outer_sort_position.html | 7 +- docs/reference/set_precision_data.html | 7 +- docs/reference/set_stats_as_columns.html | 7 +- docs/reference/set_total_row_label.html | 20 +- docs/reference/str_indent_wrap.html | 7 +- docs/reference/table_format_defaults.html | 7 +- docs/reference/target_var.html | 7 +- docs/reference/tplyr.html | 544 ++++++------- docs/reference/tplyr_layer.html | 7 +- docs/reference/tplyr_meta.html | 7 +- docs/reference/tplyr_table.html | 7 +- docs/reference/treat_grps.html | 7 +- docs/reference/treat_var.html | 7 +- docs/reference/where.html | 7 +- docs/sitemap.xml | 6 +- 75 files changed, 926 insertions(+), 1590 deletions(-) diff --git a/docs/404.html b/docs/404.html index 7c82bc7d..08ba95c8 100644 --- a/docs/404.html +++ b/docs/404.html @@ -13,10 +13,10 @@ - - + + - + @@ -41,7 +41,7 @@ Tplyr - 1.0.1.9000 + 1.1.0