diff --git a/DESCRIPTION b/DESCRIPTION
index b78a7950..f04e9c5b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: Tplyr
Title: A Traceability Focused Grammar of Clinical Data Summary
-Version: 1.0.2.9000
+Version: 1.1.0
Authors@R:
c(
person(given = "Eli",
@@ -20,8 +20,12 @@ Authors@R:
person(given = "Nathan",
family = "Kosiba",
email = "Nathan.Kosiba@atorusresearch.com",
- role = "aut",
+ role = "ctb",
comment = c(ORCID = "0000-0001-5359-4234")),
+ person(given = "Sadchla",
+ family = "Mascary",
+ email = "sadchla.mascary@atorusresearch.com",
+ role = "ctb"),
person(given = "Atorus Research LLC",
role = "cph")
)
@@ -55,6 +59,6 @@ Suggests:
pharmaRTF,
withr
VignetteBuilder: knitr
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
RdMacros: lifecycle
Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 87428457..16e6a319 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -50,6 +50,7 @@ export(add_total_row)
export(add_treat_grps)
export(add_variables)
export(append_metadata)
+export(apply_conditional_format)
export(apply_formats)
export(apply_row_masks)
export(build)
@@ -69,6 +70,7 @@ export(get_precision_on)
export(get_shift_layer_formats)
export(get_stats_data)
export(get_target_var)
+export(get_tplyr_regex)
export(get_where)
export(group_count)
export(group_desc)
@@ -114,6 +116,8 @@ export(set_target_var)
export(set_total_row_label)
export(set_treat_var)
export(set_where)
+export(str_extract_fmt_group)
+export(str_extract_num)
export(str_indent_wrap)
export(tplyr_layer)
export(tplyr_meta)
@@ -231,6 +235,7 @@ importFrom(stats,median)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(stats,var)
+importFrom(stringr,"str_sub<-")
importFrom(stringr,fixed)
importFrom(stringr,regex)
importFrom(stringr,str_count)
diff --git a/NEWS.md b/NEWS.md
index bbf0ef94..2482e4c0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,10 @@
+# Tplyr 1.1.0
+- This release incorporate parenthesis hugging across all layers (#117)
+- New functions `apply_conditional_formats()`, `str_extract_fmt_group()` and `str_extract_num()`
+- Vignette reorganization, as well as new vignettes added
+- Bug fix for #115
+- Scroll bar added to articles menu on pkgdown (Thanks @mattroumaya and @MayaGans!!!)
+
# Tplyr 1.0.2
- Bug fixes
- Resolve issue with `where` logic when using population data.
diff --git a/R/apply_conditional_format.R b/R/apply_conditional_format.R
new file mode 100644
index 00000000..16198fa1
--- /dev/null
+++ b/R/apply_conditional_format.R
@@ -0,0 +1,131 @@
+#' Validator for apply_conditional_format
+#'
+#' @param string Target character vector where text may be replaced
+#' @param format_group An integer representing the targeted numeric field within
+#' the string, numbered from left to right
+#' @param condition An expression, using the variable name 'x' as the target
+#' variable within the condition
+#' @param replacement A string to use as the replacement value
+#' @param full_string TRUE if the full string should be replaced, FALSE if the
+#' replacement should be done within the format group
+#' @noRd
+validate_conditional_format_params <- function(string, format_group, condition, replacement, full_string) {
+ if (!inherits(string, "character")) {
+ stop("Paramter `string` must be a character vector", call.=FALSE)
+ }
+
+ if (!inherits(format_group, "numeric") || (inherits(format_group, "numeric") && format_group %% 1 != 0)) {
+ stop("Paramter `format_group` must be an integer", call.=FALSE)
+ }
+
+ if (!inherits(replacement, "character")) {
+ stop("Paramter `replacement` must be a string", call.=FALSE)
+ }
+
+ # Condition statement must use the variable name 'x'
+ if (!identical(all.vars(condition), "x")) {
+ stop("Condition must be a valid expression only using the variable name `x`", call.=FALSE)
+ }
+
+ if (!inherits(full_string, "logical")) {
+ stop("Paramter `full_string` must be bool", call.=FALSE)
+ }
+
+}
+
+#' Conditional reformatting of a pre-populated string of numbers
+#'
+#' This function allows you to conditionally re-format a string of numbers based
+#' on a numeric value within the string itself. By selecting a "format group",
+#' which is targeting a specific number within the string, a user can establish
+#' a condition upon which a provided replacement string can be used. Either the
+#' entire replacement can be used to replace the entire string, or the
+#' replacement text can refill the "format group" while preserving the original
+#' width and alignment of the target string.
+#'
+#' @param string Target character vector where text may be replaced
+#' @param format_group An integer representing the targeted numeric field within
+#' the string, numbered from left to right
+#' @param condition An expression, using the variable name 'x' as the target
+#' variable within the condition
+#' @param replacement A string to use as the replacement value
+#' @param full_string TRUE if the full string should be replaced, FALSE if the
+#' replacement should be done within the format group
+#'
+#' @return A character vector
+#' @export
+#'
+#' @examples
+#'
+#' string <- c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)")
+#'
+#' apply_conditional_format(string, 2, x == 0, " 0 ", full_string=TRUE)
+#'
+#' apply_conditional_format(string, 2, x < 1, "(<1%)")
+#'
+apply_conditional_format <- function(string, format_group, condition, replacement, full_string=FALSE) {
+
+ condition <- enexpr(condition)
+
+ # Validate all parameters
+ validate_conditional_format_params(string, format_group, condition, replacement, full_string)
+
+ # Pull out regex to drive the work
+ f_grp_rx <- get_format_group_regex()
+
+ # Pull out all the match groups and then get the numeric for the conditional number
+ match_groups <- str_match_all(string, f_grp_rx)
+
+ # Get the number upon which the condition will be evaluated
+ x <- map_dbl(
+ match_groups,
+ ~ if (nrow(.) < format_group) {NA_real_} else {as.double(.[format_group, 2])}
+ )
+
+ # Get the bool vector for where strings should be replaced and handle NAs
+ tf <- replace_na(eval(condition), FALSE)
+
+ if (full_string) {
+ out_string <- if_else(tf, replacement, string)
+ } else {
+ # Grab the match locations to use for sub stringing
+ match_locs <- str_locate_all(string, f_grp_rx)
+ # Get the group length out to ensure that the string is fully padded
+ group_length <- map_int(
+ match_groups,
+ ~ if (nrow(.) < format_group) {NA_integer_} else {as.integer(nchar(.[format_group, 1]))}
+ )
+
+ if (any(nchar(replacement) > group_length[!is.na(group_length)])) {
+ warning(
+ paste0("Replacement string length is longer that some string's format group length.",
+ "Some alignment will not be preserved")
+ )
+ }
+
+ # Pad at least as long as the format group space
+ pad_length <- map_int(
+ group_length,
+ ~ if_else(nchar(replacement) > ., nchar(replacement), .)
+ )
+
+ # Pull out locs for the format group
+ end_locs <- map_int(
+ match_locs,
+ ~ if (nrow(.) < format_group) {NA_integer_} else {.[format_group, 'end']}
+ )
+ start_locs <- end_locs - pad_length + 1
+
+ # Build the sub string matrix
+ sub_mat <- matrix(c(rbind(start_locs, end_locs)), ncol=2, byrow=TRUE)
+
+ # Generate a vector with replacements already done
+ rep_string <- string
+ str_sub(rep_string, sub_mat) <- str_pad(replacement, pad_length)
+
+ out_string <- if_else(tf, rep_string, string)
+ }
+
+ out_string
+}
+
diff --git a/R/count.R b/R/count.R
index 3bb0063f..fb00efcc 100644
--- a/R/count.R
+++ b/R/count.R
@@ -342,10 +342,13 @@ prepare_format_metadata.count_layer <- function(x) {
# If a layer_width flag is present, edit the formatting string to display the maximum
# character length
- if (str_detect(format_strings[['n_counts']]$format_string, "a")) {
- # Replace the flag with however many xs
+ if (str_detect(format_strings[['n_counts']]$format_string, "a|A")) {
+ # Replace 'a' with appropriate 'x'
replaced_string <- str_replace(format_strings[['n_counts']]$format_string, "a",
paste(rep("x", n_width), collapse = ""))
+ # Replace 'A' with appropriate 'X'
+ replaced_string <- str_replace(replaced_string, "A",
+ paste(rep("X", n_width), collapse = ""))
# Make a new f_str and replace the old one
format_strings[['n_counts']] <- f_str(replaced_string, !!!format_strings$n_counts$vars)
diff --git a/R/format.R b/R/format.R
index 92308000..e30100a4 100644
--- a/R/format.R
+++ b/R/format.R
@@ -40,18 +40,36 @@
#' If you want two numbers on the same line, you provide two sets of x's. For
#' example, if you're presenting a value like "mean (sd)" - you could provide
#' the string 'xx.xx (xx.xxx)', or perhaps 'a.a+1 (a.a+2). Note that you're
-#' able to provide different integer lengths and different decimal precision
+#' able to provide different integer lengths and different decimal precision
#' for the two values. Each format string is independent and relates only to
#' the format specified.
#'
+#' As described above, when using 'x' or 'a', any other character within the
+#' format string will stay stationary. So for example, if your format string
+#' is 'xx (xxx.x)', your number may format as '12 ( 34.5)'. So the left side
+#' parenthesis stays fixed. In some displays, you may want the parenthesis to
+#' 'hug' your number. Following this example, when allotting 3 spaces for the
+#' integer within parentheses, the parentehsis should shift to the right,
+#' making the numbers appear '12 (34.5)'. Using `f_str()` you can achieve
+#' this by using a capital 'X' or 'A'. For this example, the format string
+#' would be 'xx (XXX.x)'.
+#'
+#' There are a two rules when using 'parenthesis hugging':
+#'
+#' - Capital letters should only be used on the integer side of a number
+#' - A character must precede the capital letter, otherwise there's no
+#' character to 'hug'
+#'
#' The other parameters of the `f_str` call specify what values should fill
#' the x's. `f_str` objects are used slightly differently between different
#' layers. When declaring a format string within a count layer, `f_str()`
#' expects to see the values `n` or `distinct_n` for event or distinct counts,
#' `pct` or `distinct_pct` for event or distinct percentages, or `total` or
-#' `distinct_total` for denominator calculations. But in descriptive statistic
-#' layers, `f_str` parameters refer to the names of the summaries being
-#' performed, either by built in defaults, or custom summaries declared using
+#' `distinct_total` for denominator calculations. Note that in an `f_str()`
+#' for a count layer 'A' or 'a' are based on n counts, and therefore don't
+#' make sense to use in percentages. But in descriptive statistic layers,
+#' `f_str` parameters refer to the names of the summaries being performed,
+#' either by built in defaults, or custom summaries declared using
#' [set_custom_summaries()]. See [set_format_strings()] for some more notes
#' about layers specific implementation.
#'
@@ -115,6 +133,10 @@
#'
#' f_str("xx.x, xx.x, xx.x", q1, median, q3)
#'
+#' f_str("xx (XXX.x%)", n, pct)
+#'
+#' f_str("a.a+1 (A.a+2)", mean, sd)
+#'
f_str <- function(format_string, ..., empty=c(.overall='')) {
# Capture the variables off of the ellipsis
@@ -125,32 +147,33 @@ f_str <- function(format_string, ..., empty=c(.overall='')) {
# Check format string class
assert_has_class(format_string, "character")
- # Capture the format groups
+ # Do a pre-check of the format string to catch invalid auto specifications
+ if (str_detect(format_string, "AA|aa")) {
+ stop(paste0("In f_str(), only use a single 'A' or 'a' on the integer or",
+ " decimal side to trigger auto precision."), call.=TRUE)
+ }
- # This regex does a few things so let's break it into pieces
- # (a(\\+\\d)?|x+) -> a, possibly followed by + and a digit, or 1 or more x's
- # This captures the integer, with either the auto formats or x's
- # (\\.(a(\\+\\d)?|x+)?)? -> a period, then possibly the same a <+digit>, or multiple x's
- # This captures the decimal places, but they don't have to exist
- rx <- "(a(\\+\\d+)?|x+)(\\.(a(\\+\\d+)?|x+)?)?"
- formats <- str_extract_all(format_string, regex(rx))[[1]]
+ # Parse out the format string sections
+ rx <- get_format_string_regex()
+ formats <- str_extract_all(format_string, rx)[[1]]
# Duplicate any '%' to escape them
format_string_1 <- str_replace_all(format_string, "%", "%%")
# Make the sprintf ready string
- repl_str <- str_replace_all(format_string_1, regex(rx), "%s")
+ repl_str <- str_replace_all(format_string_1, rx, "%s")
- # Make sure that if two formats were found, two varaibles exist
+ # Make sure that if two formats were found, two variables exist
assert_that(length(formats) == length(vars),
msg = paste0("In `f_str` ", length(formats), " formats were entered in the format string ",
format_string, "but ", length(vars), " variables were assigned."))
# Pull out the integer and decimal
- settings <- map(formats, separate_int_dig)
+ settings <- map(formats, gather_settings)
# A value in settings will be <0 if it's an auto format
- auto_precision <- any(map_lgl(settings, ~any(attr(.x, 'auto'))))
+ auto_precision <- any(map_lgl(settings, ~ any(as.logical(.[c('auto_int', 'auto_dec')]))))
+ hug_formatting <- any(map_lgl(settings, ~ !is.na(.['hug_char'])))
# All ellipsis variables are names
assert_that(all(map_lgl(vars, function(x) class(x) == "name")),
@@ -164,40 +187,96 @@ f_str <- function(format_string, ..., empty=c(.overall='')) {
size = nchar(format_string),
repl_str = repl_str,
auto_precision = auto_precision,
+ hug_formatting = hug_formatting,
empty=empty
),
class="f_str"
)
}
-#' Evaluate a portion of a format string to check the integer and digit lengths
+#' Gather the settings for a specific format string section
#'
-#' @param x String to have sections counted
+#' This function will collect specific settings about a format string section,
+#' including integer and decimal length, whether autos were turned on, and hug
+#' character settings/
#'
-#' @return A named vector with the names "int" and "dec", countaining numeric values
+#' @param x A character string representing a format string section
#'
+#' @return A named list of settings
#' @noRd
-separate_int_dig <- function(x){
+gather_settings <- function(x) {
+
+ settings <- list(
+ int = 0,
+ dec = 0,
+ auto_int = FALSE,
+ auto_dec = FALSE,
+ hug_char = NA_character_
+ )
- # Initialize a vector and name the elements
- out <- numeric(2)
- names(out) <- c('int', 'dec')
- attr(out, 'auto') <- c('int'=FALSE, 'dec'=FALSE)
+ settings <- parse_hug_char(x, settings)
+ settings <- separate_int_dig(x, settings)
- # Count the characters on each side of the decimal
- fields <- str_split(x, "\\.")[[1]]
+ settings
+}
- num_chars <- map(fields, parse_fmt)
- auto <- map_lgl(num_chars, ~attr(.x, 'auto'))
- num_chars <- as.numeric(num_chars)
- attr(num_chars, 'auto') <- auto
+#' Find if a hug character exists and attach to settings
+#'
+#' @param x Format string section
+#' @param settings A list of settings for a format string section
+#'
+#' @return List of settings
+#' @noRd
+parse_hug_char <- function(x, settings) {
+
+ # Find hugging
+ if (str_detect(x, "X|A")) {
+
+ # Look for characters preceding X or A that aren't X or A
+ hug_char_rx <- regex("([^XA]+)[XA]")
+
+ # Search the hug character and pull out all matches
+ # x is guaranteed to be a single element vector so pull out first
+ # element of the list
+ hug_char_match <- str_match_all(x, hug_char_rx)[[1]]
+
+ # If no rows, then X or A was used with no specified hug character
+ if (nrow(hug_char_match) == 0) {
+ stop(
+ paste0("In f_str(), an 'X' or 'A' was used but no hug character ",
+ "was specified, such as a parenthesis. Use 'X' or 'A' to bind ",
+ "a character within a format string."),
+ call.=FALSE
+ )
+ }
+
+ # The match matrix can't be more than one row. If it is, it was probably
+ # because X or A were placed before and after a decimal, so show the user
+ if (nrow(hug_char_match) > 1) {
+ err_msg <- paste0(
+ "In f_str(), invalid format string specification. The following section",
+ " failed to parse:\n\t'", x,
+ "'\nThe issue is present with a hug character. Was 'X' or 'A' used after",
+ " a decimal?"
+ )
+ stop(err_msg, call.=FALSE)
+ }
- # Insert the number of characters into the named vector
- for (i in seq_along(num_chars)) {
- out[i] <- num_chars[i]
- attr(out, 'auto')[i] <- attr(num_chars, 'auto')[i]
+ # If X or A was used after the decimal at all, that's also invalid so error
+ # out as well
+ if (str_detect(hug_char_match[1,1], fixed("."))) {
+ stop(
+ paste0("In f_str(), 'X' or 'A' can only be used on the left side of a",
+ " decimal within a format string."),
+ call.=FALSE
+ )
+ }
+
+ # The hug char is in a capture group, so we pull it out of the match
+ settings$hug_char <- hug_char_match[1,2]
}
- out
+
+ settings
}
#' Parse a portion of a string format
@@ -211,7 +290,7 @@ separate_int_dig <- function(x){
#' @noRd
parse_fmt <- function(x) {
# If it's an auto format, grab the output value
- if (grepl('a', x)) {
+ if (grepl('a|A', x)) {
# Pick out the digit
add <- replace_na(as.double(str_extract(x, '\\d+')), 0)
# Auto formats will be -1 - the specified precision
@@ -226,328 +305,34 @@ parse_fmt <- function(x) {
val
}
-#' Set the format strings and associated summaries to be performed in a layer
-#'
-#' 'Tplyr' gives you extensive control over how strings are presented.
-#' \code{set_format_strings} allows you to apply these string formats to your
-#' layer. This behaves slightly differently between layers.
-#'
-#' @details Format strings are one of the most powerful components of 'Tplyr'.
-#' Traditionally, converting numeric values into strings for presentation can
-#' consume a good deal of time. Values and decimals need to align between
-#' rows, rounding before trimming is sometimes forgotten - it can become a
-#' tedious mess that, in the grand scheme of things, is not an important part
-#' of the analysis being performed. 'Tplyr' makes this process as simple as we
-#' can, while still allowing flexibility to the user.
-#'
-#' In a count layer, you can simply provide a single \code{\link{f_str}}
-#' object to specify how you want your n's, percentages, and denominators formatted.
-#' If you are additionally supplying a statistic, like risk difference using
-#' \code{\link{add_risk_diff}}, you specify the count formats using the name
-#' 'n_counts'. The risk difference formats would then be specified using the
-#' name 'riskdiff'. In a descriptive statistic layer,
-#' \code{set_format_strings} allows you to do a couple more things:
-#' \itemize{
-#' \item{By naming parameters with character strings, those character strings
-#' become a row label in the resulting data frame}
-#' \item{The actual summaries that are performed come from the variable names
-#' used within the \code{\link{f_str}} calls}
-#' \item{Using multiple summaries (declared by your \code{\link{f_str}}
-#' calls), multiple summary values can appear within the same line. For
-#' example, to present "Mean (SD)" like displays.}
-#' \item{Format strings in the desc layer also allow you to configure how
-#' empty values should be presented. In the \code{f_str} call, use the
-#' \code{empty} parameter to specify how missing values should present. A
-#' single element character vector should be provided. If the vector is
-#' unnamed, that value will be used in the format string and fill the space
-#' similar to how the numbers will display. Meaning - if your empty string is
-#' 'NA' and your format string is 'xx (xxx)', the empty values will populate
-#' as 'NA ( NA)'. If you name the character vector in the 'empty' parameter
-#' '.overall', like \code{empty = c(.overall='')}, then that exact string will
-#' fill the value instead. For example, providing 'NA' will instead create the
-#' formatted string as 'NA' exactly.}
-#' }
-#'
-#' See the \code{\link{f_str}} documentation for more details about how this
-#' implementation works.
-#'
-#' @param e Layer on which string formats will be bound
-#' @param ... Named parameters containing calls to \code{f_str} to set the
-#' format strings
-#'
-#' @return The layer environment with the format string binding added
-#' @export
-#' @rdname set_format_strings
-#'
-#' @examples
-#' # Load in pipe
-#' library(magrittr)
-#'
-#' # In a count layer
-#' tplyr_table(mtcars, gear) %>%
-#' add_layer(
-#' group_count(cyl) %>%
-#' set_format_strings(f_str('xx (xx%)', n, pct))
-#' ) %>%
-#' build()
-#'
-#' # In a descriptive statistics layer
-#' tplyr_table(mtcars, gear) %>%
-#' add_layer(
-#' group_desc(mpg) %>%
-#' set_format_strings(
-#' "n" = f_str("xx", n),
-#' "Mean (SD)" = f_str("xx.x", mean, empty='NA'),
-#' "SD" = f_str("xx.xx", sd),
-#' "Median" = f_str("xx.x", median),
-#' "Q1, Q3" = f_str("xx, xx", q1, q3, empty=c(.overall='NA')),
-#' "Min, Max" = f_str("xx, xx", min, max),
-#' "Missing" = f_str("xx", missing)
-#' )
-#' ) %>%
-#' build()
-#'
-#' # In a shift layer
-#' tplyr_table(mtcars, am) %>%
-#' add_layer(
-#' group_shift(vars(row=gear, column=carb), by=cyl) %>%
-#' set_format_strings(f_str("xxx (xx.xx%)", n, pct))
-#' ) %>%
-#' build()
-#'
-set_format_strings <- function(e, ...) {
- UseMethod("set_format_strings")
-}
-
-
-#' Desc layer S3 method for set_format_strings
-#'
-#' @param e Layer on which to bind format strings
-#' @param ... Named parameters containing calls to \code{f_str} to set the
-#' format strings
-#' @param cap A named character vector containing an 'int' element for the cap
-#' on integer precision, and a 'dec' element for the cap on decimal precision.
-#'
-#' @return tplyr_layer object with formats attached
-#' @export
-#'
-#' @rdname set_format_strings
-set_format_strings.desc_layer <- function(e, ..., cap=getOption('tplyr.precision_cap')) {
-
- # Catch the arguments from the function call so useful errors can be thrown
- check <- enquos(...)
-
- # Make sure that all of the attachments were `f_str` objects
- for (i in seq_along(check)) {
-
- if (is_named(check)) {
- msg = paste0("In `set_format_string` entry `",names(check)[[i]],"` is not an `f_str` object. All assignmentes made within",
- " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.")
- } else {
- msg = paste0("In `set_format_string` entry ",i," is not an `f_str` object. All assignmentes made within",
- " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.")
- }
-
- assert_that(class(quo_get_expr(check[[i]])) == "f_str" || (is_call(quo_get_expr(check[[i]])) && call_name(check[[i]]) == "f_str"),
- msg = msg)
- }
-
- # Row labels are pulled from names - so make sure that everything is named
- assert_that(is_named(check),
- msg = "In `set_format_string` all parameters must be named in order to create row labels.")
-
-
- # Pick off the ellipsis
- format_strings <- list(...)
-
-
- # Get the list of variable names that need to be transposed
- summary_vars <- flatten(map(format_strings, ~ .x$vars))
-
- # Get the list of transpose transpose variables needed
- trans_vars <- map(format_strings, ~ .x$vars[[1]])
-
- # Get the variable names that need to be kept on the same row
- keep_vars <- flatten(map(format_strings, ~ tail(.x$vars, n=length(.x$vars) -1)))
-
- # Get the max format length
- max_format_length <- max(map_int(format_strings, ~ .x$size))
-
- # Pick off the row labels
- row_labels <- names(format_strings)
-
- # Identify if auto precision is needed
- need_prec_table <- any(map_lgl(format_strings, ~ .x$auto_precision))
-
- # Fill in defaults if cap hasn't fully been provided
- if (!('int' %in% names(cap))) cap['int'] <- getOption('tplyr.precision_cap')['int']
- if (!('dec' %in% names(cap))) cap['dec'] <- getOption('tplyr.precision_cap')['dec']
-
- env_bind(e,
- format_strings = format_strings,
- summary_vars = vars(!!!summary_vars),
- keep_vars = vars(!!!keep_vars),
- trans_vars = vars(!!!trans_vars),
- row_labels = row_labels,
- max_length = max_format_length,
- need_prec_table = need_prec_table,
- cap = cap
- )
- e
-}
-
-#' Set Count Layer String Format
-#'
-#' @param e Layer on which to bind format strings
-#' @param ... Named parameters containing calls to \code{f_str} to set the format strings
-#'
-#' @return Returns the modified layer object.
-#' @export
-#' @rdname set_format_strings
-set_format_strings.count_layer <- function(e, ...) {
-
- params <- count_f_str_check(...)
-
- env_bind(e, format_strings = params)
-
- e
-}
-
-set_format_strings.shift_layer <- function(e, ...) {
-
- dots <- list(...)
-
- assert_that(all(dots$vars %in% c("n", "pct")),
- msg = "formats in shift layers can only be n")
-
- env_bind(e, format_strings = dots[[1]])
-
- e
-}
-
-#' Extract a translation vector for f_str objects
-#'
-#' The names of the format_strings list should be row labels in the output. The first
-#' element of the \code{vars} object are the transpose variables, so make the names of
-#' those variables the vector names, and the names of the format_strings elements the
-#' values to allow easy creation of a \code{row_labels} variable in the data
-#'
-#' @param fmt_strings The \code{format_strings} varaible in a layer
-#'
-#' @return A named character vector with the flipping applied
-#' @noRd
-name_translator <- function(fmt_strings) {
- out <- names(fmt_strings)
- names(out) <- map_chr(fmt_strings, ~ as_name(.x$vars[[1]]))
- out
-}
-
-#' Format a numeric value using an \code{f_str} object
-#'
-#' Using the \code{f_str} object, information about the integer length and
-#' significant digits are extracted. Proper round is applied and the formatted numeric value is returned.
+#' Evaluate a portion of a format string to check the integer and digit lengths
#'
+#' @param x Format string section
+#' @param settings A list of settings for a format string section
#'
-#' @param val Numeric value to be formatted
-#' @param fmt \code{f_str} object with formatting information related to numeric value to be formatted
-#' @param i Index of the format within the \code{f_str} object
-#' @param autos A named numeric vector containing the 'auto' formatting values
-#' for the integer length(int) and decimal length(dec).
+#' @return List of settings
#'
-#' @return String formatted numeric value
#' @noRd
-num_fmt <- function(val, i, fmt=NULL, autos=NULL) {
-
- assert_that(is.numeric(val))
- assert_has_class(fmt, 'f_str')
- assert_that(i <= length(fmt$formats), msg="In `num_fmt` supplied ")
-
- # Auto precision requires that integer and decimal are
- # pulled from the row. If auto, settings will be the amount to add
- # to max prec, so add those together. Otherwise pull the manually
- # specified value
- int_len <- ifelse(attr(fmt$settings[[i]],'auto')['int'],
- fmt$settings[[i]]['int'] + autos['int'],
- fmt$settings[[i]]['int'])
-
- decimals <- ifelse(attr(fmt$settings[[i]],'auto')['dec'],
- fmt$settings[[i]]['dec'] + autos['dec'],
- fmt$settings[[i]]['dec'])
-
- # Set nsmall to input decimals
- nsmall = decimals
-
- # Increment digits for to compensate for display
- if (decimals > 0) decimals <- decimals + 1
+separate_int_dig <- function(x, settings){
- # Empty return string
- if (is.na(val)) {
- return(str_pad(fmt$empty[1], int_len+decimals, side="left"))
- }
-
- # Use two different rounding methods based on if someone is matching with IBM rounding
- if(getOption("tplyr.IBMRounding", FALSE)) {
- warn(paste0(c("You have enabled IBM Rounding. This is an experimental feature.",
- " If you have feedback please get in touch with the maintainers!")),
- .frequency = "regularly", .frequency_id = "tplyr.ibm", immediate. = TRUE)
- rounded <- ut_round(val, nsmall)
- } else {
- rounded <- round(val, nsmall)
- }
-
- # Form the string
- return(
- format(
- # Round
- rounded,
- # Set width of format string
- width=(int_len+decimals),
- # Decimals to display
- nsmall=nsmall
- )
- )
-}
+ # Count the characters on each side of the decimal
+ fields <- str_split(x, "\\.")[[1]]
+ # Label the split segments
+ names(fields) <- c('int', 'dec')[1:length(fields)]
-#' Check if format strings have been applied to a layer
-#'
-#' @param e Layer environment
-#'
-#' @return Boolean
-#' @noRd
-has_format_strings <- function(e) {
- 'format_strings' %in% ls(envir=e)
-}
+ # Parse out length and auto info from each field and apply to settings
+ num_chars <- map(fields, parse_fmt)
+ auto <- map_lgl(num_chars, ~attr(.x, 'auto'))
-#' Pad Numeric Values
-#'
-#' This is generally used with a count layer
-#'
-#' @param string_ The current values of the numeric data
-#' @param right_pad The total string length, done after the left pad
-#' @param left_pad The length of the left_pad
-#'
-#' @return Modified string
-#'
-#' @noRd
-pad_formatted_data <- function(x, right_pad, left_pad) {
- # Pad the left with difference between left_pad and nchar(string_)
- if(nchar(x)[1] < left_pad) {
- # The double pasting looks weird but the inner one is meant to create single character
- # that is the needed number of spaces and the outer pastes that to the value
- x <- map_chr(x,
- ~ paste0(
- paste0(rep(" ", left_pad - nchar(.x)), collapse = ""),
- .x))
- }
+ settings[names(num_chars)] <- as.numeric(num_chars)
+ settings[paste0("auto_", names(auto))] <- auto
- #Padd the right with the difference of the max layer length
- if(right_pad > max(nchar(x))) {
- x <- map_chr(x,
- paste0, paste0(rep(" ", right_pad - max(nchar(x))),
- collapse = ""))
+ # If a hug character is specified,subtract if from the integer length
+ if (!is.na(settings$hug_char) && settings$auto_int) {
+ settings$int <- settings$int + (nchar(settings$hug_char) - 1)
}
- x
+ settings
}
#' Helper for changing values on count f_str
diff --git a/R/num_fmt.R b/R/num_fmt.R
new file mode 100644
index 00000000..12ca4de3
--- /dev/null
+++ b/R/num_fmt.R
@@ -0,0 +1,115 @@
+#' Format a numeric value using an \code{f_str} object
+#'
+#' Using the \code{f_str} object, information about the integer length and
+#' significant digits are extracted. Proper round is applied and the formatted numeric value is returned.
+#'
+#'
+#' @param val Numeric value to be formatted
+#' @param fmt \code{f_str} object with formatting information related to numeric value to be formatted
+#' @param i Index of the format within the \code{f_str} object
+#' @param autos A named numeric vector containing the 'auto' formatting values
+#' for the integer length(int) and decimal length(dec).
+#'
+#' @return String formatted numeric value
+#' @noRd
+num_fmt <- function(val, i, fmt=NULL, autos=NULL) {
+
+ # Auto precision requires that integer and decimal are pulled from the row. If
+ # auto, settings will be the amount to add to max prec, so add those together.
+ # Otherwise pull the manually specified value
+ int_len <- ifelse(fmt$setting[[i]]$auto_int,
+ fmt$setting[[i]]$int + autos['int'],
+ fmt$setting[[i]]$int)
+
+ decimals <- ifelse(fmt$setting[[i]]$auto_dec,
+ fmt$setting[[i]]$dec + autos['dec'],
+ fmt$setting[[i]]$dec)
+
+ # Set nsmall to input decimals
+ nsmall <- decimals
+
+ # Increment digits for to compensate for display
+ if (decimals > 0) decimals <- decimals + 1
+
+ # Empty return string
+ if (is.na(val)) {if (is.na(fmt$settings[[i]]$hug_char)) {
+ return(str_pad(fmt$empty[1], int_len+decimals, side="left"))
+ } else{
+ return(
+ str_pad(
+ paste0(fmt$settings[[i]]$hug_char, fmt$empty[1]),
+ int_len+decimals,
+ side="left")
+ )
+ }
+
+ }
+
+ # Use two different rounding methods based on if someone is matching with IBM rounding
+ if(getOption("tplyr.IBMRounding", FALSE)) {
+ warn(paste0(c("You have enabled IBM Rounding. This is an experimental feature.",
+ " If you have feedback please get in touch with the maintainers!")),
+ .frequency = "regularly", .frequency_id = "tplyr.ibm", immediate. = TRUE)
+ rounded <- ut_round(val, nsmall)
+ } else {
+ rounded <- round(val, nsmall)
+ }
+
+ # Form the string
+ if (is.na(fmt$settings[[i]]$hug_char)) {
+ fmt_num <- format(
+ # Round
+ rounded,
+ # Set width of format string
+ width=(int_len+decimals),
+ # Decimals to display
+ nsmall=nsmall
+ )
+ } else {
+ fmt_num <- str_pad(
+ paste0(
+ #Paste the hug character
+ fmt$settings[[i]]$hug_char,
+ format(
+ rounded,
+ nsmall=nsmall
+ )
+ ),
+ width=(int_len+decimals)
+ )
+ }
+
+ fmt_num
+}
+
+#' Pad Numeric Values
+#'
+#' This is generally used with a count layer
+#'
+#' @param x The current values of the numeric data
+#' @param right_pad The total string length, done after the left pad
+#' @param left_pad The length of the left_pad
+#'
+#' @return Modified string
+#'
+#' @noRd
+pad_formatted_data <- function(x, right_pad, left_pad) {
+ # Pad the left with difference between left_pad and nchar(string_)
+ if(nchar(x)[1] < left_pad) {
+ # The double pasting looks weird but the inner one is meant to create single character
+ # that is the needed number of spaces and the outer pastes that to the value
+ x <- map_chr(x,
+ ~ paste0(
+ paste0(rep(" ", left_pad - nchar(.x)), collapse = ""),
+ .x))
+ }
+
+ #Padd the right with the difference of the max layer length
+ if(right_pad > max(nchar(x))) {
+ x <- map_chr(x,
+ paste0, paste0(rep(" ", right_pad - max(nchar(x))),
+ collapse = ""))
+ }
+
+ x
+}
diff --git a/R/regex.R b/R/regex.R
new file mode 100644
index 00000000..d259212f
--- /dev/null
+++ b/R/regex.R
@@ -0,0 +1,112 @@
+#' Retrieve one of Tplyr's regular expressions
+#'
+#' This function allows you to extract important regular expressions used inside
+#' Tplyr.
+#'
+#' There are two important regular expressions used within Tplyr. The
+#' format_string expression is the expression to parse format strings. This is
+#' what is used to make sense out of strings like 'xx (XX.x%)' or 'a+1 (A.a+2)'
+#' by inferring what the user is specifying about number formatting.
+#'
+#' The 'format_group' regex is the opposite of this, and when given a string of
+#' numbers, such as ' 5 (34%) \[9]' will return the separate segments of numbers
+#' broken into their format groups, which in this example would be ' 5',
+#' '(34%)', and '\[9]'.
+#'
+#' @param rx A character string with either the value 'format_string' or
+#' 'format_group'
+#'
+#' @return A regular expression object
+#' @export
+#' @md
+#'
+#' @examples
+#'
+#' get_tplyr_regex('format_string')
+#'
+#' get_tplyr_regex('format_group')
+#'
+get_tplyr_regex <- function(rx=c("format_string", "format_group")) {
+ rx <- match.arg(rx)
+
+ switch(
+ rx,
+ 'format_string' = get_format_string_regex(),
+ 'format_group' = get_format_group_regex()
+ )
+}
+
+#' Generate the format string parsing regular expression
+#'
+#' @return A regular expression object with the compiled expression
+#'
+#' @noRd
+get_format_string_regex <- function() {
+
+ # On the integer side, find an a that may be followed by a + and a number
+ # so this could look like a or a+1, a+2, etc.
+ int_auto <- "a(\\+\\d+)?"
+
+ # Same as above, but look for an A and a non-whitespace character preceding
+ # the A
+ int_auto_hug <- "(\\S+)A(\\+\\d+)?"
+
+ # Look for one or more X's, with a non-whitespace character preceding
+ int_fixed_hug <- "(\\S+)X+"
+
+ # Look for one or more x's
+ int_fixed <- "x+"
+
+ # Look for an A or a that may be followed by a + and a number
+ # so this could look like a or a+1, a+2, etc.
+ # A's will be invalid here but that will be caught by error checking
+ # in parse_hug_char()
+ dec_auto <- "[A|a](\\+\\d+)?"
+
+ # One or more X or x - again X is invalid but caught later
+ dec_fixed <- "[X|x]+"
+
+ # Now prepare to piece the chunks together - all of the int side pieces are
+ # combined with "or's". The decimal side comes after that, and this specifies
+ # that it will find them if they exist, but the integer side will be found
+ # even if they don't
+ joined_string <- "(%s|%s|%s|%s)(\\.(%s|%s)?)?"
+
+ # Concatenate it all together and convert it to a regex
+ regex(
+ sprintf(
+ joined_string,
+ int_auto,
+ int_auto_hug,
+ int_fixed_hug,
+ int_fixed,
+ dec_auto,
+ dec_fixed
+ )
+ )
+}
+
+#' Return the regex for identifying format groups in populated strings
+#'
+#' This regex is the reverse of the f_str() regex, and is used to find populated
+#' format groups with real numbers rather than mock formatting
+#'
+#' @return A regular expression
+#' @noRd
+get_format_group_regex <- function() {
+
+ # 0 or more non-whitespace or non-digit character
+ nwsd <- "[^\\s\\d]*"
+
+ # 0 or more whitespace
+ ws <- "\\s*"
+
+ # Positive or negative integer or decimal
+ num <- "(\\-?\\d+(\\.\\d+)?)"
+
+ # 0 or more non-whitespace
+ nws <- "\\S*"
+
+ regex(paste0(nwsd, ws, num, nws))
+
+}
diff --git a/R/set_format_strings.R b/R/set_format_strings.R
new file mode 100644
index 00000000..651dd4d2
--- /dev/null
+++ b/R/set_format_strings.R
@@ -0,0 +1,225 @@
+#' Set the format strings and associated summaries to be performed in a layer
+#'
+#' 'Tplyr' gives you extensive control over how strings are presented.
+#' \code{set_format_strings} allows you to apply these string formats to your
+#' layer. This behaves slightly differently between layers.
+#'
+#' @details Format strings are one of the most powerful components of 'Tplyr'.
+#' Traditionally, converting numeric values into strings for presentation can
+#' consume a good deal of time. Values and decimals need to align between
+#' rows, rounding before trimming is sometimes forgotten - it can become a
+#' tedious mess that, in the grand scheme of things, is not an important part
+#' of the analysis being performed. 'Tplyr' makes this process as simple as we
+#' can, while still allowing flexibility to the user.
+#'
+#' In a count layer, you can simply provide a single \code{\link{f_str}}
+#' object to specify how you want your n's, percentages, and denominators formatted.
+#' If you are additionally supplying a statistic, like risk difference using
+#' \code{\link{add_risk_diff}}, you specify the count formats using the name
+#' 'n_counts'. The risk difference formats would then be specified using the
+#' name 'riskdiff'. In a descriptive statistic layer,
+#' \code{set_format_strings} allows you to do a couple more things:
+#' \itemize{
+#' \item{By naming parameters with character strings, those character strings
+#' become a row label in the resulting data frame}
+#' \item{The actual summaries that are performed come from the variable names
+#' used within the \code{\link{f_str}} calls}
+#' \item{Using multiple summaries (declared by your \code{\link{f_str}}
+#' calls), multiple summary values can appear within the same line. For
+#' example, to present "Mean (SD)" like displays.}
+#' \item{Format strings in the desc layer also allow you to configure how
+#' empty values should be presented. In the \code{f_str} call, use the
+#' \code{empty} parameter to specify how missing values should present. A
+#' single element character vector should be provided. If the vector is
+#' unnamed, that value will be used in the format string and fill the space
+#' similar to how the numbers will display. Meaning - if your empty string is
+#' 'NA' and your format string is 'xx (xxx)', the empty values will populate
+#' as 'NA ( NA)'. If you name the character vector in the 'empty' parameter
+#' '.overall', like \code{empty = c(.overall='')}, then that exact string will
+#' fill the value instead. For example, providing 'NA' will instead create the
+#' formatted string as 'NA' exactly.}
+#' }
+#'
+#' See the \code{\link{f_str}} documentation for more details about how this
+#' implementation works.
+#'
+#' @param e Layer on which string formats will be bound
+#' @param ... Named parameters containing calls to \code{f_str} to set the
+#' format strings
+#'
+#' @return The layer environment with the format string binding added
+#' @export
+#' @rdname set_format_strings
+#'
+#' @examples
+#' # Load in pipe
+#' library(magrittr)
+#'
+#' # In a count layer
+#' tplyr_table(mtcars, gear) %>%
+#' add_layer(
+#' group_count(cyl) %>%
+#' set_format_strings(f_str('xx (xx%)', n, pct))
+#' ) %>%
+#' build()
+#'
+#' # In a descriptive statistics layer
+#' tplyr_table(mtcars, gear) %>%
+#' add_layer(
+#' group_desc(mpg) %>%
+#' set_format_strings(
+#' "n" = f_str("xx", n),
+#' "Mean (SD)" = f_str("xx.x", mean, empty='NA'),
+#' "SD" = f_str("xx.xx", sd),
+#' "Median" = f_str("xx.x", median),
+#' "Q1, Q3" = f_str("xx, xx", q1, q3, empty=c(.overall='NA')),
+#' "Min, Max" = f_str("xx, xx", min, max),
+#' "Missing" = f_str("xx", missing)
+#' )
+#' ) %>%
+#' build()
+#'
+#' # In a shift layer
+#' tplyr_table(mtcars, am) %>%
+#' add_layer(
+#' group_shift(vars(row=gear, column=carb), by=cyl) %>%
+#' set_format_strings(f_str("xxx (xx.xx%)", n, pct))
+#' ) %>%
+#' build()
+#'
+set_format_strings <- function(e, ...) {
+ UseMethod("set_format_strings")
+}
+
+
+#' Desc layer S3 method for set_format_strings
+#'
+#' @param e Layer on which to bind format strings
+#' @param ... Named parameters containing calls to \code{f_str} to set the
+#' format strings
+#' @param cap A named character vector containing an 'int' element for the cap
+#' on integer precision, and a 'dec' element for the cap on decimal precision.
+#'
+#' @return tplyr_layer object with formats attached
+#' @export
+#'
+#' @rdname set_format_strings
+set_format_strings.desc_layer <- function(e, ..., cap=getOption('tplyr.precision_cap')) {
+
+ # Catch the arguments from the function call so useful errors can be thrown
+ check <- enquos(...)
+
+ # Make sure that all of the attachments were `f_str` objects
+ for (i in seq_along(check)) {
+
+ if (is_named(check)) {
+ msg = paste0("In `set_format_string` entry `",names(check)[[i]],"` is not an `f_str` object. All assignmentes made within",
+ " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.")
+ } else {
+ msg = paste0("In `set_format_string` entry ",i," is not an `f_str` object. All assignmentes made within",
+ " `set_format_string` must be made using the function `f_str`. See the `f_str` documentation.")
+ }
+
+ assert_that(class(quo_get_expr(check[[i]])) == "f_str" || (is_call(quo_get_expr(check[[i]])) && call_name(check[[i]]) == "f_str"),
+ msg = msg)
+ }
+
+ # Row labels are pulled from names - so make sure that everything is named
+ assert_that(is_named(check),
+ msg = "In `set_format_string` all parameters must be named in order to create row labels.")
+
+
+ # Pick off the ellipsis
+ format_strings <- list(...)
+
+
+ # Get the list of variable names that need to be transposed
+ summary_vars <- flatten(map(format_strings, ~ .x$vars))
+
+ # Get the list of transpose transpose variables needed
+ trans_vars <- map(format_strings, ~ .x$vars[[1]])
+
+ # Get the variable names that need to be kept on the same row
+ keep_vars <- flatten(map(format_strings, ~ tail(.x$vars, n=length(.x$vars) -1)))
+
+ # Get the max format length
+ max_format_length <- max(map_int(format_strings, ~ .x$size))
+
+ # Pick off the row labels
+ row_labels <- names(format_strings)
+
+ # Identify if auto precision is needed
+ need_prec_table <- any(map_lgl(format_strings, ~ .x$auto_precision))
+
+ # Fill in defaults if cap hasn't fully been provided
+ if (!('int' %in% names(cap))) cap['int'] <- getOption('tplyr.precision_cap')['int']
+ if (!('dec' %in% names(cap))) cap['dec'] <- getOption('tplyr.precision_cap')['dec']
+
+ env_bind(e,
+ format_strings = format_strings,
+ summary_vars = vars(!!!summary_vars),
+ keep_vars = vars(!!!keep_vars),
+ trans_vars = vars(!!!trans_vars),
+ row_labels = row_labels,
+ max_length = max_format_length,
+ need_prec_table = need_prec_table,
+ cap = cap
+ )
+ e
+}
+
+#' Set Count Layer String Format
+#'
+#' @param e Layer on which to bind format strings
+#' @param ... Named parameters containing calls to \code{f_str} to set the format strings
+#'
+#' @return Returns the modified layer object.
+#' @export
+#' @rdname set_format_strings
+set_format_strings.count_layer <- function(e, ...) {
+
+ params <- count_f_str_check(...)
+
+ env_bind(e, format_strings = params)
+
+ e
+}
+
+set_format_strings.shift_layer <- function(e, ...) {
+
+ dots <- list(...)
+
+ assert_that(all(dots$vars %in% c("n", "pct")),
+ msg = "formats in shift layers can only be n")
+
+ env_bind(e, format_strings = dots[[1]])
+
+ e
+}
+
+#' Extract a translation vector for f_str objects
+#'
+#' The names of the format_strings list should be row labels in the output. The first
+#' element of the \code{vars} object are the transpose variables, so make the names of
+#' those variables the vector names, and the names of the format_strings elements the
+#' values to allow easy creation of a \code{row_labels} variable in the data
+#'
+#' @param fmt_strings The \code{format_strings} varaible in a layer
+#'
+#' @return A named character vector with the flipping applied
+#' @noRd
+name_translator <- function(fmt_strings) {
+ out <- names(fmt_strings)
+ names(out) <- map_chr(fmt_strings, ~ as_name(.x$vars[[1]]))
+ out
+}
+
+#' Check if format strings have been applied to a layer
+#'
+#' @param e Layer environment
+#'
+#' @return Boolean
+#' @noRd
+has_format_strings <- function(e) {
+ 'format_strings' %in% ls(envir=e)
+}
diff --git a/R/shift.R b/R/shift.R
index e46d9cc1..a6a38d7b 100644
--- a/R/shift.R
+++ b/R/shift.R
@@ -81,17 +81,19 @@ prepare_format_metadata.shift_layer <- function(x) {
evalq({
-
# Pull max character length from counts. Should be at least 1
n_width <- max(c(nchar(numeric_data$n), 1L))
# If a layer_width flag is present, edit the formatting string to display the maximum
# character length
- if(str_detect(format_strings$format_string, "a")) {
+ if(str_detect(format_strings$format_string, "a|A")) {
# Replace the flag with however many xs
replaced_string <- str_replace(format_strings$format_string, "a",
paste(rep("x", n_width), collapse = ""))
+ replaced_string <- str_replace(replaced_string, "A",
+ paste(rep("X", n_width), collapse = ""))
+
# Make a new f_str and replace the old one
format_strings <- f_str(replaced_string, !!!format_strings$vars)
}
diff --git a/R/str_extractors.R b/R/str_extractors.R
new file mode 100644
index 00000000..cdb73535
--- /dev/null
+++ b/R/str_extractors.R
@@ -0,0 +1,78 @@
+#' Extract format group strings or numbers
+#'
+#' These functions allow you to extract segments of information from within a
+#' result string by targetting specific format groups. `str_extract_fmt_group()`
+#' allows you to pull out the individual format group string, while
+#' `str_extract_num()` allows you to pull out that specific numeric result.
+#'
+#' Format groups refer to individual segments of a string. For example, given
+#' the string ' 5 (34.4%) \[9]', there are three separate format groups, which
+#' are ' 5', '(34.4%)', and '\[9]'.
+#'
+#' @param string A string of number results from which to extract format groups
+#' @param format_group An integer representing format group that should be
+#' extracted
+#'
+#' @family String extractors
+#' @rdname str_extractors
+#'
+#' @return A character vector
+#' @export
+#' @md
+#'
+#' @examples
+#'
+#' string <- c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)")
+#'
+#' str_extract_fmt_group(string, 2)
+#'
+#' str_extract_num(string, 2)
+#'
+str_extract_fmt_group <- function(string, format_group) {
+
+ if (!inherits(string, "character")) {
+ stop("Paramter `string` must be a character vector", call.=FALSE)
+ }
+
+ if (!inherits(format_group, "numeric") || (inherits(format_group, "numeric") && format_group %% 1 != 0)) {
+ stop("Paramter `format_group` must be an integer", call.=FALSE)
+ }
+
+ # Pull out regex to drive the work
+ f_grp_rx <- get_format_group_regex()
+
+ # Pull out all the match groups and then get the numeric for the conditional number
+ match_groups <- str_extract_all(string, f_grp_rx)
+
+ # Get string broken out from groups
+ map_chr(
+ match_groups,
+ ~ if (length(.) < format_group) {NA_character_} else {.[format_group]}
+ )
+}
+
+#' @family String extractors
+#' @rdname str_extractors
+#' @export
+str_extract_num <- function(string, format_group) {
+
+ if (!inherits(string, "character")) {
+ stop("Paramter `string` must be a character vector", call.=FALSE)
+ }
+
+ if (!inherits(format_group, "numeric") || (inherits(format_group, "numeric") && format_group %% 1 != 0)) {
+ stop("Paramter `format_group` must be an integer", call.=FALSE)
+ }
+
+ # Pull out regex to drive the work
+ f_grp_rx <- get_format_group_regex()
+
+ # Pull out all the match groups and then get the numeric for the conditional number
+ match_groups <- str_match_all(string, f_grp_rx)
+
+ # Get the number upon which the condition will be evaluated
+ map_dbl(
+ match_groups,
+ ~ if (nrow(.) < format_group) {NA_real_} else {as.double(.[format_group, 2])}
+ )
+}
diff --git a/R/zzz.R b/R/zzz.R
index 5a3e6437..836ea526 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -4,7 +4,7 @@
#' @importFrom rlang quos quo env_names env_bind_active as_label eval_tidy warn quo_is_call
#' @importFrom stringr str_split str_extract_all regex str_detect str_replace_all str_replace str_locate_all fixed str_count str_trim str_wrap
#' @importFrom purrr flatten map map_lgl pmap_chr imap reduce map_chr map_int map_dbl map_dfr pmap_dfr walk2 map2 map2_dfr walk
-#' @importFrom stringr str_sub str_extract str_pad str_starts str_remove_all str_match_all
+#' @importFrom stringr str_sub str_sub<- str_extract str_pad str_starts str_remove_all str_match_all
#' @importFrom tidyr pivot_longer pivot_wider replace_na
#' @importFrom magrittr %>% extract extract2
#' @importFrom assertthat assert_that
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 58f54aee..b638c118 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -98,12 +98,22 @@ reference:
- get_metadata
- append_metadata
- starts_with('get_meta')
-- title: Helper functions
- desc: General helper functions
+- title: Templates
+ desc: Layer Templates
+- contents:
+ - new_layer_template
+- title: Post-pocessing
+ desc: Post-pocessing functions
- contents:
- - apply_formats
- str_indent_wrap
- apply_row_masks
+ - apply_conditional_format
+ - str_extract_fmt_group
+ - str_extract_num
+ - apply_formats
+- title: Helper functions
+ desc: General helper functions
+- contents:
- get_numeric_data
- get_stats_data
- get_by
@@ -111,27 +121,25 @@ reference:
- treat_var
- get_where.tplyr_layer
- Tplyr
- - new_layer_template
+ - get_tplyr_regex
articles:
-- title: Table Vignettes
- navbar: Table Vignettes
+- title: Table Basics
+ navbar: Table Basics
contents:
- table
-- title: Layer Vignettes
- navbar: Layer Vignettes
+- title: Layer Basics
+ navbar: Layer Basics
contents:
- desc
- count
- shift
-- title: Helper Vignettes
- navbar: Helpers
+- title: Table Customization
+ navbar: Table Customization
contents:
- - riskdiff
+ - general_string_formatting
+ - desc_layer_formatting
- sort
- - options
- - layer_templates
- - styled-table
- denom
- Tplyr
- title: Using Metadata
@@ -139,4 +147,15 @@ articles:
contents:
- metadata
- custom-metadata
+- title: Advanced
+ navbar: Advanced
+ contents:
+ - riskdiff
+ - options
+ - layer_templates
+- title: Post-processing
+ navbar: Post-processing
+ contents:
+ - post_processing
+ - styled-table
diff --git a/docs/404.html b/docs/404.html
index 7c82bc7d..180edbe6 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -13,10 +13,11 @@
-
-
+
+
-
+
+
@@ -41,7 +42,7 @@
Tplyr
- 1.0.1.9000
+ 1.1.0
+
An additional option for formatting the numbers above would be using
+‘parenthesis hugging’. To trigger this, on the integer side of a number
+use a capital ‘X’ or a capital ‘A’. For example:
As can be seen above, when using parenthesis hugging, the width of a
+specified format group is preserved, but the preceding character (or
+characters) to the left of the ‘X’ or ‘A’ is pulled to the right to
+‘hug’ the specified number.
+searching for how to create a total column. Tplyr
+allows you to do this as well with the function
+add_total_group() and read more in
+vignette("table").
A lot of the nuance to formatting descriptive statistics layers has
-already been covered above, but there are a couple more tricks to
-getting the most out of Tplyr. One of these tricks is
-filling empty values.
-
By default, if there is no available value for a summary in a
-particular observation, the result being presented will be blanked
-out.
-
Note:Tplyrgenerally respects factor levels - so
-in instances of a missing row or column group, if the factor level is
-present, then the variable or row will still generate)
Note how the entire example above has all records in
-var1_Placebo missing. Tplyr gives you
-control over how you fill this space. Let’s say that we wanted instead
-to make that space say “Missing”. You can control this with the
-f_str() object using the empty parameter.
Look at the empty parameter above. Here, we use a named
-character vector, where the name is .overall. When this
-name is used, if all elements within the cell are missing, they will be
-filled with the specified text. Otherwise, the provided string will fill
-just the missing parameter. In some cases, this may not be what you’d
-like to see. Perhaps we want a string that fills each missing space.
In the example above, instead of filling the whole space, the
-empty text of “NA” replaces the empty value for each
-element. So for ‘Mean (SD)’, we now have ‘NA ( NA)’. Note that the
-proper padding was still used for ‘NA’ to make sure the parentheses
-still align with populated records.
-
-
Auto Precision
-
-
You may have noticed that the approach to formatting covered so far
-leaves a lot to be desired. Consider analyzing lab results, where you
-may want precision to vary based on the collected precision of the
-tests. Furthermore, depending on the summary being presented, you may
-wish to increase the precision further. For example, you may want the
-mean to be at collected precision +1 decimal place, and for standard
-deviation +2.
-
Tplyr has this covered using auto-precision.
-Auto-precision allows you to format your numeric summaries based on the
-precision of the data collected. This has all been built into the format
-strings, because a natural place to specify your desired format is where
-you specify how you want your data presented. If you wish to use
-auto-precision, use a instead of x when
-creating your summaries. Note that only one a is needed on
-each side of a decimal. To use increased precision, use a+n
-where n is the number of additional spaces you wish to
-add.
As you can see, the decimal precision is now varying depending on the
-test being performed. Notice that both the integer and the decimal side
-of each number fluctuate as well. Tplyr collects both
-the integer and decimal precision, and you can specify both separately.
-For example, you could use x’s to specify a default number
-of spaces for your integers that are used consistently across by
-variables, but vary the decimal precision based on collected data. You
-can also increment the number of spaces for both integer and decimal
-separately.
-
But - this is kind of ugly, isn’t it? Do we really need all 6 decimal
-places collected for CA? For this reason, you’re able to set a cap on
-the precision that’s displayed:
Now that looks better. The cap argument is part of
-set_format_strings(). You need to specify the integer and
-decimal caps separately. Note that integer precision works slightly
-differently than decimal precision. Integer precision relates to the
-length allotted for the left side of a decimal, but integers will not
-truncate. When using ‘x’ formatting, if an integer exceeds the set
-length, it will push the number over. If the integer side of
-auto-precision is not capped, the necessary length for an integer in the
-associated by group will be as long as necessary. Decimals, on the other
-hand, round to the specified length. These caps apply to the length
-allotted for the “a” on either the integer or the decimal. So for
-example, if the decimal length is capped at 2 and the selected precision
-is “a+1”, then 3 decimal places will be allotted.
-
This was a basic situation, but if you’re paying close attention, you
-may have some questions. What if you have more by variables, like by
-visit AND test. Do we then calculate precision by visit and test? What
-if collected precision is different per visit and we don’t want that?
-What about multiple summary variables? How do we determine precision
-then? We have modifier functions for this:
Three variables are being summarized here - AVAL, CHG, and BASE. So
-which should be used for precision? set_precision_on()
-allows you to specify this, where the precision_on()
-variable must be one of the variables within target_var.
-Similarly, set_precision_by() changes the by
-variables used to determine collected precision. If no
-precision_on() variable is specified, the first variable in
-target_var is used. If no precision_by
-variables are specified, then the default by variables are
-used.
-
-
-
External Precision
-
-
Lastly, while dynamic precision might be what you’re looking for, you
-may not want precision driven by the data. Perhaps there’s a company
-standard that dictates what decimal precision should be used for each
-separate lab test. Maybe even deeper down to the lab test and category.
-New in Tplyr 1.0.0 we’ve added the ability to take
-decimal precision from an external source.
-
The principal of external precision is exactly the same as
-auto-precision. The only difference is that you - the user - provide the
-precision table that Tplyr was automatically
-calculating in the background. This is done using the new function
-set_precision_data(). In the output below, Notice how the
-precision by PARAMCD varies depending on what was specified in the data
-frame prec_data.
In this example, we targetted the count layers by their layer index
+(in ord_layer_index) and used dplyr::across()
+to target the result columns. Within
+apply_conditional_format() where saying take the string,
+look at the second number, and if it’s equal to 0 then replace that
+entire string segment with a blank. For more information on what the
+apply_conditional_format() function, see the function
+documentation. For now, we’ll leave this data frame unedited for the
+rest of the example.
Sorting, Column Ordering, Column Headers, and Clean-up
Now that we have our data, let’s make sure it’s in the right order.
Additionally, let’s clean the data up so it’s ready to present.
diff --git a/docs/reference/set_total_row_label.html b/docs/reference/set_total_row_label.html
index f279da3d..53c25a33 100644
--- a/docs/reference/set_total_row_label.html
+++ b/docs/reference/set_total_row_label.html
@@ -1,6 +1,6 @@
Set the label for the total row — set_total_row_label • TplyrSet the label for the total row — set_total_row_label • TplyrTplyr
- 1.0.1.9000
+ 1.1.0
@@ -41,25 +41,32 @@
diff --git a/docs/reference/table_format_defaults.html b/docs/reference/table_format_defaults.html
index fd7e0d46..d330e127 100644
--- a/docs/reference/table_format_defaults.html
+++ b/docs/reference/table_format_defaults.html
@@ -3,7 +3,7 @@
strings. You may wish to reuse the same format strings across numerous
layers. set_desc_layer_formats and set_count_layer_formats
allow you to apply your desired format strings within the entire scope of the
-table.">Get or set the default format strings for descriptive statistics layers — get_desc_layer_formats • TplyrGet or set the default format strings for descriptive statistics layers — get_desc_layer_formats • TplyrTplyr
- 1.0.1.9000
+ 1.1.0
@@ -41,25 +41,32 @@