Skip to content

Commit

Permalink
#164 - Made varying review changes requested and updated tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
sophie-gem committed Feb 29, 2024
1 parent ae783b5 commit ddbead1
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 142 deletions.
97 changes: 31 additions & 66 deletions R/format.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,25 @@
#' @section Format Checks: This function carries out a series of basic
#' checks to ensure the formats being applied make sense.
#'
#' Note, the 'type' of message that is generated will depend on the value
#' passed to the `verbose` argument: with 'stop' producing an error, 'warn'
#' producing a warning, or 'message' producing a message. A value of 'none'
#' will not output any messages.
#'
#' 1) If the variable has a suffix of `DT`, `DTM`, `TM` (indicating a
#' numeric date/time variable) then a warning will be shown if there is
#' numeric date/time variable) then a message will be shown if there is
#' no format associated with it.
#'
#' 2) If a variable is character then a warning will be shown if there is
#' 2) If a variable is character then a message will be shown if there is
#' no `$` prefix in the associated format.
#'
#' 3) If a variable is character then a warning will be shown if the
#' 3) If a variable is character then a message will be shown if the
#' associated format has greater than 31 characters (excluding the `$`).
#'
#' 4) If a variable is numeric then a warning will be shown if there is a
#' 4) If a variable is numeric then a message will be shown if there is a
#' `$` prefix in the associated format.
#'
#' 5) If a variable is numeric then a warning will be shown if the
#' 5) If a variable is numeric then a message will be shown if the
#' associated format has greater than 32 characters.
#'
#' 6) All formats will be checked against a list of formats considered
Expand Down Expand Up @@ -97,6 +102,7 @@
xportr_format <- function(.df,
metadata = NULL,
domain = NULL,
verbose = NULL,
metacore = deprecated()) {
if (!missing(metacore)) {
lifecycle::deprecate_stop(
Expand All @@ -113,11 +119,18 @@ xportr_format <- function(.df,

metadata <- metadata %||% attr(.df, "_xportr.df_metadata_")

# Verbose should use an explicit verbose option first, then the value set in
# metadata, and finally fall back to the option value
verbose <- verbose %||%
attr(.df, "_xportr.df_verbose_") %||%
getOption("xportr.length_verbose", "none")

## End of common section

assert_data_frame(.df)
assert_string(domain, null.ok = TRUE)
assert_metadata(metadata)
assert_choice(verbose, choices = .internal_verbose_choices)

domain_name <- getOption("xportr.domain_name")
format_name <- getOption("xportr.format_name")
Expand Down Expand Up @@ -147,58 +160,10 @@ xportr_format <- function(.df,
# https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/leforinforref
# /n0p2fmevfgj470n17h4k9f27qjag.htm#n0wi06aq4kydlxn1uqc0p6eygu75

expected_formats <- c(
NA,
"",
paste("$", 1:200, ".", sep = ""),
paste("date", 5:11, ".", sep = ""),
paste("time", 2:20, ".", sep = ""),
paste("datetime", 7:40, ".", sep = ""),
paste("yymmdd", 2:10, ".", sep = ""),
paste("mmddyy", 2:10, ".", sep = ""),
paste("ddmmyy", 2:10, ".", sep = ""),
"E8601DA.",
"E8601DA10.",
"E8601DN.",
"E8601DN10.",
"E8601TM.",
paste("E8601TM", 8:15, ".", sep = ""),
paste("E8601TM", 8:15, ".", sort(rep(0:6, 8)), sep = ""),
"E8601TZ.",
paste("E8601TZ", 9:20, ".", sep = ""),
paste("E8601TZ", 9:20, ".", sort(rep(0:6, 12)), sep = ""),
"E8601TX.",
paste("E8601TX", 9:20, ".", sep = ""),
"E8601DT.",
paste("E8601DT", 16:26, ".", sep = ""),
paste("E8601DT", 16:26, ".", sort(rep(0:6, 11)), sep = ""),
"E8601LX.",
paste("E8601LX", 20:35, ".", sep = ""),
"E8601LZ.",
paste("E8601LZ", 9:20, ".", sep = ""),
"E8601DX.",
paste("E8601DX", 20:35, ".", sep = ""),
"B8601DT.",
paste("B8601DT", 15:26, ".", sep = ""),
paste("B8601DT", 15:26, ".", sort(rep(0:6, 12)), sep = ""),
"IS8601DA.",
"B8601DA.",
paste("B8601DA", 8:10, ".", sep = ""),
"weekdate.",
paste("weekdate", 3:37, ".", sep = ""),
"mmddyy.",
"ddmmyy.",
"yymmdd.",
"date.",
"time.",
"hhmm.",
"IS8601TM.",
"E8601TM.",
"B8601TM."
)
expected_formats <- .internal_format_list

# w.d format for numeric variables
format_regex <- "^([1-9]|[12][0-9]|3[0-2])\\.$|^([1-9]|[12][0-9]|3[0-2])\\.([1-9]|[12][0-9]|3[0-1])$"
format_regex <- .internal_format_regex


for (i in seq_len(ncol(.df))) {
Expand All @@ -209,56 +174,56 @@ xportr_format <- function(.df,
# series of checks for formats

# check that any variables ending DT, DTM, TM have a format
if (grepl("DT$|DTM$|TM$", colnames(.df)[i]) == TRUE && format_sas == "") {
if (isTRUE(grepl("DT$|DTM$|TM$", colnames(.df)[i])) && format_sas == "") {
message <- glue(
"(xportr::xportr_format) {encode_vars(colnames(.df)[i])} is expected to have a format but does not."
)
xportr_logger(message, type = "warn")
xportr_logger(message, type = verbose)
}

# remaining checks to be carried out if a format exists
if (format_sas != "") {
# if the variable is character
if (class(.df[[i]])[1] == "character") {
# character variable formats should start with a $
if (grepl("^\\$", format_sas) == FALSE) {
if (isFALSE(grepl("^\\$", format_sas))) {
message <- glue(
"(xportr::xportr_format)",
" {encode_vars(colnames(.df)[i])} is a character variable",
" and should have a `$` prefix."
)
xportr_logger(message, type = "warn")
xportr_logger(message, type = verbose)
}
# character variable formats should have length <= 31 (excluding the $)
if (nchar(gsub(".$", "", format_sas)) > 32) {
if (nchar(gsub("\\.$", "", format_sas)) > 32) {
message <- glue(
"(xportr::xportr_format)",
" Format for character variable {encode_vars(colnames(.df)[i])}",
" should have length <= 31 (excluding `$`)."
)
xportr_logger(message, type = "warn")
xportr_logger(message, type = verbose)
}
}

# if the variable is numeric
if (class(.df[[i]])[1] == "numeric") {
# numeric variables should not start with a $
if (grepl("^\\$", format_sas) == TRUE) {
if (isTRUE(grepl("^\\$", format_sas))) {
message <- glue(
"(xportr::xportr_format)",
" {encode_vars(colnames(.df)[i])} is a numeric variable and",
" should not have a `$` prefix."
)
xportr_logger(message, type = "warn")
xportr_logger(message, type = verbose)
}
# numeric variable formats should have length <= 32
if (nchar(gsub(".$", "", format_sas)) > 32) {
if (nchar(gsub("\\.$", "", format_sas)) > 32) {
message <- glue(
"(xportr::xportr_format)",
" Format for numeric variable {encode_vars(colnames(.df)[i])}",
" should have length <= 32."
)
xportr_logger(message, type = "warn")
xportr_logger(message, type = verbose)
}
}

Expand All @@ -272,7 +237,7 @@ xportr_format <- function(.df,
" Check format {encode_vars(format_sas)} for variable {encode_vars(colnames(.df)[i])}",
" - is this correct?"
)
xportr_logger(message, type = "message")
xportr_logger(message, type = verbose)
}
}

Expand Down
112 changes: 62 additions & 50 deletions R/utils-xportr.R
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,66 @@ xpt_validate_var_names <- function(varnames,
return(err_cnd)
}

#' Internal list of formats to check
#' @noRd
.internal_format_list <- c(
NA,
"",
paste("$", 1:200, ".", sep = ""),
paste("date", 5:11, ".", sep = ""),
paste("time", 2:20, ".", sep = ""),
paste("datetime", 7:40, ".", sep = ""),
paste("yymmdd", 2:10, ".", sep = ""),
paste("mmddyy", 2:10, ".", sep = ""),
paste("ddmmyy", 2:10, ".", sep = ""),
"E8601DA.",
"E8601DA10.",
"E8601DN.",
"E8601DN10.",
"E8601TM.",
paste("E8601TM", 8:15, ".", sep = ""),
paste("E8601TM", 8:15, ".", sort(rep(0:6, 8)), sep = ""),
"E8601TZ.",
paste("E8601TZ", 9:20, ".", sep = ""),
paste("E8601TZ", 9:20, ".", sort(rep(0:6, 12)), sep = ""),
"E8601TX.",
paste("E8601TX", 9:20, ".", sep = ""),
"E8601DT.",
paste("E8601DT", 16:26, ".", sep = ""),
paste("E8601DT", 16:26, ".", sort(rep(0:6, 11)), sep = ""),
"E8601LX.",
paste("E8601LX", 20:35, ".", sep = ""),
"E8601LZ.",
paste("E8601LZ", 9:20, ".", sep = ""),
"E8601DX.",
paste("E8601DX", 20:35, ".", sep = ""),
"B8601DT.",
paste("B8601DT", 15:26, ".", sep = ""),
paste("B8601DT", 15:26, ".", sort(rep(0:6, 12)), sep = ""),
"IS8601DA.",
"B8601DA.",
paste("B8601DA", 8:10, ".", sep = ""),
"weekdate.",
paste("weekdate", 3:37, ".", sep = ""),
"mmddyy.",
"ddmmyy.",
"yymmdd.",
"date.",
"time.",
"hhmm.",
"IS8601TM.",
"E8601TM.",
"B8601TM."
)

#' Internal regex for format w.d
#' @noRd
.internal_format_regex <- paste(
sep = "|",
"^([1-9]|[12][0-9]|3[0-2])\\.$",
"^([1-9]|[12][0-9]|3[0-2])\\.([1-9]|[12][0-9]|3[0-1])$"
)

#' Validate Dataset Can be Written to xpt
#'
#' Function used to validate dataframes before they are sent to
Expand Down Expand Up @@ -222,57 +282,9 @@ xpt_validate <- function(data) {

## The usual expected formats in clinical trials: characters, dates
# Formats: https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/leforinforref/n0zwce550r32van1fdd5yoixrk4d.htm
expected_formats <- c(
NA,
"",
paste("$", 1:200, ".", sep = ""),
paste("date", 5:11, ".", sep = ""),
paste("time", 2:20, ".", sep = ""),
paste("datetime", 7:40, ".", sep = ""),
paste("yymmdd", 2:10, ".", sep = ""),
paste("mmddyy", 2:10, ".", sep = ""),
paste("ddmmyy", 2:10, ".", sep = ""),
"E8601DA.",
"E8601DA10.",
"E8601DN.",
"E8601DN10.",
"E8601TM.",
paste("E8601TM", 8:15, ".", sep = ""),
paste("E8601TM", 8:15, ".", sort(rep(0:6, 8)), sep = ""),
"E8601TZ.",
paste("E8601TZ", 9:20, ".", sep = ""),
paste("E8601TZ", 9:20, ".", sort(rep(0:6, 12)), sep = ""),
"E8601TX.",
paste("E8601TX", 9:20, ".", sep = ""),
"E8601DT.",
paste("E8601DT", 16:26, ".", sep = ""),
paste("E8601DT", 16:26, ".", sort(rep(0:6, 11)), sep = ""),
"E8601LX.",
paste("E8601LX", 20:35, ".", sep = ""),
"E8601LZ.",
paste("E8601LZ", 9:20, ".", sep = ""),
"E8601DX.",
paste("E8601DX", 20:35, ".", sep = ""),
"B8601DT.",
paste("B8601DT", 15:26, ".", sep = ""),
paste("B8601DT", 15:26, ".", sort(rep(0:6, 12)), sep = ""),
"IS8601DA.",
"B8601DA.",
paste("B8601DA", 8:10, ".", sep = ""),
"weekdate.",
paste("weekdate", 3:37, ".", sep = ""),
"mmddyy.",
"ddmmyy.",
"yymmdd.",
"date.",
"time.",
"hhmm.",
"IS8601TM.",
"E8601TM.",
"B8601TM."
)
format_regex <- "^([1-9]|[12][0-9]|3[0-2])\\.$|^([1-9]|[12][0-9]|3[0-2])\\.([1-9]|[12][0-9]|3[0-1])$"
expected_formats <- .internal_format_list

format_regex <- .internal_format_regex

# 3.1 Invalid types
is_valid <- toupper(formats) %in% toupper(expected_formats) |
Expand Down
27 changes: 21 additions & 6 deletions man/xportr_format.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ddbead1

Please sign in to comment.