diff --git a/.lintr b/.lintr index 80754030..37b9c939 100644 --- a/.lintr +++ b/.lintr @@ -1,4 +1,5 @@ linters: linters_with_defaults( + cyclocomp_linter(complexity_limit = 18), line_length_linter(120), object_usage_linter = NULL, object_name_linter = NULL, diff --git a/NEWS.md b/NEWS.md index 6dbba2df..3021dfdb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,8 @@ * New argument in `xportr_length()` allows selection between the length from metadata, as previously done, or from the calculated maximum length per variable when `length_source` is set to “data” (#91) +* `xportr_length()` assigns the maximum length value instead of 200 for a character variable when the length is missing in the metadata (#207) + ## Deprecation and Breaking Changes * The `domain` argument for xportr functions will no longer be dynamically diff --git a/R/length.R b/R/length.R index d87fe2b2..5a6092c6 100644 --- a/R/length.R +++ b/R/length.R @@ -2,7 +2,7 @@ #' #' Assigns the SAS length to a specified data frame, either from a metadata object #' or based on the calculated maximum data length. If a length isn't present for -#' a variable the length value is set to 200 for character columns, and 8 +#' a variable the length value is set to maximum data length for character columns, and 8 #' for non-character columns. This value is stored in the 'width' attribute of the column. #' #' @inheritParams xportr @@ -118,37 +118,51 @@ xportr_length <- function(.df, check_multiple_var_specs(metadata, variable_name) } + # Get max length for missing length and when length_source == "data" + var_length_max <- variable_max_length(.df) + + length_data <- var_length_max[[variable_length]] + names(length_data) <- var_length_max[[variable_name]] + # Check any variables missed in metadata but present in input data --- miss_vars <- setdiff(names(.df), metadata[[variable_name]]) - length_log(miss_vars, verbose) - + miss_length <- as.character() if (length_source == "metadata") { length_metadata <- metadata[[variable_length]] names(length_metadata) <- metadata[[variable_name]] + # Check any variables with missing length in metadata + miss_length <- names(length_metadata[is.na(length_metadata)]) + for (i in names(.df)) { if (i %in% miss_vars) { - attr(.df[[i]], "width") <- impute_length(.df[[i]]) + attr(.df[[i]], "width") <- length_data[[i]] + } else if (is.na(length_metadata[[i]])) { + attr(.df[[i]], "width") <- length_data[[i]] } else { attr(.df[[i]], "width") <- length_metadata[[i]] } } } + # Message for missing var and missing length + length_log(miss_vars, miss_length, verbose) + # Assign length from data if (length_source == "data") { - var_length_max <- variable_max_length(.df) - - length_data <- var_length_max[[variable_length]] - names(length_data) <- var_length_max[[variable_name]] - for (i in names(.df)) { attr(.df[[i]], "width") <- length_data[[i]] } - length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) %>% - filter(length.x < length.y) + length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) + length_msg <- length_msg %>% + mutate( + length_df = as.numeric(length_msg[[paste0(variable_length, ".x")]]), + length_meta = as.numeric(length_msg[[paste0(variable_length, ".y")]]) + ) %>% + filter(length_df < length_meta) %>% + select(variable_name, length_df, length_meta) max_length_msg(length_msg, verbose) } @@ -156,13 +170,3 @@ xportr_length <- function(.df, .df } - -impute_length <- function(col) { - characterTypes <- getOption("xportr.character_types") - # first_class will collapse to character if it is the option - if (first_class(col) %in% "character") { - 200 - } else { - 8 - } -} diff --git a/R/messages.R b/R/messages.R index e85d6875..e7d49d64 100644 --- a/R/messages.R +++ b/R/messages.R @@ -102,17 +102,19 @@ type_log <- function(meta_ordered, type_mismatch_ind, verbose) { #' Utility for Lengths #' #' @param miss_vars Variables missing from metadata +#' @param miss_length Variables with missing length in metadata #' @param verbose Provides additional messaging for user #' #' @return Output to Console #' @noRd -length_log <- function(miss_vars, verbose) { +length_log <- function(miss_vars, miss_length, verbose) { assert_character(miss_vars) + assert_character(miss_length) assert_choice(verbose, choices = .internal_verbose_choices) - if (length(miss_vars) > 0) { + if (length(c(miss_vars, miss_length)) > 0) { cli_h2("Variable lengths missing from metadata.") - cli_alert_success("{ length(miss_vars) } lengths resolved") + cli_alert_success("{ length(c(miss_vars, miss_length)) } lengths resolved {encode_vars(c(miss_vars, miss_length))}") xportr_logger( glue( diff --git a/man/xportr-package.Rd b/man/xportr-package.Rd index 2f38a255..bcd0521d 100644 --- a/man/xportr-package.Rd +++ b/man/xportr-package.Rd @@ -85,7 +85,8 @@ coerce R classes to numeric XPT types. Default: c("integer", "numeric", "num", " } \item{ xportr.numeric_types - The default character vector used to explicitly -coerce R classes to numeric XPT types. Default: c("integer", "float", "numeric", "posixct", "posixt", "time", "date") +coerce R classes to numeric XPT types. Default: c("integer", "float", +"numeric", "posixct", "posixt", "time", "date") } } } diff --git a/man/xportr_length.Rd b/man/xportr_length.Rd index 93425808..c3180a71 100644 --- a/man/xportr_length.Rd +++ b/man/xportr_length.Rd @@ -43,7 +43,7 @@ Data frame with SAS default length attributes for each variable. \description{ Assigns the SAS length to a specified data frame, either from a metadata object or based on the calculated maximum data length. If a length isn't present for -a variable the length value is set to 200 for character columns, and 8 +a variable the length value is set to maximum data length for character columns, and 8 for non-character columns. This value is stored in the 'width' attribute of the column. } \section{Messaging}{ diff --git a/tests/testthat/test-length.R b/tests/testthat/test-length.R index 3b294f55..12fce410 100644 --- a/tests/testthat/test-length.R +++ b/tests/testthat/test-length.R @@ -163,17 +163,6 @@ test_that("xportr_length: Domain not in character format", { ) }) -test_that("xportr_length: Column length of known/unkown character types is 200/8 ", { - expect_equal(impute_length(123), 8) - expect_equal(impute_length(123L), 8) - expect_equal(impute_length("string"), 200) - expect_equal(impute_length(Sys.Date()), 8) - expect_equal(impute_length(Sys.time()), 8) - - local_options(xportr.character_types = c("character", "date")) - expect_equal(impute_length(Sys.time()), 8) -}) - test_that("xportr_length: error when metadata is not set", { adsl <- minimal_table(30) diff --git a/tests/testthat/test-messages.R b/tests/testthat/test-messages.R index 2055914e..1da3e004 100644 --- a/tests/testthat/test-messages.R +++ b/tests/testthat/test-messages.R @@ -22,9 +22,9 @@ test_that("length_log: Missing lengths messages are shown", { # Remove empty lines in cli theme local_cli_theme() - length_log(c("var1", "var2", "var3"), "message") %>% + length_log(c("var1", "var2", "var3"), c("var4"), "message") %>% expect_message("Variable lengths missing from metadata.") %>% - expect_message("lengths resolved") %>% + expect_message("lengths resolved `var1`.*`var2`.*`var3`.*`var4`") %>% expect_message("Problem with `var1`.*`var2`.*`var3`") }) diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index b97c895b..9c9a4d08 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -513,12 +513,12 @@ test_that("xportr_length: Check if length gets imputed when a new variable is pa xportr_length(df, df_meta, domain = "df") ) - # 200 is the imputed length for character and 8 for other data types as in impute_length() - expect_equal(c(x = 1, y = 200, z = 8), map_dbl(df_with_width, attr, "width")) + # Max length is the imputed length for character and 8 for other data types + expect_equal(c(x = 1, y = 1, z = 8), map_dbl(df_with_width, attr, "width")) expect_equal(df_with_width, structure( list( x = structure("a", width = 1), - y = structure("b", width = 200), + y = structure("b", width = 1), z = structure(3, width = 8) ), row.names = c(NA, -1L), `_xportr.df_arg_` = "df", class = "data.frame"