From fdb0f07b4333707d58b42ed0a5f987c28cf684d7 Mon Sep 17 00:00:00 2001 From: Celine Date: Thu, 15 Feb 2024 04:53:15 -0500 Subject: [PATCH 01/11] Get max length for missing length in Metadata iso 200 or 8 --- R/length.R | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/R/length.R b/R/length.R index d87fe2b2..9d985287 100644 --- a/R/length.R +++ b/R/length.R @@ -2,7 +2,7 @@ #' #' Assigns the SAS length to a specified data frame, either from a metadata object #' or based on the calculated maximum data length. If a length isn't present for -#' a variable the length value is set to 200 for character columns, and 8 +#' a variable the length value is set to maximum data length for character columns, and 8 #' for non-character columns. This value is stored in the 'width' attribute of the column. #' #' @inheritParams xportr @@ -118,6 +118,12 @@ xportr_length <- function(.df, check_multiple_var_specs(metadata, variable_name) } + # Get max length for missing length and when length_source == "data" + var_length_max <- variable_max_length(.df) + + length_data <- var_length_max[[variable_length]] + names(length_data) <- var_length_max[[variable_name]] + # Check any variables missed in metadata but present in input data --- miss_vars <- setdiff(names(.df), metadata[[variable_name]]) @@ -129,7 +135,7 @@ xportr_length <- function(.df, for (i in names(.df)) { if (i %in% miss_vars) { - attr(.df[[i]], "width") <- impute_length(.df[[i]]) + attr(.df[[i]], "width") <- length_data[[i]] } else { attr(.df[[i]], "width") <- length_metadata[[i]] } @@ -138,10 +144,6 @@ xportr_length <- function(.df, # Assign length from data if (length_source == "data") { - var_length_max <- variable_max_length(.df) - - length_data <- var_length_max[[variable_length]] - names(length_data) <- var_length_max[[variable_name]] for (i in names(.df)) { attr(.df[[i]], "width") <- length_data[[i]] @@ -157,12 +159,3 @@ xportr_length <- function(.df, .df } -impute_length <- function(col) { - characterTypes <- getOption("xportr.character_types") - # first_class will collapse to character if it is the option - if (first_class(col) %in% "character") { - 200 - } else { - 8 - } -} From 7a8a650185a1e27aa1e2472c8a6f278029639664 Mon Sep 17 00:00:00 2001 From: Celine Date: Thu, 15 Feb 2024 06:14:29 -0500 Subject: [PATCH 02/11] Update length_log message to include variables with missing length in metadata --- R/length.R | 11 +++++++++-- R/messages.R | 8 +++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/R/length.R b/R/length.R index 9d985287..96d02b4a 100644 --- a/R/length.R +++ b/R/length.R @@ -127,21 +127,28 @@ xportr_length <- function(.df, # Check any variables missed in metadata but present in input data --- miss_vars <- setdiff(names(.df), metadata[[variable_name]]) - length_log(miss_vars, verbose) - if (length_source == "metadata") { length_metadata <- metadata[[variable_length]] names(length_metadata) <- metadata[[variable_name]] + # Check any variables with missing length in metadata + miss_length <- names(length_metadata[is.na(length_metadata)]) + for (i in names(.df)) { + if (i %in% miss_vars) { attr(.df[[i]], "width") <- length_data[[i]] + } else if (is.na(length_metadata[[i]])){ + attr(.df[[i]], "width") <- length_data[[i]] } else { attr(.df[[i]], "width") <- length_metadata[[i]] } } } + # Message for missing var and missing length + length_log(miss_vars, miss_length, verbose) + # Assign length from data if (length_source == "data") { diff --git a/R/messages.R b/R/messages.R index e85d6875..e7d49d64 100644 --- a/R/messages.R +++ b/R/messages.R @@ -102,17 +102,19 @@ type_log <- function(meta_ordered, type_mismatch_ind, verbose) { #' Utility for Lengths #' #' @param miss_vars Variables missing from metadata +#' @param miss_length Variables with missing length in metadata #' @param verbose Provides additional messaging for user #' #' @return Output to Console #' @noRd -length_log <- function(miss_vars, verbose) { +length_log <- function(miss_vars, miss_length, verbose) { assert_character(miss_vars) + assert_character(miss_length) assert_choice(verbose, choices = .internal_verbose_choices) - if (length(miss_vars) > 0) { + if (length(c(miss_vars, miss_length)) > 0) { cli_h2("Variable lengths missing from metadata.") - cli_alert_success("{ length(miss_vars) } lengths resolved") + cli_alert_success("{ length(c(miss_vars, miss_length)) } lengths resolved {encode_vars(c(miss_vars, miss_length))}") xportr_logger( glue( From 8067662a9473f436b50ef5ff7d5a9bd482892bde Mon Sep 17 00:00:00 2001 From: Celine Date: Thu, 15 Feb 2024 07:17:12 -0500 Subject: [PATCH 03/11] tests: updated for missing length in metadata --- R/length.R | 1 + tests/testthat/test-length.R | 11 ----------- tests/testthat/test-messages.R | 4 ++-- tests/testthat/test-metadata.R | 6 +++--- 4 files changed, 6 insertions(+), 16 deletions(-) diff --git a/R/length.R b/R/length.R index 96d02b4a..f2581350 100644 --- a/R/length.R +++ b/R/length.R @@ -127,6 +127,7 @@ xportr_length <- function(.df, # Check any variables missed in metadata but present in input data --- miss_vars <- setdiff(names(.df), metadata[[variable_name]]) + miss_length <- as.character() if (length_source == "metadata") { length_metadata <- metadata[[variable_length]] names(length_metadata) <- metadata[[variable_name]] diff --git a/tests/testthat/test-length.R b/tests/testthat/test-length.R index ffdc599a..a6d77024 100644 --- a/tests/testthat/test-length.R +++ b/tests/testthat/test-length.R @@ -163,17 +163,6 @@ test_that("xportr_length: Domain not in character format", { ) }) -test_that("xportr_length: Column length of known/unkown character types is 200/8 ", { - expect_equal(impute_length(123), 8) - expect_equal(impute_length(123L), 8) - expect_equal(impute_length("string"), 200) - expect_equal(impute_length(Sys.Date()), 8) - expect_equal(impute_length(Sys.time()), 8) - - withr::local_options(list(xportr.character_types = c("character", "date"))) - expect_equal(impute_length(Sys.time()), 8) -}) - test_that("xportr_length: error when metadata is not set", { adsl <- minimal_table(30) diff --git a/tests/testthat/test-messages.R b/tests/testthat/test-messages.R index 2055914e..1da3e004 100644 --- a/tests/testthat/test-messages.R +++ b/tests/testthat/test-messages.R @@ -22,9 +22,9 @@ test_that("length_log: Missing lengths messages are shown", { # Remove empty lines in cli theme local_cli_theme() - length_log(c("var1", "var2", "var3"), "message") %>% + length_log(c("var1", "var2", "var3"), c("var4"), "message") %>% expect_message("Variable lengths missing from metadata.") %>% - expect_message("lengths resolved") %>% + expect_message("lengths resolved `var1`.*`var2`.*`var3`.*`var4`") %>% expect_message("Problem with `var1`.*`var2`.*`var3`") }) diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index 29a9aab0..aed50073 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -513,12 +513,12 @@ test_that("xportr_length: Check if length gets imputed when a new variable is pa xportr_length(df, df_meta, domain = "df") ) - # 200 is the imputed length for character and 8 for other data types as in impute_length() - expect_equal(c(x = 1, y = 200, z = 8), map_dbl(df_with_width, attr, "width")) + # Max length is the imputed length for character and 8 for other data types as in impute_length() + expect_equal(c(x = 1, y = 1, z = 8), map_dbl(df_with_width, attr, "width")) expect_equal(df_with_width, structure( list( x = structure("a", width = 1), - y = structure("b", width = 200), + y = structure("b", width = 1), z = structure(3, width = 8) ), row.names = c(NA, -1L), `_xportr.df_arg_` = "df", class = "data.frame" From 16469ed9942bb39f0b7c016e99b90e053b4c4c59 Mon Sep 17 00:00:00 2001 From: Celine Date: Thu, 15 Feb 2024 07:18:34 -0500 Subject: [PATCH 04/11] doc: ran devtools::document() --- man/xportr-package.Rd | 3 ++- man/xportr_length.Rd | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/man/xportr-package.Rd b/man/xportr-package.Rd index 8f4327ff..2e05668c 100644 --- a/man/xportr-package.Rd +++ b/man/xportr-package.Rd @@ -86,7 +86,8 @@ coerce R classes to numeric XPT types. Default: c("integer", "numeric", "num", " } \item{ xportr.numeric_types - The default character vector used to explicitly -coerce R classes to numeric XPT types. Default: c("integer", "float", "numeric", "posixct", "posixt", "time", "date") +coerce R classes to numeric XPT types. Default: c("integer", "float", +"numeric", "posixct", "posixt", "time", "date") } } } diff --git a/man/xportr_length.Rd b/man/xportr_length.Rd index 8d034eb8..a2c2e01e 100644 --- a/man/xportr_length.Rd +++ b/man/xportr_length.Rd @@ -43,7 +43,7 @@ Data frame with SAS default length attributes for each variable. \description{ Assigns the SAS length to a specified data frame, either from a metadata object or based on the calculated maximum data length. If a length isn't present for -a variable the length value is set to 200 for character columns, and 8 +a variable the length value is set to maximum data length for character columns, and 8 for non-character columns. This value is stored in the 'width' attribute of the column. } \section{Messaging}{ From 987c7b274f7124e049acce64bc998b3ff036e0cd Mon Sep 17 00:00:00 2001 From: Celine Date: Thu, 15 Feb 2024 07:25:57 -0500 Subject: [PATCH 05/11] doc: NEWS.md udapted --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 8e16bec5..f70d8b98 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,8 @@ * New argument in `xportr_length()` allows selection between the length from metadata, as previously done, or from the calculated maximum length per variable when `length_source` is set to “data” (#91) +* `xportr_length()` assigns the maximum length value instead of 200 for a character variable when the length is missing in the metadata (#207) + ## Deprecation and Breaking Changes * The `domain` argument for xportr functions will no longer be dynamically From f4f06e3aef228d9869f7c07c5b771b406b010d83 Mon Sep 17 00:00:00 2001 From: Celine Date: Thu, 15 Feb 2024 07:38:40 -0500 Subject: [PATCH 06/11] Style --- R/length.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/length.R b/R/length.R index f2581350..96cf0fc4 100644 --- a/R/length.R +++ b/R/length.R @@ -136,10 +136,9 @@ xportr_length <- function(.df, miss_length <- names(length_metadata[is.na(length_metadata)]) for (i in names(.df)) { - if (i %in% miss_vars) { attr(.df[[i]], "width") <- length_data[[i]] - } else if (is.na(length_metadata[[i]])){ + } else if (is.na(length_metadata[[i]])) { attr(.df[[i]], "width") <- length_data[[i]] } else { attr(.df[[i]], "width") <- length_metadata[[i]] @@ -152,7 +151,6 @@ xportr_length <- function(.df, # Assign length from data if (length_source == "data") { - for (i in names(.df)) { attr(.df[[i]], "width") <- length_data[[i]] } @@ -166,4 +164,3 @@ xportr_length <- function(.df, .df } - From adc990b5aa2bf67f420f4437ad1cf780f0f80f3c Mon Sep 17 00:00:00 2001 From: Celine Date: Tue, 20 Feb 2024 07:35:46 -0500 Subject: [PATCH 07/11] bug: length_msg when options are not by default --- R/length.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/length.R b/R/length.R index 96cf0fc4..ed0ecd83 100644 --- a/R/length.R +++ b/R/length.R @@ -155,8 +155,12 @@ xportr_length <- function(.df, attr(.df[[i]], "width") <- length_data[[i]] } - length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) %>% - filter(length.x < length.y) + length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) + length_msg <- length_msg %>% + mutate(length_df = as.numeric(length_msg[[paste0(variable_length, ".x")]]), + length_meta = as.numeric(length_msg[[paste0(variable_length, ".y")]])) %>% + filter(length_df < length_meta) %>% + select(variable_name, length_df, length_meta) max_length_msg(length_msg, verbose) } From 19130f3dd25a58724f5b40ffb24954de9292f168 Mon Sep 17 00:00:00 2001 From: Celine Date: Tue, 20 Feb 2024 07:38:23 -0500 Subject: [PATCH 08/11] style --- R/length.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/length.R b/R/length.R index ed0ecd83..5a6092c6 100644 --- a/R/length.R +++ b/R/length.R @@ -157,8 +157,10 @@ xportr_length <- function(.df, length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) length_msg <- length_msg %>% - mutate(length_df = as.numeric(length_msg[[paste0(variable_length, ".x")]]), - length_meta = as.numeric(length_msg[[paste0(variable_length, ".y")]])) %>% + mutate( + length_df = as.numeric(length_msg[[paste0(variable_length, ".x")]]), + length_meta = as.numeric(length_msg[[paste0(variable_length, ".y")]]) + ) %>% filter(length_df < length_meta) %>% select(variable_name, length_df, length_meta) From 0ea9a8fe0cb6095e0fc5fe8648e3f1af81b5784f Mon Sep 17 00:00:00 2001 From: Celine Date: Tue, 20 Feb 2024 07:43:44 -0500 Subject: [PATCH 09/11] subject header updated to remove impute_length() --- tests/testthat/test-metadata.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index 7aee0106..9c9a4d08 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -513,7 +513,7 @@ test_that("xportr_length: Check if length gets imputed when a new variable is pa xportr_length(df, df_meta, domain = "df") ) - # Max length is the imputed length for character and 8 for other data types as in impute_length() + # Max length is the imputed length for character and 8 for other data types expect_equal(c(x = 1, y = 1, z = 8), map_dbl(df_with_width, attr, "width")) expect_equal(df_with_width, structure( list( From ae09301005202556970746dbe3400a506b975b36 Mon Sep 17 00:00:00 2001 From: Celine Date: Tue, 20 Feb 2024 07:52:50 -0500 Subject: [PATCH 10/11] test: removed test with impute_length() --- tests/testthat/test-length.R | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/testthat/test-length.R b/tests/testthat/test-length.R index 3b294f55..12fce410 100644 --- a/tests/testthat/test-length.R +++ b/tests/testthat/test-length.R @@ -163,17 +163,6 @@ test_that("xportr_length: Domain not in character format", { ) }) -test_that("xportr_length: Column length of known/unkown character types is 200/8 ", { - expect_equal(impute_length(123), 8) - expect_equal(impute_length(123L), 8) - expect_equal(impute_length("string"), 200) - expect_equal(impute_length(Sys.Date()), 8) - expect_equal(impute_length(Sys.time()), 8) - - local_options(xportr.character_types = c("character", "date")) - expect_equal(impute_length(Sys.time()), 8) -}) - test_that("xportr_length: error when metadata is not set", { adsl <- minimal_table(30) From 34a6b95176ca7f4f77b517f6ba5852d32f045e32 Mon Sep 17 00:00:00 2001 From: Ben Straub Date: Tue, 20 Feb 2024 09:13:10 -0500 Subject: [PATCH 11/11] fix: #207 increase cyclomatic complexity --- .lintr | 1 + 1 file changed, 1 insertion(+) diff --git a/.lintr b/.lintr index 80754030..37b9c939 100644 --- a/.lintr +++ b/.lintr @@ -1,4 +1,5 @@ linters: linters_with_defaults( + cyclocomp_linter(complexity_limit = 18), line_length_linter(120), object_usage_linter = NULL, object_name_linter = NULL,