Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #207 xportr_length for missing length in metadata #238

Merged
merged 14 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .lintr
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
linters: linters_with_defaults(
cyclocomp_linter(complexity_limit = 18),
line_length_linter(120),
object_usage_linter = NULL,
object_name_linter = NULL,
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

* New argument in `xportr_length()` allows selection between the length from metadata, as previously done, or from the calculated maximum length per variable when `length_source` is set to “data” (#91)

* `xportr_length()` assigns the maximum length value instead of 200 for a character variable when the length is missing in the metadata (#207)

## Deprecation and Breaking Changes

* The `domain` argument for xportr functions will no longer be dynamically
Expand Down
46 changes: 25 additions & 21 deletions R/length.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#'
#' Assigns the SAS length to a specified data frame, either from a metadata object
#' or based on the calculated maximum data length. If a length isn't present for
#' a variable the length value is set to 200 for character columns, and 8
#' a variable the length value is set to maximum data length for character columns, and 8
#' for non-character columns. This value is stored in the 'width' attribute of the column.
#'
#' @inheritParams xportr
Expand Down Expand Up @@ -118,51 +118,55 @@ xportr_length <- function(.df,
check_multiple_var_specs(metadata, variable_name)
}

# Get max length for missing length and when length_source == "data"
var_length_max <- variable_max_length(.df)

length_data <- var_length_max[[variable_length]]
names(length_data) <- var_length_max[[variable_name]]

# Check any variables missed in metadata but present in input data ---
miss_vars <- setdiff(names(.df), metadata[[variable_name]])

length_log(miss_vars, verbose)

miss_length <- as.character()
if (length_source == "metadata") {
length_metadata <- metadata[[variable_length]]
names(length_metadata) <- metadata[[variable_name]]

# Check any variables with missing length in metadata
miss_length <- names(length_metadata[is.na(length_metadata)])

for (i in names(.df)) {
if (i %in% miss_vars) {
attr(.df[[i]], "width") <- impute_length(.df[[i]])
attr(.df[[i]], "width") <- length_data[[i]]
} else if (is.na(length_metadata[[i]])) {
attr(.df[[i]], "width") <- length_data[[i]]
} else {
attr(.df[[i]], "width") <- length_metadata[[i]]
}
}
}

# Message for missing var and missing length
length_log(miss_vars, miss_length, verbose)

# Assign length from data
if (length_source == "data") {
var_length_max <- variable_max_length(.df)

length_data <- var_length_max[[variable_length]]
names(length_data) <- var_length_max[[variable_name]]

for (i in names(.df)) {
attr(.df[[i]], "width") <- length_data[[i]]
}

length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) %>%
filter(length.x < length.y)
length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name)
length_msg <- length_msg %>%
mutate(
length_df = as.numeric(length_msg[[paste0(variable_length, ".x")]]),
length_meta = as.numeric(length_msg[[paste0(variable_length, ".y")]])
) %>%
filter(length_df < length_meta) %>%
select(variable_name, length_df, length_meta)

max_length_msg(length_msg, verbose)
}


.df
}

impute_length <- function(col) {
characterTypes <- getOption("xportr.character_types")
# first_class will collapse to character if it is the option
if (first_class(col) %in% "character") {
200
} else {
8
}
}
8 changes: 5 additions & 3 deletions R/messages.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,19 @@ type_log <- function(meta_ordered, type_mismatch_ind, verbose) {
#' Utility for Lengths
#'
#' @param miss_vars Variables missing from metadata
#' @param miss_length Variables with missing length in metadata
#' @param verbose Provides additional messaging for user
#'
#' @return Output to Console
#' @noRd
length_log <- function(miss_vars, verbose) {
length_log <- function(miss_vars, miss_length, verbose) {
assert_character(miss_vars)
assert_character(miss_length)
assert_choice(verbose, choices = .internal_verbose_choices)

if (length(miss_vars) > 0) {
if (length(c(miss_vars, miss_length)) > 0) {
cli_h2("Variable lengths missing from metadata.")
cli_alert_success("{ length(miss_vars) } lengths resolved")
cli_alert_success("{ length(c(miss_vars, miss_length)) } lengths resolved {encode_vars(c(miss_vars, miss_length))}")

xportr_logger(
glue(
Expand Down
3 changes: 2 additions & 1 deletion man/xportr-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/xportr_length.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 0 additions & 11 deletions tests/testthat/test-length.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,17 +163,6 @@ test_that("xportr_length: Domain not in character format", {
)
})

test_that("xportr_length: Column length of known/unkown character types is 200/8 ", {
expect_equal(impute_length(123), 8)
expect_equal(impute_length(123L), 8)
expect_equal(impute_length("string"), 200)
expect_equal(impute_length(Sys.Date()), 8)
expect_equal(impute_length(Sys.time()), 8)

local_options(xportr.character_types = c("character", "date"))
expect_equal(impute_length(Sys.time()), 8)
})

test_that("xportr_length: error when metadata is not set", {
adsl <- minimal_table(30)

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-messages.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ test_that("length_log: Missing lengths messages are shown", {
# Remove empty lines in cli theme
local_cli_theme()

length_log(c("var1", "var2", "var3"), "message") %>%
length_log(c("var1", "var2", "var3"), c("var4"), "message") %>%
expect_message("Variable lengths missing from metadata.") %>%
expect_message("lengths resolved") %>%
expect_message("lengths resolved `var1`.*`var2`.*`var3`.*`var4`") %>%
expect_message("Problem with `var1`.*`var2`.*`var3`")
})

Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -513,12 +513,12 @@ test_that("xportr_length: Check if length gets imputed when a new variable is pa
xportr_length(df, df_meta, domain = "df")
)

# 200 is the imputed length for character and 8 for other data types as in impute_length()
expect_equal(c(x = 1, y = 200, z = 8), map_dbl(df_with_width, attr, "width"))
# Max length is the imputed length for character and 8 for other data types
expect_equal(c(x = 1, y = 1, z = 8), map_dbl(df_with_width, attr, "width"))
expect_equal(df_with_width, structure(
list(
x = structure("a", width = 1),
y = structure("b", width = 200),
y = structure("b", width = 1),
z = structure(3, width = 8)
),
row.names = c(NA, -1L), `_xportr.df_arg_` = "df", class = "data.frame"
Expand Down
Loading