diff --git a/.gitignore b/.gitignore index 7bda4869..608d713f 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,5 @@ docs xportr.Rcheck/ xportr*.tar.gz xportr*.tgz -docs/* \ No newline at end of file +docs/* +local diff --git a/NAMESPACE b/NAMESPACE index 9b83ed5f..cc9e76ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ importFrom(dplyr,distinct) importFrom(dplyr,everything) importFrom(dplyr,filter) importFrom(dplyr,group_by) +importFrom(dplyr,if_else) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n) diff --git a/NEWS.md b/NEWS.md index f4fed78a..d1f5f894 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # xportr 0.3.0 -* Fixed an issue where xportr_type would overwrite column labels, widths, and "sas.formats" + +* Fixed an issue where `xportr_type` would overwrite column labels, widths, and "sas.formats" +* Fixed messaging of `xportr_order`to give better visability of the number of variables being reordered. +* Add new argument to `xportr_write` to allow users to specify how xpt validation checks are handled. +* Fixed bug where character_types were case sensitive. They are now case insensitive. +* Updated `xportr_type` to make type coercion more explicit. # xportr 0.2.0 * Added a new validation test that errors when users pass invalid formats (#60 #64). Thanks to @zdz2101! diff --git a/R/messages.R b/R/messages.R index b7b4d376..c2fe3309 100644 --- a/R/messages.R +++ b/R/messages.R @@ -134,19 +134,30 @@ label_log <- function(miss_vars, verbose) { #' Utility for Ordering #' -#' @param moved_vars Variables moved in the dataset +#' @param reordered_vars Number of variables reordered +#' @param moved_vars Number of variables moved in the dataset #' @param verbose Provides additional messaging for user #' #' @return Output to Console #' @export -var_ord_msg <- function(moved_vars, verbose) { +var_ord_msg <- function(reordered_vars, moved_vars, verbose) { if (length(moved_vars) > 0) { cli_h2("{ length(moved_vars) } variables not in spec and moved to end") message <- glue( - "Variable reordered in `.df`: { encode_vars(moved_vars) }" + "Variable moved to end in `.df`: { encode_vars(moved_vars) }" ) xportr_logger(message, verbose) } else { cli_h2("All variables in specification file are in dataset") } + + if (length(reordered_vars) > 0) { + cli_h2("{ length(reordered_vars) } reordered in dataset") + message <- glue( + "Variable reordered in `.df`: { encode_vars(reordered_vars) }" + ) + xportr_logger(message, verbose) + } else { + cli_h2("All variables in dataset are ordered") + } } diff --git a/R/order.R b/R/order.R index 19303028..38d50734 100644 --- a/R/order.R +++ b/R/order.R @@ -63,14 +63,13 @@ xportr_order <- function(.df, metacore, domain = NULL, verbose = getOption("xpor drop_vars <- .df %>% select(!any_of(vars_in_spec_ds)) - # Used in warning message for how many vars have been moved - moved_vars <- ncol(drop_vars) - ordered_vars <- ncol(ord_vars) - df_re_ord <- bind_cols(ord_vars, drop_vars) + # Used in warning message for how many vars have been moved + reorder_vars <- names(df_re_ord)[names(df_re_ord) != names(.df)] + # Function is located in messages.R - var_ord_msg(moved_vars, verbose) + var_ord_msg(reorder_vars, names(drop_vars), verbose) df_re_ord } diff --git a/R/type.R b/R/type.R index f680700b..137166a7 100644 --- a/R/type.R +++ b/R/type.R @@ -37,12 +37,14 @@ xportr_type <- function(.df, metacore, domain = NULL, domain_name <- getOption("xportr.domain_name") variable_name <- getOption("xportr.variable_name") type_name <- getOption("xportr.type_name") - characterTypes <- getOption("xportr.character_types") + characterTypes <- c(getOption("xportr.character_types"), "_character") + numericTypes <- c(getOption("xportr.numeric_types"), "_numeric") ## Common section to detect domain from argument or pipes df_arg <- tryCatch(as_name(enexpr(.df)), error = function(err) NULL) domain <- get_domain(.df, df_arg, domain) + if (!is.null(domain)) attr(.df, "_xportr.df_arg_") <- domain ## End of common section @@ -65,7 +67,16 @@ xportr_type <- function(.df, metacore, domain = NULL, data.frame(variable = names(.df), type = unlist(table_cols_types)), metacore, by = "variable" - ) + ) %>% + mutate( + # _character is used here as a mask of character, in case someone doesn't + # want 'character' coerced to character + type.x = if_else(type.x %in% characterTypes, "_character", type.x), + type.x = if_else(type.x %in% numericTypes, "_numeric", type.x), + type.y = tolower(type.y), + type.y = if_else(type.y %in% characterTypes, "_character", type.y), + type.y = if_else(type.y %in% numericTypes, "_numeric", type.y) + ) # It is possible that a variable exists in the table that isn't in the metadata # it will be silently ignored here. This may happen depending on what a user diff --git a/R/write.R b/R/write.R index 369db5c4..ce16dfb5 100644 --- a/R/write.R +++ b/R/write.R @@ -7,7 +7,11 @@ #' @param .df A data frame to write. #' @param path Path where transport file will be written. File name sans will be #' used as `xpt` name. -#' @param label Dataset label. It must be<=40 characters. +#' @param label Dataset label. It must be <=40 characters. +#' @param strict_checks If TRUE, xpt validation will report errors and not +#' write out the dataset. If FALSE, xpt validation will report warnings and continue +#' with writing out the dataset. Defaults to FALSE +#' #' @details #' * Variable and dataset labels are stored in the "label" attribute. #' @@ -19,7 +23,7 @@ #' #' @return A data frame. `xportr_write()` returns the input data invisibly. #' @export -xportr_write <- function(.df, path, label = NULL) { +xportr_write <- function(.df, path, label = NULL, strict_checks = FALSE) { path <- normalizePath(path, mustWork = FALSE) name <- tools::file_path_sans_ext(basename(path)) @@ -47,7 +51,11 @@ xportr_write <- function(.df, path, label = NULL) { checks <- xpt_validate(.df) if (length(checks) > 0) { - abort(c("The following validation failed:", checks)) + if (!strict_checks) { + warn(c("The following validation checks failed:", checks)) + } else { + abort(c("The following validation checks failed:", checks)) + } } data <- as.data.frame(.df) diff --git a/R/xportr-package.R b/R/xportr-package.R index 09e63f42..fa8a788d 100644 --- a/R/xportr-package.R +++ b/R/xportr-package.R @@ -26,7 +26,7 @@ globalVariables(c( "abbr_parsed", "abbr_stem", "adj_orig", "adj_parsed", "col_pos", "dict_varname", "lower_original_varname", "my_minlength", "num_st_ind", "original_varname", - "renamed_n", "renamed_var", "use_bundle", "viable_start" + "renamed_n", "renamed_var", "use_bundle", "viable_start", "type.x", "type.y" )) # The following block is used by usethis to automatically manage diff --git a/R/zzz.R b/R/zzz.R index 4cb85dde..ef71ea87 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -14,7 +14,14 @@ xportr.label_verbose = "none", xportr.length_verbose = "none", xportr.type_verbose = "none", - xportr.character_types = c("character", "char", "text", "date", "posixct", "posixt"), + xportr.character_types = c( + "character", "char", "text", "date", "posixct", + "posixt", "datetime", "time", "partialdate", + "partialtime", "partialdatetime", + "incompletedatetime", "durationdatetime", + "intervaldatetime" + ), + xportr.numeric_types = c("integer", "numeric", "num", "float"), xportr.order_name = "order" ) toset <- !(names(op.devtools) %in% names(op)) diff --git a/man/var_ord_msg.Rd b/man/var_ord_msg.Rd index 1b92ab4e..0bd5979e 100644 --- a/man/var_ord_msg.Rd +++ b/man/var_ord_msg.Rd @@ -4,10 +4,12 @@ \alias{var_ord_msg} \title{Utility for Ordering} \usage{ -var_ord_msg(moved_vars, verbose) +var_ord_msg(reordered_vars, moved_vars, verbose) } \arguments{ -\item{moved_vars}{Variables moved in the dataset} +\item{reordered_vars}{Number of variables reordered} + +\item{moved_vars}{Number of ariables moved in the dataset} \item{verbose}{Provides additional messaging for user} } diff --git a/man/xportr_write.Rd b/man/xportr_write.Rd index ee82ac64..b22356f1 100644 --- a/man/xportr_write.Rd +++ b/man/xportr_write.Rd @@ -4,7 +4,7 @@ \alias{xportr_write} \title{Write xpt v5 transport file} \usage{ -xportr_write(.df, path, label = NULL) +xportr_write(.df, path, label = NULL, strict_checks = FALSE) } \arguments{ \item{.df}{A data frame to write.} @@ -13,6 +13,10 @@ xportr_write(.df, path, label = NULL) used as \code{xpt} name.} \item{label}{Dataset label. It must be<=40 characters.} + +\item{strict_checks}{If TRUE, xpt validation will report errors and not +the dataset. If FALSE, xpt validation will report warnings and continue +with writing. Defaults to FALSE} } \value{ A data frame. \code{xportr_write()} returns the input data invisibly. diff --git a/tests/testthat/test-messages.R b/tests/testthat/test-messages.R index 36c35b32..13e966af 100644 --- a/tests/testthat/test-messages.R +++ b/tests/testthat/test-messages.R @@ -44,23 +44,6 @@ test_that("length_log: Missing variables messages are shown", { expect_message("Problem with `var1`.*`var2`.*`var3`") }) -test_that("var_ord_msg: Reordered variables messages are shown", { - # Remove empty lines in cli theme - withr::local_options(list(cli.user_theme = cli_theme_tests)) - app <- cli::start_app(output = "message", .auto_close = FALSE) - withr::defer(cli::stop_app(app)) - - moved_vars <- c("var1", "var2", "var3") - message_regexp <- "Variable reordered in.+`var1`.+`var2`.+`var3`$" - - var_ord_msg(moved_vars, "message") %>% - expect_message("variables not in spec and moved to end") %>% - expect_message(message_regexp) - - var_ord_msg(c(), "message") %>% - expect_message("All variables in specification file are in dataset") -}) - test_that("var_names_log: Renamed variables messages are shown", { # Remove empty lines in cli theme withr::local_options(list(cli.user_theme = cli_theme_tests)) diff --git a/tests/testthat/test-order.R b/tests/testthat/test-order.R index 64a7d19b..0b988853 100644 --- a/tests/testthat/test-order.R +++ b/tests/testthat/test-order.R @@ -90,3 +90,41 @@ test_that("xportr_order: Expect error if domain is not a character", { expect_error(xportr_order(df, df_meta, domain = NA, verbose = "none")) expect_error(xportr_order(df, df_meta, domain = 1, verbose = "none")) }) + +test_that("xportr_order: Variable ordering messaging is correct", { + output_file <- tempfile() + + df <- data.frame(c = 1:5, a = "a", d = 5:1, b = LETTERS[1:5]) + df2 <- data.frame(a = "a", z = "z") + df_meta <- data.frame( + dataset = "df", + variable = letters[1:4], + order = 1:4 + ) + + capture.output(xportr_order(df, df_meta, verbose = "message"), file = output_file, type = "message") + + expect_equal( + readLines(output_file), + c( + "-- All variables in specification file are in dataset --", + "", + "-- 4 reordered in dataset --", + "", + "Variable reordered in `.df`: `a`, `b`, `c`, and `d`" + ) + ) + + capture.output(xportr_order(df2, df_meta, verbose = "message"), file = output_file, type = "message") + + expect_equal( + readLines(output_file), + c( + "-- 2 variables not in spec and moved to end --", + "", + "Variable moved to end in `.df`: `a` and `z`", + "-- All variables in dataset are ordered --", + "" + ) + ) +}) diff --git a/tests/testthat/test-type.R b/tests/testthat/test-type.R index f554f6bb..68a9babd 100644 --- a/tests/testthat/test-type.R +++ b/tests/testthat/test-type.R @@ -11,6 +11,39 @@ df <- data.frame( Param = c("param1", "param2", "param3") ) +test_that("xportr_type: NAs are handled as expected", { + # Namely that "" isn't converted to NA or vice versa + # Numeric columns will become NA but that is the nature of as.numeric + df <- data.frame( + Subj = as.character(c(123, 456, 789, "", NA, NA_integer_)), + Different = c("a", "b", "c", "", NA, NA_character_), + Val = c("1", "2", "3", "", NA, NA_character_), + Param = c("param1", "param2", "param3", "", NA, NA_character_) + ) + meta_example <- data.frame( + dataset = "df", + variable = c("Subj", "Param", "Val", "NotUsed"), + type = c("numeric", "character", "numeric", "character") + ) + + df2 <- xportr_type(df, meta_example) + expect_equal( + df2, + structure( + list( + Subj = c(123, 456, 789, NA, NA, NA), + Different = c("a", "b", "c", "", NA, NA), + Val = c(1, 2, 3, NA, NA, NA), + Param = c("param1", "param2", "param3", "", NA, NA) + ), + row.names = c(NA, -6L), + `_xportr.df_arg_` = "df", + class = "data.frame" + ) + ) +}) + + test_that("xportr_type: Variable types are coerced as expected and can raise messages", { expect_message( df2 <- xportr_type(df, meta_example), diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R index e9658b9a..915aae5c 100644 --- a/tests/testthat/test-write.R +++ b/tests/testthat/test-write.R @@ -65,12 +65,22 @@ test_that("xportr_write: expect error when label is over 40 characters", { expect_error(xportr_write(data_to_save, tmp, label = paste(rep("a", 41), collapse = ""))) }) -test_that("xportr_write: expect error when an xpt validation fails", { +test_that("xportr_write: expect error when an xpt validation fails with strict_checks set to TRUE", { tmpdir <- tempdir() tmp <- file.path(tmpdir, "xyz.xpt") attr(data_to_save$X, "format.sas") <- "foo" on.exit(unlink(tmpdir)) - expect_error(xportr_write(data_to_save, tmp, label = "label")) + expect_error(xportr_write(data_to_save, tmp, label = "label", strict_checks = TRUE)) +}) + +test_that("xportr_write: expect warning when an xpt validation fails with strict_checks set to FALSE", { + tmpdir <- tempdir() + tmp <- file.path(tmpdir, "xyz.xpt") + attr(data_to_save$X, "format.sas") <- "foo" + + on.exit(unlink(tmpdir)) + + expect_warning(xportr_write(data_to_save, tmp, label = "label", strict_checks = FALSE)) })