Skip to content

Commit

Permalink
Merge branch 'devel' into 84_xportr_deep_dive_vignette
Browse files Browse the repository at this point in the history
  • Loading branch information
bms63 authored Jun 15, 2023
2 parents f632838 + 79550f9 commit fa58e0c
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 16 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* Added function `xportr_metadata()` to explicitly set metadata at the start of a pipeline (#44)
* Metadata order columns are now coerced to numeric by default in `xportr_order()` to prevent character sorting (#149)
* Message is shown on `xportr_*` functions when the metadata being used has multiple variables with the same name in the same domain (#128)
* Fixed an issue with `xport_type()` where `DT`, `DTM` variables with a format specified in the metadata (e.g. date9., datetime20.) were being converted to numeric, which will cause a 10 year difference when reading it back by `read_xpt()`. SAS's uniform start date is 1960 whereas Linux's uniform start date is 1970.

## Documentation

Expand Down
1 change: 1 addition & 0 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#' dataset = "test",
#' variable = c("Subj", "Param", "Val", "NotUsed"),
#' type = c("numeric", "character", "numeric", "character"),
#' format = NA,
#' order = c(1, 3, 4, 2)
#' )
#'
Expand Down
19 changes: 12 additions & 7 deletions R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
#' metadata <- data.frame(
#' dataset = "test",
#' variable = c("Subj", "Param", "Val", "NotUsed"),
#' type = c("numeric", "character", "numeric", "character")
#' type = c("numeric", "character", "numeric", "character"),
#' format = NA
#' )
#'
#' .df <- data.frame(
Expand Down Expand Up @@ -51,6 +52,7 @@ xportr_type <- function(.df,
type_name <- getOption("xportr.type_name")
characterTypes <- c(getOption("xportr.character_types"), "_character")
numericTypes <- c(getOption("xportr.numeric_types"), "_numeric")
format_name <- getOption("xportr.format_name")

## Common section to detect domain from argument or pipes

Expand All @@ -73,8 +75,9 @@ xportr_type <- function(.df,
metadata <- metadata %>%
filter(!!sym(domain_name) == domain)
}
metadata <- metadata %>%
select(!!sym(variable_name), !!sym(type_name))

metacore <- metadata %>%
select(!!sym(variable_name), !!sym(type_name), !!sym(format_name))

# Common check for multiple variables name
check_multiple_var_specs(metadata, variable_name)
Expand All @@ -92,9 +95,13 @@ xportr_type <- function(.df,
# _character is used here as a mask of character, in case someone doesn't
# want 'character' coerced to character
type.x = if_else(type.x %in% characterTypes, "_character", type.x),
type.x = if_else(type.x %in% numericTypes, "_numeric", type.x),
type.x = if_else(type.x %in% numericTypes | (grepl("DT$|DTM$|TM$", variable) & !is.na(format)),
"_numeric",
type.x
),
type.y = if_else(is.na(type.y), type.x, type.y),
type.y = tolower(type.y),
type.y = if_else(type.y %in% characterTypes, "_character", type.y),
type.y = if_else(type.y %in% characterTypes | (grepl("DTC$", variable) & is.na(format)), "_character", type.y),
type.y = if_else(type.y %in% numericTypes, "_numeric", type.y)
)

Expand All @@ -105,7 +112,6 @@ xportr_type <- function(.df,
type_mismatch_ind <- which(meta_ordered$type.x != meta_ordered$type.y)
type_log(meta_ordered, type_mismatch_ind, verbose)


# Check if variable types match
is_correct <- sapply(meta_ordered[["type.x"]] == meta_ordered[["type.y"]], isTRUE)
# Use the original variable iff metadata is missing that variable
Expand All @@ -128,6 +134,5 @@ xportr_type <- function(.df,
}
}, is_correct
)

.df
}
5 changes: 2 additions & 3 deletions R/utils-xportr.R
Original file line number Diff line number Diff line change
Expand Up @@ -357,9 +357,8 @@ first_class <- function(x) {
#' @param metadata A data frame containing variable level metadata.
#' @param variable_name string with `getOption('xportr.variable_name')`
#' @noRd
check_multiple_var_specs <- function(
metadata,
variable_name = getOption("xportr.variable_name")) {
check_multiple_var_specs <- function(metadata,
variable_name = getOption("xportr.variable_name")) {
variable_len <- pluck(metadata, variable_name) %||% c()
if (NROW(variable_len) != NROW(unique(variable_len))) {
cli_alert_info(
Expand Down
3 changes: 2 additions & 1 deletion R/xportr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
globalVariables(c(
"abbr_parsed", "abbr_stem", "adj_orig", "adj_parsed", "col_pos", "dict_varname",
"lower_original_varname", "my_minlength", "num_st_ind", "original_varname",
"renamed_n", "renamed_var", "use_bundle", "viable_start", "type.x", "type.y"
"renamed_n", "renamed_var", "use_bundle", "viable_start", "type.x", "type.y",
"variable"
))

# The following block is used by usethis to automatically manage
Expand Down
1 change: 1 addition & 0 deletions man/xportr_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/xportr_type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion tests/testthat/test-depreciation.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ test_that("xportr_type: deprecated metacore argument still works and gives warni
df_meta <- data.frame(
dataset = "df",
variable = c("Subj", "Param", "Val", "NotUsed"),
type = c("numeric", "character", "numeric", "character")
type = c("numeric", "character", "numeric", "character"),
format = NA
)

df2 <- xportr_type(df, metacore = df_meta)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-length.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ test_that("xportr_length: Accepts valid domain names in metadata object", {
test_that("xportr_length: CDISC data frame is being piped after another xportr function", {
adsl <- minimal_table(30)
metadata <- minimal_metadata(
dataset = TRUE, length = TRUE, type = TRUE, var_names = colnames(adsl)
dataset = TRUE, length = TRUE, type = TRUE, format = TRUE, var_names = colnames(adsl)
)

# Setup temporary options with active verbose
Expand Down
63 changes: 61 additions & 2 deletions tests/testthat/test-type.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
meta_example <- data.frame(
dataset = "df",
variable = c("Subj", "Param", "Val", "NotUsed"),
type = c("numeric", "character", "numeric", "character")
type = c("numeric", "character", "numeric", "character"),
format = NA
)

df <- data.frame(
Expand All @@ -23,7 +24,8 @@ test_that("xportr_type: NAs are handled as expected", {
meta_example <- data.frame(
dataset = "df",
variable = c("Subj", "Param", "Val", "NotUsed"),
type = c("numeric", "character", "numeric", "character")
type = c("numeric", "character", "numeric", "character"),
format = NA
)

df2 <- xportr_type(df, meta_example)
Expand Down Expand Up @@ -176,6 +178,63 @@ test_that("xportr_type: error when metadata is not set", {
)
})

test_that("xportr_type: date variables are not converted to numeric", {
df <- data.frame(RFICDT = as.Date("2017-03-30"), RFICDTM = as.POSIXct("2017-03-30"))
metacore_meta <- suppressWarnings(
metacore::metacore(
var_spec = data.frame(
variable = c("RFICDT", "RFICDTM"),
type = "integer",
label = c("RFICDT Label", "RFICDTM Label"),
length = c(1, 2),
common = NA_character_,
format = c("date9.", "datetime20.")
)
)
)
expect_message(
{
processed_df <- xportr_type(df, metacore_meta)
},
NA
)
expect_equal(lapply(df, class), lapply(processed_df, class))
expect_equal(df$RFICDT, processed_df$RFICDT)
expect_equal(df$RFICDTM, processed_df$RFICDTM)

xportr_write(processed_df, file.path(tempdir(), "dfdates.xpt"))
df_xpt <- read_xpt(file.path(tempdir(), "dfdates.xpt"))

expect_equal(lapply(df, class), lapply(df_xpt, class))
expect_equal(df$RFICDT, df_xpt$RFICDT, ignore_attr = TRUE)
expect_equal(as.character(df$RFICDTM), as.character(df_xpt$RFICDTM), ignore_attr = TRUE)

metadata <- data.frame(
dataset = c("adsl", "adsl", "adsl", "adsl"),
variable = c("USUBJID", "DMDTC", "RFICDT", "RFICDTM"),
type = c("text", "date", "integer", "integer"),
format = c(NA, NA, "date9.", "datetime15.")
)

adsl_original <- data.frame(
USUBJID = c("test1", "test2"),
DMDTC = c("2017-03-30", "2017-01-08"),
RFICDT = c("2017-03-30", "2017-01-08"),
RFICDTM = c("2017-03-30", "2017-01-08")
)


adsl_original$RFICDT <- as.Date(adsl_original$RFICDT)
adsl_original$RFICDTM <- as.POSIXct(adsl_original$RFICDTM)

expect_message(adsl_xpt2 <- adsl_original %>%
xportr_type(metadata), NA)

attr(adsl_original, "_xportr.df_arg_") <- "adsl_original"

expect_equal(adsl_original, adsl_xpt2)
})

test_that("xportr_type: Gets warning when metadata has multiple rows with same variable", {
# This test uses the (2) functions below to reduce code duplication
# All `expect_*` are being called inside the functions
Expand Down

0 comments on commit fa58e0c

Please sign in to comment.