Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #142 issue_142_updated to account for DT, DTM, TM variables #145

Merged
merged 18 commits into from
Jun 15, 2023
Merged
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* Added function `xportr_metadata()` to explicitly set metadata at the start of a pipeline (#44)
* Metadata order columns are now coerced to numeric by default in `xportr_order()` to prevent character sorting (#149)
* Message is shown on `xportr_*` functions when the metadata being used has multiple variables with the same name in the same domain (#128)
* Fixed an issue with `xport_type()` where `DT`, `DTM` variables with a format specified in the metadata (e.g. date9., datetime20.) were being converted to numeric, which will cause a 10 year difference when reading it back by `read_xpt()`. SAS's uniform start date is 1960 whereas Linux's uniform start date is 1970.

## Documentation

Expand Down
1 change: 1 addition & 0 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#' dataset = "test",
#' variable = c("Subj", "Param", "Val", "NotUsed"),
#' type = c("numeric", "character", "numeric", "character"),
#' format = NA,
#' order = c(1, 3, 4, 2)
#' )
#'
Expand Down
19 changes: 12 additions & 7 deletions R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
#' metadata <- data.frame(
#' dataset = "test",
#' variable = c("Subj", "Param", "Val", "NotUsed"),
#' type = c("numeric", "character", "numeric", "character")
#' type = c("numeric", "character", "numeric", "character"),
#' format = NA
#' )
#'
#' .df <- data.frame(
Expand Down Expand Up @@ -51,6 +52,7 @@ xportr_type <- function(.df,
type_name <- getOption("xportr.type_name")
characterTypes <- c(getOption("xportr.character_types"), "_character")
numericTypes <- c(getOption("xportr.numeric_types"), "_numeric")
format_name <- getOption("xportr.format_name")

## Common section to detect domain from argument or pipes

Expand All @@ -73,8 +75,9 @@ xportr_type <- function(.df,
metadata <- metadata %>%
filter(!!sym(domain_name) == domain)
}
metadata <- metadata %>%
select(!!sym(variable_name), !!sym(type_name))

metacore <- metadata %>%
select(!!sym(variable_name), !!sym(type_name), !!sym(format_name))

# Common check for multiple variables name
check_multiple_var_specs(metadata, variable_name)
Expand All @@ -92,9 +95,13 @@ xportr_type <- function(.df,
# _character is used here as a mask of character, in case someone doesn't
# want 'character' coerced to character
type.x = if_else(type.x %in% characterTypes, "_character", type.x),
type.x = if_else(type.x %in% numericTypes, "_numeric", type.x),
type.x = if_else(type.x %in% numericTypes | (grepl("DT$|DTM$|TM$", variable) & !is.na(format)),
"_numeric",
type.x
),
type.y = if_else(is.na(type.y), type.x, type.y),
type.y = tolower(type.y),
type.y = if_else(type.y %in% characterTypes, "_character", type.y),
type.y = if_else(type.y %in% characterTypes | (grepl("DTC$", variable) & is.na(format)), "_character", type.y),
type.y = if_else(type.y %in% numericTypes, "_numeric", type.y)
)

Expand All @@ -105,7 +112,6 @@ xportr_type <- function(.df,
type_mismatch_ind <- which(meta_ordered$type.x != meta_ordered$type.y)
type_log(meta_ordered, type_mismatch_ind, verbose)


# Check if variable types match
is_correct <- sapply(meta_ordered[["type.x"]] == meta_ordered[["type.y"]], isTRUE)
# Use the original variable iff metadata is missing that variable
Expand All @@ -128,6 +134,5 @@ xportr_type <- function(.df,
}
}, is_correct
)

.df
}
5 changes: 2 additions & 3 deletions R/utils-xportr.R
Original file line number Diff line number Diff line change
Expand Up @@ -357,9 +357,8 @@ first_class <- function(x) {
#' @param metadata A data frame containing variable level metadata.
#' @param variable_name string with `getOption('xportr.variable_name')`
#' @noRd
check_multiple_var_specs <- function(
metadata,
variable_name = getOption("xportr.variable_name")) {
check_multiple_var_specs <- function(metadata,
variable_name = getOption("xportr.variable_name")) {
variable_len <- pluck(metadata, variable_name) %||% c()
if (NROW(variable_len) != NROW(unique(variable_len))) {
cli_alert_info(
Expand Down
3 changes: 2 additions & 1 deletion R/xportr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
globalVariables(c(
"abbr_parsed", "abbr_stem", "adj_orig", "adj_parsed", "col_pos", "dict_varname",
"lower_original_varname", "my_minlength", "num_st_ind", "original_varname",
"renamed_n", "renamed_var", "use_bundle", "viable_start", "type.x", "type.y"
"renamed_n", "renamed_var", "use_bundle", "viable_start", "type.x", "type.y",
"variable"
))

# The following block is used by usethis to automatically manage
Expand Down
1 change: 1 addition & 0 deletions man/xportr_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/xportr_type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion tests/testthat/test-depreciation.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ test_that("xportr_type: deprecated metacore argument still works and gives warni
df_meta <- data.frame(
dataset = "df",
variable = c("Subj", "Param", "Val", "NotUsed"),
type = c("numeric", "character", "numeric", "character")
type = c("numeric", "character", "numeric", "character"),
format = NA
)

df2 <- xportr_type(df, metacore = df_meta)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-length.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ test_that("xportr_length: Accepts valid domain names in metadata object", {
test_that("xportr_length: CDISC data frame is being piped after another xportr function", {
adsl <- minimal_table(30)
metadata <- minimal_metadata(
dataset = TRUE, length = TRUE, type = TRUE, var_names = colnames(adsl)
dataset = TRUE, length = TRUE, type = TRUE, format = TRUE, var_names = colnames(adsl)
)

# Setup temporary options with active verbose
Expand Down
63 changes: 61 additions & 2 deletions tests/testthat/test-type.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
meta_example <- data.frame(
dataset = "df",
variable = c("Subj", "Param", "Val", "NotUsed"),
type = c("numeric", "character", "numeric", "character")
type = c("numeric", "character", "numeric", "character"),
format = NA
)

df <- data.frame(
Expand All @@ -23,7 +24,8 @@ test_that("xportr_type: NAs are handled as expected", {
meta_example <- data.frame(
dataset = "df",
variable = c("Subj", "Param", "Val", "NotUsed"),
type = c("numeric", "character", "numeric", "character")
type = c("numeric", "character", "numeric", "character"),
format = NA
)

df2 <- xportr_type(df, meta_example)
Expand Down Expand Up @@ -176,6 +178,63 @@ test_that("xportr_type: error when metadata is not set", {
)
})

test_that("xportr_type: date variables are not converted to numeric", {
df <- data.frame(RFICDT = as.Date("2017-03-30"), RFICDTM = as.POSIXct("2017-03-30"))
metacore_meta <- suppressWarnings(
metacore::metacore(
var_spec = data.frame(
variable = c("RFICDT", "RFICDTM"),
type = "integer",
label = c("RFICDT Label", "RFICDTM Label"),
length = c(1, 2),
common = NA_character_,
format = c("date9.", "datetime20.")
)
)
)
expect_message(
{
processed_df <- xportr_type(df, metacore_meta)
},
NA
)
expect_equal(lapply(df, class), lapply(processed_df, class))
expect_equal(df$RFICDT, processed_df$RFICDT)
expect_equal(df$RFICDTM, processed_df$RFICDTM)

xportr_write(processed_df, file.path(tempdir(), "dfdates.xpt"))
df_xpt <- read_xpt(file.path(tempdir(), "dfdates.xpt"))

expect_equal(lapply(df, class), lapply(df_xpt, class))
expect_equal(df$RFICDT, df_xpt$RFICDT, ignore_attr = TRUE)
expect_equal(as.character(df$RFICDTM), as.character(df_xpt$RFICDTM), ignore_attr = TRUE)

metadata <- data.frame(
dataset = c("adsl", "adsl", "adsl", "adsl"),
variable = c("USUBJID", "DMDTC", "RFICDT", "RFICDTM"),
type = c("text", "date", "integer", "integer"),
format = c(NA, NA, "date9.", "datetime15.")
)

adsl_original <- data.frame(
USUBJID = c("test1", "test2"),
DMDTC = c("2017-03-30", "2017-01-08"),
RFICDT = c("2017-03-30", "2017-01-08"),
RFICDTM = c("2017-03-30", "2017-01-08")
)


adsl_original$RFICDT <- as.Date(adsl_original$RFICDT)
adsl_original$RFICDTM <- as.POSIXct(adsl_original$RFICDTM)

expect_message(adsl_xpt2 <- adsl_original %>%
xportr_type(metadata), NA)

attr(adsl_original, "_xportr.df_arg_") <- "adsl_original"

expect_equal(adsl_original, adsl_xpt2)
})

test_that("xportr_type: Gets warning when metadata has multiple rows with same variable", {
# This test uses the (2) functions below to reduce code duplication
# All `expect_*` are being called inside the functions
Expand Down