Merge branch 'main' into 223_clean_test_code

atorus-research · Feb 28, 2024 · 7aea0b3 · 7aea0b3
2 parents c71e591 + d1b4c21
commit 7aea0b3
Show file tree

Hide file tree

Showing 33 changed files with 517 additions and 293 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: xportr
 Title: Utilities to Output CDISC SDTM/ADaM XPT Files
-Version: 0.3.1.9021
+Version: 0.3.1.9025
 Authors@R: c(
  person("Eli", "Miller", , "Eli.Miller@AtorusResearch.com", role = c("aut", "cre"),
  comment = c(ORCID = "0000-0002-2127-9456")),
@@ -30,17 +30,14 @@ Imports:
  dplyr (>= 1.0.2),
  glue (>= 1.4.2),
  haven (>= 2.5.0),
- janitor,
  lifecycle,
  magrittr,
  purrr (>= 0.3.4),
  readr,
  rlang (>= 0.4.10),
  stringr (>= 1.4.0),
- tidyselect,
- tm
+ tidyselect
 Suggests: 
- admiral,
  DT,
  knitr,
  labelled,
@@ -53,6 +50,5 @@ VignetteBuilder:
  knitr
 Config/testthat/edition: 3
 Encoding: UTF-8
-LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,11 +8,10 @@ export(xportr_length)
 export(xportr_metadata)
 export(xportr_options)
 export(xportr_order)
+export(xportr_split)
 export(xportr_type)
 export(xportr_write)
 export(xpt_validate)
-import(haven)
-import(rlang)
 importFrom(checkmate,assert)
 importFrom(checkmate,assert_character)
 importFrom(checkmate,assert_choice)
@@ -32,6 +31,7 @@ importFrom(cli,cli_alert_success)
 importFrom(cli,cli_div)
 importFrom(cli,cli_h2)
 importFrom(cli,cli_text)
+importFrom(cli,cli_warn)
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
 importFrom(dplyr,as_tibble)
@@ -54,7 +54,7 @@ importFrom(dplyr,ungroup)
 importFrom(glue,glue)
 importFrom(glue,glue_collapse)
 importFrom(graphics,stem)
-importFrom(janitor,make_clean_names)
+importFrom(haven,write_xpt)
 importFrom(lifecycle,deprecated)
 importFrom(magrittr,"%>%")
 importFrom(magrittr,extract2)
@@ -65,15 +65,24 @@ importFrom(purrr,map_chr)
 importFrom(purrr,map_dbl)
 importFrom(purrr,pluck)
 importFrom(purrr,walk)
+importFrom(purrr,walk2)
 importFrom(readr,parse_number)
+importFrom(rlang,"%||%")
+importFrom(rlang,":=")
+importFrom(rlang,.data)
+importFrom(rlang,abort)
+importFrom(rlang,inform)
+importFrom(rlang,local_options)
+importFrom(rlang,sym)
+importFrom(rlang,warn)
+importFrom(rlang,with_options)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
 importFrom(tidyselect,all_of)
 importFrom(tidyselect,any_of)
 importFrom(tidyselect,where)
-importFrom(tm,stemDocument)
 importFrom(utils,capture.output)
 importFrom(utils,packageVersion)
 importFrom(utils,str)

diff --git a/NEWS.md b/NEWS.md
@@ -3,19 +3,25 @@
 ## New Features and Bug Fixes
 
 * `xportr_metadata()` can set `verbose` for a whole pipeline, i.e. setting `verbose` in `xportr_metadata()` will populate to all `xportr` functions. (#151)
+
 * All `xportr` functions now have `verbose = NULL` as the default (#151)
+
 * Remove unused packages from Suggests (#221)
 
 * `xportr_write()` now accepts a `metadata` argument, which can be used to set the dataset label and keeps it consistent with the other `xportr_*` functions. Note that a dataset label set using `xportr_df_label()` is retained by `xportr_write()`.
+
 * Exporting a new dataset `dataset_spec` that contains the Dataset Specification for ADSL. (#179)
+
 * Added a check for character variable lengths up to 200 bytes in `xpt_validate()` (#91, #189).
+
 * File name check is moved to strict_checks condition to allow underscores in the file name. Underscores are allowed in xpt but not per FDA requirements. (#126)
 
 * It is now possible to get and set the xportr options using the helper function `xportr_options()` (#130)
 
 * Added `xportr.character_metadata_types` and `xportr.numeric_metadata_types` to list the metadata types that are character or numeric. Updated `xportr.character_types` and `xportr.numeric_types` to list only the R types that are character and the R types that are numeric. This ensures that all R types, including dates, are now managed by `xportr_type()`. If the R type differs from the metadata type, the variable is coerced (#161).
 
 * Adds argument assertions to public functions using `{checkmate}` (#175)
+* `xportr_split()` is a new function that allows users to split a dataset into multiple output files based on a variable. (#183)
 
 * `xportr_metadata()` can set `verbose` for a whole pipeline, i.e. setting `verbose` in `xportr_metadata()` will populate to all `xportr` functions. (#151)
 
@@ -25,6 +31,8 @@
 
 * New argument in `xportr_length()` allows selection between the length from metadata, as previously done, or from the calculated maximum length per variable when `length_source` is set to “data” (#91)
 
+* Make `xportr_type()` drop factor levels when coercing variables
+
 * `xportr_length()` assigns the maximum length value instead of 200 for a character variable when the length is missing in the metadata (#207)
 
 ## Deprecation and Breaking Changes
@@ -37,6 +45,9 @@ done to make the use of xportr functions more explicit. (#182)
 * The `metacore` argument, which was renamed to `metadata` in the following six xportr functions: (`xportr_df_label()`, `xportr_format()`, `xportr_label()`, `xportr_length()`, `xportr_order()`, and `xportr_type()`) in version `0.3.0` with a soft deprecation warning, has now been hard deprecated. Please update your code to use the new `metadata` argument in place of `metacore`.
 
 * `SASlength` and `SAStype` were removed since they did not have an impact on `xpt_validate` or any other functions (#132)
+* Removes `admiral` from suggested dependencies (#237)
+* `adsl` data object is now called `adsl_xportr` (#237)
+* Data objects are no longer lazy loaded, which means that when needed the user must call `data("name_of_object")` first (#237)
 
 ## Documentation
 
@@ -50,17 +61,13 @@ done to make the use of xportr functions more explicit. (#182)
 
 * Tests use `{withr}` to create temporary files that are automatically deleted (#219)
 
-# xportr 0.3.1
+# xportr 0.3.2
 
-## New Features and Bug Fixes
-
-* Make `xportr_type()` drop factor levels when coercing variables
+* Removed unused packages, `{tm}` and `{janitor}` from Imports (#241)
 
-## Documentation
-
-* Set up Development version of Website (#187)
+# xportr 0.3.1
 
-## Deprecation and Breaking Changes
+* Fixed issues around code coverage (#170) and `lintr` (#176)
 
 # xportr 0.3.0
 

diff --git a/R/data.R b/R/data.R
@@ -2,62 +2,70 @@
 #'
 #' An example dataset containing subject level data
 #'
-#' @format ## `adsl`
-#' A data frame with 254 rows and 48 columns:
+#' @source Dataset created by `admiral::use_ad_template("adsl")`
+#' @usage data("adsl_xportr")
+#'
+#' @format ## `adsl_xportr`
+#' A data frame with 306 rows and 51 columns:
 #' \describe{
 #' \item{STUDYID}{Study Identifier}
 #' \item{USUBJID}{Unique Subject Identifier}
 #' \item{SUBJID}{Subject Identifier for the Study}
+#' \item{RFSTDTC}{Subject Reference Start Date/Time}
+#' \item{RFENDTC}{Subject Reference End Date/Time}
+#' \item{RFXSTDTC}{Date/Time of First Study Treatment}
+#' \item{RFXENDTC}{Date/Time of Last Study Treatment}
+#' \item{RFICDTC}{Date/Time of Informed Consent}
+#' \item{RFPENDTC}{Date/Time of End of Participation}
+#' \item{DTHDTC}{Date/Time of Death}
+#' \item{DTHFL}{Subject Death Flag}
 #' \item{SITEID}{Study Site Identifier}
-#' \item{SITEGR1}{Pooled Site Group 1}
+#' \item{AGE}{Age}
+#' \item{AGEU}{Age Units}
+#' \item{SEX}{Sex}
+#' \item{RACE}{Race}
+#' \item{ETHNIC}{Ethnicity}
+#' \item{ARMCD}{Planned Arm Code}
 #' \item{ARM}{Description of Planned Arm}
+#' \item{ACTARMCD}{Actual Arm Code}
+#' \item{ACTARM}{Description of Actual Arm}
+#' \item{COUNTRY}{Country}
+#' \item{DMDTC}{Date/Time of Collection}
+#' \item{DMDY}{Study Day of Collection}
 #' \item{TRT01P}{Planned Treatment for Period 01}
-#' \item{TRT01PN}{Planned Treatment for Period 01 (N)}
 #' \item{TRT01A}{Actual Treatment for Period 01}
-#' \item{TRT01AN}{Actual Treatment for Period 01 (N)}
+#' \item{TRTSDTM}{Datetime of First Exposure to Treatment}
+#' \item{TRTSTMF}{Time of First Exposure Imputation Flag}
+#' \item{TRTEDTM}{Datetime of Last Exposure to Treatment}
+#' \item{TRTETMF}{Time of Last Exposure Imputation Flag}
 #' \item{TRTSDT}{Date of First Exposure to Treatment}
 #' \item{TRTEDT}{Date of Last Exposure to Treatment}
-#' \item{TRTDUR}{Duration of Treatment (days)}
-#' \item{AVGDD}{Avg Daily Dose (as planned)}
-#' \item{CUMDOSE}{Cumulative Dose (as planned)}
-#' \item{AGE}{Age}
-#' \item{AGEGR1}{Pooled Age Group 1}
-#' \item{AGEGR1N}{Pooled Age Group 1 (N)}
-#' \item{AGEU}{Age Units}
-#' \item{RACE}{Race}
-#' \item{RACEN}{Race (N)}
-#' \item{SEX}{Sex}
-#' \item{ETHNIC}{Ethnicity}
+#' \item{TRTDURD}{Total Treatment Duration (Days)}
+#' \item{SCRFDT}{Screen Failure Date}
+#' \item{EOSDT}{End of Study Date}
+#' \item{EOSSTT}{End of Study Status}
+#' \item{FRVDT}{Final Retrieval Visit Date}
+#' \item{RANDDT}{Date of Randomization}
+#' \item{DTHDT}{Date of Death}
+#' \item{DTHDTF}{Date of Death Imputation Flag}
+#' \item{DTHADY}{Relative Day of Death}
+#' \item{LDDTHELD}{Elapsed Days from Last Dose to Death}
+#' \item{LSTALVDT}{Date Last Known Alive}
 #' \item{SAFFL}{Safety Population Flag}
-#' \item{ITTFL}{Intent-To-Treat Population Flag}
-#' \item{EFFFL}{Efficacy Population Flag}
-#' \item{COMP8FL}{Completers of Week 8 Population Flag}
-#' \item{COMP16FL}{Completers of Week 16 Population Flag}
-#' \item{COMP24FL}{Completers of Week 24 Population Flag}
-#' \item{DISCONFL}{Did the Subject Discontinue the Study}
-#' \item{DSRAEFL}{Discontinued due to AE}
-#' \item{DTHFL}{Subject Died}
-#' \item{BMIBL}{Baseline BMI (kg/m^2)}
-#' \item{BMIBLGR1}{Pooled Baseline BMI Group 1}
-#' \item{HEIGHTBL}{Baseline Height (cm)}
-#' \item{WEIGHTBL}{Baseline Weight (kg)}
-#' \item{EDUCLVL}{Years of Education}
-#' \item{DISONSDT}{Date of Onset of Disease}
-#' \item{DURDIS}{Duration of Disease (Months)}
-#' \item{DURDSGR1}{Pooled Disease Duration Group 1}
-#' \item{VISIT1DT}{Date of Visit 1}
-#' \item{RFSTDTC}{Subject Reference Start Date/Time}
-#' \item{RFENDTC}{Subject Reference End Date/Time}
-#' \item{VISNUMEN}{End of Trt Visit (Vis 12 or Early Term.)}
-#' \item{RFENDT}{Date of Discontinuation/Completion}
-#' \item{DCDECOD}{Standardized Disposition Term}
-#' \item{DCREASCD}{Reason for Discontinuation}
-#' \item{MMSETOT}{MMSE Total}
+#' \item{RACEGR1}{Pooled Race Group 1}
+#' \item{AGEGR1}{Pooled Age Group 1}
+#' \item{REGION1}{Geographic Region 1}
+#' \item{LDDTHGR1}{Last Dose to Death - Days Elapsed Group 1}
+#' \item{DTH30FL}{Death Within 30 Days of Last Trt Flag}
+#' \item{DTHA30FL}{Death After 30 Days from Last Trt Flag}
+#' \item{DTHB30FL}{Death Within 30 Days of First Trt Flag}
 #' }
-"adsl"
+"adsl_xportr"
 
 #' Example Dataset Variable Specification
 #'
+#' @usage data("var_spec")
+#'
 #' @format ## `var_spec`
 #' A data frame with 216 rows and 19 columns:
 #' \describe{
@@ -85,6 +93,7 @@
 
 #' Example Dataset Specification
 #'
+#' @usage data("dataset_spec")
 #' @format ## `dataset_spec`
 #' A data frame with 1 row and 9 columns:
 #' \describe{

diff --git a/R/length.R b/R/length.R
@@ -8,12 +8,6 @@
 #' @inheritParams xportr
 #' @param metadata A data frame containing variable level metadata. See
 #' 'Metadata' section for details.
-#' @param domain Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset
-#' the metadata object. If none is passed, then name of the dataset passed as
-#' .df will be used.
-#' @param verbose The action this function takes when an action is taken on the
-#' dataset or function validation finds an issue. See 'Messaging' section for
-#' details. Options are 'stop', 'warn', 'message', and 'none'
 #' @param length_source Choose the assigned length from either metadata or data.
 #'
 #' If `"metadata"` is specified, the assigned length is from the metadata length.
@@ -155,18 +149,18 @@ xportr_length <- function(.df,
  attr(.df[[i]], "width") <- length_data[[i]]
  }
 
+
  length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name)
  length_msg <- length_msg %>%
  mutate(
  length_df = as.numeric(length_msg[[paste0(variable_length, ".x")]]),
  length_meta = as.numeric(length_msg[[paste0(variable_length, ".y")]])
  ) %>%
- filter(length_df < length_meta) %>%
- select(variable_name, length_df, length_meta)
+ filter(.data$length_df < .data$length_meta) %>%
+ select(any_of(c(variable_name, "length_df", "length_meta")))
 
  max_length_msg(length_msg, verbose)
  }
 
-
  .df
 }
diff --git a/R/metadata.R b/R/metadata.R
@@ -32,14 +32,12 @@
 #'
 #' xportr_metadata(adlb, metadata, "test")
 #'
-#' if (rlang::is_installed("magrittr")) {
-#' library(magrittr)
+#' library(magrittr)
 #'
-#' adlb %>%
-#' xportr_metadata(metadata, "test") %>%
-#' xportr_type() %>%
-#' xportr_order()
-#' }
+#' adlb %>%
+#' xportr_metadata(metadata, "test") %>%
+#' xportr_type() %>%
+#' xportr_order()
 xportr_metadata <- function(.df,
  metadata = NULL,
  domain = NULL,

diff --git a/R/split.R b/R/split.R
@@ -0,0 +1,37 @@
+#' Split xpt file output
+#'
+#' Per the FDA Study Data Technical Conformance Guide
+#' (https://www.fda.gov/media/88173/download) section 3.3.2, dataset file
+#' sizes shouldn't exceed 5 GB. If datasets are large enough, they should be
+#' split based on a variable. For example, laboratory readings in `ADLB` can be
+#' split by `LBCAT` to split up hematology and chemistry data.
+#'
+#' This function will tell `xportr_write()` to split the data frame based on the
+#' variable passed in `split_by`. When written, the file name will be prepended
+#' with a number for uniqueness. Per CDISC guidance, the Reviewer Guides should
+#' note how you split your files.
+#'
+#' @inheritParams xportr_length
+#' @param split_by A quoted variable that will be passed to `base::split()`.
+#'
+#' @return A data frame with an additional attribute added so `xportr_write()`
+#' knows how to split the data frame.
+#' With `split_by = NULL` (the default) no such attribute is set.
+#'
+#' @export
+#'
+#' @examples
+#' # Build a small lab dataset with a category column (`LBCAT`) to split on,
+#' # then mark it so that `xportr_write()` splits the output by that column.
+#'
+#' adlb <- data.frame(
+#'   USUBJID = c(1001, 1002, 1003),
+#'   LBCAT = c("HEMATOLOGY", "HEMATOLOGY", "CHEMISTRY")
+#' )
+#'
+#' adlb <- xportr_split(adlb, "LBCAT")
+xportr_split <- function(.df, split_by = NULL) {
+ attr(.df, "_xportr.split_by_") <- split_by
+
+ return(.df)
+}
diff --git a/R/support-test.R b/R/support-test.R
@@ -49,7 +49,7 @@ minimal_table <- function(n_rows = 3, cols = c("x", "y")) {
  d = sample(Sys.Date() + c(1, -1, 10, -10), size = n_rows, replace = TRUE),
  e = sample(c(1, 2), replace = TRUE, size = n_rows)
  ) %>%
- mutate(e = if_else(seq_along(e) %% 2 == 0, NA, e)) %>%
+ mutate(e = if_else(seq_along(.data$e) %% 2 == 0, NA, .data$e)) %>%
  select(all_of(tolower(cols)))
 }