update the way columns are checked and their names matched with the original column names
epiverse-trace · May 31, 2024 · 8c87a73 · 8c87a73
1 parent 8ecfe27
commit 8c87a73
Show file tree

Hide file tree

Showing 5 changed files with 33 additions and 19 deletions.
diff --git a/R/check_date_sequence.R b/R/check_date_sequence.R
@@ -71,7 +71,7 @@ check_date_sequence <- function(data, target_columns) {
                               value = tmp_data)
     warning("Detected ", length(bad_order),
             " incorrect date sequences at line(s): ",
-            paste(bad_order, sep = ", "),
+            paste(bad_order, collapse = ", "),
             call. = FALSE)
   }
 

diff --git a/R/column_name_standardization.R b/R/column_name_standardization.R
@@ -97,25 +97,36 @@ standardize_column_names <- function(data, keep = NULL, rename = NULL) {
 #' @keywords internal
 #'
 retrieve_column_names <- function(data, target_columns) {
+  # when 'linelist_tags' is provided, it will be returned as is
   if (length(target_columns) == 1L && target_columns == "linelist_tags") {
     return(target_columns)
   }
+
+  # extract the report object to make it easily accessible
   report    <- attr(data, "report")
-  new_names <- target_columns
-  idx       <- which(target_columns %in% names(data))
-  if (length(idx) < length(target_columns)) {
-    new_names          <- target_columns[idx]
-    target_columns     <- target_columns[-idx]
-    if ("colnames" %in% names(report) &&
-        all(target_columns %in% report[["colnames"]][["before"]])) {
-      all_column_names <- report[["colnames"]]
-      idx              <- match(target_columns, all_column_names[["before"]])
-      new_names        <- c(new_names, all_column_names[["after"]][idx])
-    } else {
-      stop("Could not find the following column names: ",
-           paste(target_columns, sep = ", "))
-    }
+  if (is.null(report) || !("colnames" %in% names(report))) {
+    return(target_columns)
+  }
+
+  # when no target column is provided, it will return NULL
+  if (is.null(target_columns)) {
+    return(NULL)
   }
 
+  # detect the current names
+  # identify the old names
+  new_names      <- target_columns[target_columns %in% names(data)]
+  target_columns <- target_columns[!(target_columns %in% names(data))]
+  if ("colnames" %in% names(report) &&
+      all(target_columns %in% report[["colnames"]][["before"]])) {
+    all_column_names <- report[["colnames"]]
+    idx              <- match(target_columns, all_column_names[["before"]])
+    new_names        <- c(new_names, all_column_names[["after"]][idx])
+  } else {
+    stop("Could not find the following column names: ",
+         paste(target_columns, collapse = ", "))
+  }
+
+
   return(new_names)
 }
diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -2,6 +2,7 @@ CMD
 Codecov
 Epiverse
 Kamvar
+LSHTM
 Lifecycle
 ORCID
 OSX
@@ -13,6 +14,7 @@ analytics
 bookdown
 dplyr
 epiCleanr
+funder
 grp
 interoperates
 kableExtra

diff --git a/man/cleanepi-package.Rd b/man/cleanepi-package.Rd
diff --git a/tests/testthat/test-check_date_sequence.R b/tests/testthat/test-check_date_sequence.R
@@ -26,14 +26,13 @@ data <- data |>
 
 test_that("check_date_sequence sends a warning when incorrect column nams are
           found", {
-            expect_error(
+            expect_warning(
               check_date_sequence(
                 data           = data,
                 target_columns = c("date_first_pcr_positive_test",
                                    "date.of.admission", "fake_name")
               ),
-              regexp = cat("Could not find the following column names:
-                           fake_name")
+              regexp = cat("Removing unrecognised column name: fake_name")
             )
 
             expect_warning(