Skip to content

Commit

Permalink
update the way columns are checked and their names matched with the original column names
Browse files Browse the repository at this point in the history
  • Loading branch information
Karim-Mane committed May 31, 2024
1 parent 8ecfe27 commit 8c87a73
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 19 deletions.
2 changes: 1 addition & 1 deletion R/check_date_sequence.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ check_date_sequence <- function(data, target_columns) {
value = tmp_data)
warning("Detected ", length(bad_order),
" incorrect date sequences at line(s): ",
paste(bad_order, sep = ", "),
paste(bad_order, collapse = ", "),
call. = FALSE)
}

Expand Down
39 changes: 25 additions & 14 deletions R/column_name_standardization.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,25 +97,36 @@ standardize_column_names <- function(data, keep = NULL, rename = NULL) {
#' @keywords internal
#'
retrieve_column_names <- function(data, target_columns) {
# when 'linelist_tags' is provided, it will be returned as is
if (length(target_columns) == 1L && target_columns == "linelist_tags") {
return(target_columns)
}

# extract the report object to make it easily accessible
report <- attr(data, "report")
new_names <- target_columns
idx <- which(target_columns %in% names(data))
if (length(idx) < length(target_columns)) {
new_names <- target_columns[idx]
target_columns <- target_columns[-idx]
if ("colnames" %in% names(report) &&
all(target_columns %in% report[["colnames"]][["before"]])) {
all_column_names <- report[["colnames"]]
idx <- match(target_columns, all_column_names[["before"]])
new_names <- c(new_names, all_column_names[["after"]][idx])
} else {
stop("Could not find the following column names: ",
paste(target_columns, sep = ", "))
}
if (is.null(report) || !("colnames" %in% names(report))) {
return(target_columns)
}

# when no target column is provided, it will return NULL
if (is.null(target_columns)) {
return(NULL)
}

# detect the current names
# identify the old names
new_names <- target_columns[target_columns %in% names(data)]
target_columns <- target_columns[!(target_columns %in% names(data))]
if ("colnames" %in% names(report) &&
all(target_columns %in% report[["colnames"]][["before"]])) {
all_column_names <- report[["colnames"]]
idx <- match(target_columns, all_column_names[["before"]])
new_names <- c(new_names, all_column_names[["after"]][idx])
} else {
stop("Could not find the following column names: ",
paste(target_columns, collapse = ", "))
}


return(new_names)
}
2 changes: 2 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CMD
Codecov
Epiverse
Kamvar
LSHTM
Lifecycle
ORCID
OSX
Expand All @@ -13,6 +14,7 @@ analytics
bookdown
dplyr
epiCleanr
funder
grp
interoperates
kableExtra
Expand Down
4 changes: 3 additions & 1 deletion man/cleanepi-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions tests/testthat/test-check_date_sequence.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,13 @@ data <- data |>

test_that("check_date_sequence sends a warning when incorrect column nams are
found", {
expect_error(
expect_warning(
check_date_sequence(
data = data,
target_columns = c("date_first_pcr_positive_test",
"date.of.admission", "fake_name")
),
regexp = cat("Could not find the following column names:
fake_name")
regexp = cat("Removing unrecognised column name: fake_name")
)

expect_warning(
Expand Down

0 comments on commit 8c87a73

Please sign in to comment.