From 8e2543dd5b75beb8eb940f10bd571d3baf6b1e3a Mon Sep 17 00:00:00 2001
From: CJ Yetman
Date: Mon, 15 Jan 2024 21:07:36 +0100
Subject: [PATCH] allow for multiple trailing and/or leading whitespace in header (#63)

---
Reviewer notes (this section is not part of the commit message):

* The three anchored header patterns (isin, market value, currency) change
  their whitespace quantifier from `?` (at most one character) to `*` (any
  number), so headers padded with several spaces and/or tabs still match.
* This copy was re-flowed from a rendering in which newlines, indentation and
  runs of spaces had been collapsed. Indentation and the position of blank
  context lines are reconstructed; confirm them against the target tree
  before applying.
* The "(double-padded)" fixture is written with two spaces of padding so it
  exercises a different input than the single-padded test that precedes it.
* The author e-mail address was absent from the collapsed copy and has not
  been filled in.

 R/determine_headers.R                   |  6 ++---
 tests/testthat/test-determine_headers.R | 30 ++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/R/determine_headers.R b/R/determine_headers.R
index afa3da3..c96134c 100644
--- a/R/determine_headers.R
+++ b/R/determine_headers.R
@@ -59,9 +59,9 @@ determine_headers <- function(filepath) {
   num_of_cols <- length(headers)
 
   if (num_of_cols >= 3) {
-    isin_col <- grep(pattern = "^[[:space:]]?isin[[:space:]]?$", x = headers, ignore.case = TRUE, value = TRUE)
-    market_value_col <- grep(pattern = "^[[:space:]]?market[._ ]{0,1}value[[:space:]]?$", x = headers, ignore.case = TRUE, value = TRUE)
-    currency_col <- grep(pattern = "^[[:space:]]?currency[[:space:]]?$", x = headers, ignore.case = TRUE, value = TRUE)
+    isin_col <- grep(pattern = "^[[:space:]]*isin[[:space:]]*$", x = headers, ignore.case = TRUE, value = TRUE)
+    market_value_col <- grep(pattern = "^[[:space:]]*market[._ ]{0,1}value[[:space:]]*$", x = headers, ignore.case = TRUE, value = TRUE)
+    currency_col <- grep(pattern = "^[[:space:]]*currency[[:space:]]*$", x = headers, ignore.case = TRUE, value = TRUE)
 
     if (num_of_cols > 3) {
       investor_name_col <- grep(pattern = "investor", x = headers, ignore.case = TRUE, value = TRUE)
diff --git a/tests/testthat/test-determine_headers.R b/tests/testthat/test-determine_headers.R
index 07518fa..d768555 100644
--- a/tests/testthat/test-determine_headers.R
+++ b/tests/testthat/test-determine_headers.R
@@ -22,7 +22,35 @@ test_that("column names with no underscore are properly determined", {
 test_that("column names with leading and lagging whitespace are properly determined", {
   proper_names <- c("investor_name", "portfolio_name", "isin", "market_value", "currency")
   lead_and_lag_whitespace <- withr::local_tempfile()
-  writeLines(" investor_name,portfolio_name , isin ,market_value,currency\nx,y,z,1,a", lead_and_lag_whitespace)
+  writeLines(" investor_name,portfolio_name , isin , market_value,currency \nx,y,z,1,a", lead_and_lag_whitespace)
+  expect_setequal(names(determine_headers(lead_and_lag_whitespace)), proper_names)
+})
+
+test_that("column names with leading and lagging whitespace are properly determined (double-padded)", {
+  proper_names <- c("investor_name", "portfolio_name", "isin", "market_value", "currency")
+  lead_and_lag_whitespace <- withr::local_tempfile()
+  writeLines("  investor_name,portfolio_name  ,  isin  ,  market_value,currency  \nx,y,z,1,a", lead_and_lag_whitespace)
+  expect_setequal(names(determine_headers(lead_and_lag_whitespace)), proper_names)
+})
+
+test_that("column names with leading and lagging whitespace are properly determined (tab-padded)", {
+  proper_names <- c("investor_name", "portfolio_name", "isin", "market_value", "currency")
+  lead_and_lag_whitespace <- withr::local_tempfile()
+  writeLines("\tinvestor_name,portfolio_name\t,\tisin\t,\tmarket_value,currency \nx,y,z,1,a", lead_and_lag_whitespace)
+  expect_setequal(names(determine_headers(lead_and_lag_whitespace)), proper_names)
+})
+
+test_that("column names with leading and lagging whitespace are properly determined (double-tab-padded)", {
+  proper_names <- c("investor_name", "portfolio_name", "isin", "market_value", "currency")
+  lead_and_lag_whitespace <- withr::local_tempfile()
+  writeLines("\t\tinvestor_name,portfolio_name\t\t,\t\tisin\t\t,\t\tmarket_value,currency \nx,y,z,1,a", lead_and_lag_whitespace)
+  expect_setequal(names(determine_headers(lead_and_lag_whitespace)), proper_names)
+})
+
+test_that("column names with leading and lagging whitespace are properly determined (space-and-tab-padded)", {
+  proper_names <- c("investor_name", "portfolio_name", "isin", "market_value", "currency")
+  lead_and_lag_whitespace <- withr::local_tempfile()
+  writeLines(" \tinvestor_name,portfolio_name\t ,\t isin\t\t, \tmarket_value,currency \nx,y,z,1,a", lead_and_lag_whitespace)
   expect_setequal(names(determine_headers(lead_and_lag_whitespace)), proper_names)
 })
 