From 2dd00b8e4bd5c1b3aaaeabae49dfe15f790eba98 Mon Sep 17 00:00:00 2001
From: Jan Marvin Garbuszus <jan.garbuszus@ruhr-uni-bochum.de>
Date: Mon, 17 Jun 2024 13:49:43 +0200
Subject: [PATCH] [read] add `check_names` argument. closes #1050 (#1051)

---
 R/class-workbook.R                         |  5 ++++-
 R/read.R                                   | 10 ++++++++++
 man/wbWorkbook.Rd                          |  3 +++
 man/wb_to_df.Rd                            |  5 +++++
 tests/testthat/test-read_from_created_wb.R | 20 ++++++++++++++++++++
 5 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/R/class-workbook.R b/R/class-workbook.R
index 02077662c..32b86ff34 100644
--- a/R/class-workbook.R
+++ b/R/class-workbook.R
@@ -2478,6 +2478,7 @@ wbWorkbook <- R6::R6Class(
     #' @param na.numbers A numeric vector of digits which are to be interpreted as NA. Blank cells will be returned as NA.
     #' @param fill_merged_cells If TRUE, the value in a merged cell is given to all cells within the merge.
     #' @param keep_attributes If TRUE additional attributes are returned. (These are used internally to define a cell type.)
+    #' @param check_names If TRUE then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names.
     #' @return a data frame
     to_df = function(
       sheet,
@@ -2501,6 +2502,7 @@ wbWorkbook <- R6::R6Class(
       types,
       named_region,
       keep_attributes   = FALSE,
+      check_names       = FALSE,
       ...
     ) {
 
@@ -2531,7 +2533,8 @@ wbWorkbook <- R6::R6Class(
         show_formula      = show_formula,
         convert           = convert,
         types             = types,
-        named_region      = named_region
+        named_region      = named_region,
+        check_names       = check_names
       )
     },
 
diff --git a/R/read.R b/R/read.R
index fa4e2fad1..716ede98d 100644
--- a/R/read.R
+++ b/R/read.R
@@ -59,6 +59,7 @@
 #' @param fill_merged_cells If `TRUE`, the value in a merged cell is given to all cells within the merge.
 #' @param keep_attributes If `TRUE` additional attributes are returned.
 #'   (These are used internally to define a cell type.)
+#' @param check_names If `TRUE` then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names.
 #' @param ... additional arguments
 #'
 #' @examples
@@ -149,6 +150,7 @@ wb_to_df <- function(
     types,
     named_region,
     keep_attributes   = FALSE,
+    check_names       = FALSE,
     ...
 ) {
 
@@ -596,6 +598,10 @@ wb_to_df <- function(
   }
 
   if (col_names) {
+    if (check_names) {
+      xlsx_cols_names <- make.names(xlsx_cols_names, unique = TRUE)
+    }
+
     names(z)  <- xlsx_cols_names
     names(tt) <- xlsx_cols_names
   }
@@ -630,6 +636,7 @@ read_xlsx <- function(
   na.strings        = "#N/A",
   na.numbers        = NA,
   fill_merged_cells = FALSE,
+  check_names       = FALSE,
   ...
 ) {
 
@@ -657,6 +664,7 @@ read_xlsx <- function(
     na.strings        = na.strings,
     na.numbers        = na.numbers,
     fill_merged_cells = fill_merged_cells,
+    check_names       = check_names,
     ...
   )
 }
@@ -679,6 +687,7 @@ wb_read <- function(
   named_region,
   na.strings      = "NA",
   na.numbers      = NA,
+  check_names     = FALSE,
   ...
 ) {
 
@@ -705,6 +714,7 @@ wb_read <- function(
     named_region    = named_region,
     na.strings      = na.strings,
     na.numbers      = na.numbers,
+    check_names     = check_names,
     ...
   )
 
diff --git a/man/wbWorkbook.Rd b/man/wbWorkbook.Rd
index 3e1ec8022..fd80f3866 100644
--- a/man/wbWorkbook.Rd
+++ b/man/wbWorkbook.Rd
@@ -930,6 +930,7 @@ to_df
   types,
   named_region,
   keep_attributes = FALSE,
+  check_names = FALSE,
   ...
 )}\if{html}{\out{</div>}}
 }
@@ -979,6 +980,8 @@ to_df
 
 \item{\code{keep_attributes}}{If TRUE additional attributes are returned. (These are used internally to define a cell type.)}
 
+\item{\code{check_names}}{If TRUE then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names.}
+
 \item{\code{...}}{additional arguments}
 }
 \if{html}{\out{</div>}}
diff --git a/man/wb_to_df.Rd b/man/wb_to_df.Rd
index 8fdac82a7..67bb05325 100644
--- a/man/wb_to_df.Rd
+++ b/man/wb_to_df.Rd
@@ -29,6 +29,7 @@ wb_to_df(
   types,
   named_region,
   keep_attributes = FALSE,
+  check_names = FALSE,
   ...
 )
 
@@ -48,6 +49,7 @@ read_xlsx(
   na.strings = "#N/A",
   na.numbers = NA,
   fill_merged_cells = FALSE,
+  check_names = FALSE,
   ...
 )
 
@@ -66,6 +68,7 @@ wb_read(
   named_region,
   na.strings = "NA",
   na.numbers = NA,
+  check_names = FALSE,
   ...
 )
 }
@@ -121,6 +124,8 @@ If no sheet is selected, the first appearance will be selected. See \code{\link[
 \item{keep_attributes}{If \code{TRUE} additional attributes are returned.
 (These are used internally to define a cell type.)}
 
+\item{check_names}{If \code{TRUE} then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names.}
+
 \item{...}{additional arguments}
 }
 \description{
diff --git a/tests/testthat/test-read_from_created_wb.R b/tests/testthat/test-read_from_created_wb.R
index 07bf8fa88..5dfd45664 100644
--- a/tests/testthat/test-read_from_created_wb.R
+++ b/tests/testthat/test-read_from_created_wb.R
@@ -201,3 +201,23 @@ test_that("column names are not missing with col_names = FALSE", {
   expect_equal(exp, got)
 
 })
+
+test_that("check_names works", {
+
+  # two columns whose names are not syntactically valid in R
+  input <- data.frame(
+    "a and b" = 1:2,
+    "a-and-b" = 3:4,
+    check.names = FALSE
+  )
+  wb <- write_xlsx(x = input)
+
+  # check_names = FALSE: the names are read back untouched
+  raw_names <- names(wb_to_df(wb, check_names = FALSE))
+  expect_equal(c("a and b", "a-and-b"), raw_names)
+
+  # check_names = TRUE: names pass through make.names(unique = TRUE)
+  clean_names <- names(wb_to_df(wb, check_names = TRUE))
+  expect_equal(c("a.and.b", "a.and.b.1"), clean_names)
+
+})