From 2dd00b8e4bd5c1b3aaaeabae49dfe15f790eba98 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Mon, 17 Jun 2024 13:49:43 +0200 Subject: [PATCH] [read] add `check_names` argument. closes #1050 (#1051) --- R/class-workbook.R | 5 ++++- R/read.R | 10 ++++++++++ man/wbWorkbook.Rd | 3 +++ man/wb_to_df.Rd | 5 +++++ tests/testthat/test-read_from_created_wb.R | 20 ++++++++++++++++++++ 5 files changed, 42 insertions(+), 1 deletion(-) diff --git a/R/class-workbook.R b/R/class-workbook.R index 02077662c..32b86ff34 100644 --- a/R/class-workbook.R +++ b/R/class-workbook.R @@ -2478,6 +2478,7 @@ wbWorkbook <- R6::R6Class( #' @param na.numbers A numeric vector of digits which are to be interpreted as NA. Blank cells will be returned as NA. #' @param fill_merged_cells If TRUE, the value in a merged cell is given to all cells within the merge. #' @param keep_attributes If TRUE additional attributes are returned. (These are used internally to define a cell type.) + #' @param check_names If TRUE then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names. #' @return a data frame to_df = function( sheet, @@ -2501,6 +2502,7 @@ wbWorkbook <- R6::R6Class( types, named_region, keep_attributes = FALSE, + check_names = FALSE, ... ) { @@ -2531,7 +2533,8 @@ wbWorkbook <- R6::R6Class( show_formula = show_formula, convert = convert, types = types, - named_region = named_region + named_region = named_region, + check_names = check_names ) }, diff --git a/R/read.R b/R/read.R index fa4e2fad1..716ede98d 100644 --- a/R/read.R +++ b/R/read.R @@ -59,6 +59,7 @@ #' @param fill_merged_cells If `TRUE`, the value in a merged cell is given to all cells within the merge. #' @param keep_attributes If `TRUE` additional attributes are returned. #' (These are used internally to define a cell type.) +#' @param check_names If `TRUE` then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names. #' @param ... additional arguments #' #' @examples @@ -149,6 +150,7 @@ wb_to_df <- function( types, named_region, keep_attributes = FALSE, + check_names = FALSE, ... ) { @@ -596,6 +598,10 @@ wb_to_df <- function( } if (col_names) { + if (check_names) { + xlsx_cols_names <- make.names(xlsx_cols_names, unique = TRUE) + } + names(z) <- xlsx_cols_names names(tt) <- xlsx_cols_names } @@ -630,6 +636,7 @@ read_xlsx <- function( na.strings = "#N/A", na.numbers = NA, fill_merged_cells = FALSE, + check_names = FALSE, ... ) { @@ -657,6 +664,7 @@ read_xlsx <- function( na.strings = na.strings, na.numbers = na.numbers, fill_merged_cells = fill_merged_cells, + check_names = check_names, ... ) } @@ -679,6 +687,7 @@ wb_read <- function( named_region, na.strings = "NA", na.numbers = NA, + check_names = FALSE, ... ) { @@ -705,6 +714,7 @@ wb_read <- function( named_region = named_region, na.strings = na.strings, na.numbers = na.numbers, + check_names = check_names, ... ) diff --git a/man/wbWorkbook.Rd b/man/wbWorkbook.Rd index 3e1ec8022..fd80f3866 100644 --- a/man/wbWorkbook.Rd +++ b/man/wbWorkbook.Rd @@ -930,6 +930,7 @@ to_df types, named_region, keep_attributes = FALSE, + check_names = FALSE, ... )}\if{html}{\out{}} } @@ -979,6 +980,8 @@ to_df \item{\code{keep_attributes}}{If TRUE additional attributes are returned. (These are used internally to define a cell type.)} +\item{\code{check_names}}{If TRUE then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names.} + \item{\code{...}}{additional arguments} } \if{html}{\out{}} diff --git a/man/wb_to_df.Rd b/man/wb_to_df.Rd index 8fdac82a7..67bb05325 100644 --- a/man/wb_to_df.Rd +++ b/man/wb_to_df.Rd @@ -29,6 +29,7 @@ wb_to_df( types, named_region, keep_attributes = FALSE, + check_names = FALSE, ... ) @@ -48,6 +49,7 @@ read_xlsx( na.strings = "#N/A", na.numbers = NA, fill_merged_cells = FALSE, + check_names = FALSE, ... ) @@ -66,6 +68,7 @@ wb_read( named_region, na.strings = "NA", na.numbers = NA, + check_names = FALSE, ... ) } @@ -121,6 +124,8 @@ If no sheet is selected, the first appearance will be selected. See \code{\link[ \item{keep_attributes}{If \code{TRUE} additional attributes are returned. (These are used internally to define a cell type.)} +\item{check_names}{If \code{TRUE} then the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names.} + \item{...}{additional arguments} } \description{ diff --git a/tests/testthat/test-read_from_created_wb.R b/tests/testthat/test-read_from_created_wb.R index 07bf8fa88..5dfd45664 100644 --- a/tests/testthat/test-read_from_created_wb.R +++ b/tests/testthat/test-read_from_created_wb.R @@ -201,3 +201,23 @@ test_that("column names are not missing with col_names = FALSE", { expect_equal(exp, got) }) + +test_that("check_names works", { + + dd <- data.frame( + "a and b" = 1:2, + "a-and-b" = 3:4, + check.names = FALSE + ) + + wb <- write_xlsx(x = dd) + + exp <- c("a and b", "a-and-b") + got <- names(wb_to_df(wb, check_names = FALSE)) + expect_equal(exp, got) + + exp <- c("a.and.b", "a.and.b.1") + got <- names(wb_to_df(wb, check_names = TRUE)) + expect_equal(exp, got) + +})