From d058b8a65fb0ad30658889a39e14a814d0a8431e Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Wed, 24 Jul 2024 19:25:49 +0200 Subject: [PATCH] [read] read shared formulas. closes #1072 (#1091) * [read] read shared formulas * [read] handle wb_to_df() where shared cells origin is outside of the selected dims region * [read] handle all kinds of shared formulas * update NEWS --- NAMESPACE | 3 + NEWS.md | 2 + R/openxlsx2-package.R | 10 +- R/read.R | 36 +++++++ R/utils.R | 107 +++++++++++++++++++++ tests/testthat/test-read_from_created_wb.R | 54 +++++++++++ 6 files changed, 207 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3e6deff24..1c99b6d29 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -182,16 +182,19 @@ importFrom(grDevices,tiff) importFrom(magrittr,"%>%") importFrom(stringi,stri_c) importFrom(stringi,stri_encode) +importFrom(stringi,stri_extract_all_regex) importFrom(stringi,stri_isempty) importFrom(stringi,stri_join) importFrom(stringi,stri_match) importFrom(stringi,stri_match_all_regex) +importFrom(stringi,stri_match_first_regex) importFrom(stringi,stri_opts_collator) importFrom(stringi,stri_order) importFrom(stringi,stri_pad_left) importFrom(stringi,stri_rand_strings) importFrom(stringi,stri_read_lines) importFrom(stringi,stri_replace_all_fixed) +importFrom(stringi,stri_replace_all_regex) importFrom(stringi,stri_split_fixed) importFrom(stringi,stri_split_regex) importFrom(stringi,stri_sub) diff --git a/NEWS.md b/NEWS.md index db87774f7..e8fc1a548 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Experimental support for shared formulas. Similar to spreadsheet software, when a cell is dragged to horizontally or vertically. This requires the formula to be written only for a single cell and it is filled by spreadsheet software for the remaining dimensions. `wb_add_formula()` gained a new argument `shared`. [1074](https://github.com/JanMarvin/openxlsx2/pull/1074) +* Experimental support for reading shared formulas. If `show_formula` is used with `wb_to_df()`, we try to show the value that is shown in spreadsheet software. [1091](https://github.com/JanMarvin/openxlsx2/pull/1091) + *************************************************************************** diff --git a/R/openxlsx2-package.R b/R/openxlsx2-package.R index 6efe91cf0..4c4ca7389 100644 --- a/R/openxlsx2-package.R +++ b/R/openxlsx2-package.R @@ -12,11 +12,11 @@ #' @import R6 #' @importFrom grDevices bmp col2rgb colors dev.copy dev.list dev.off jpeg palette png rgb tiff #' @importFrom magrittr %>% -#' @importFrom stringi stri_c stri_encode stri_isempty stri_join stri_match -#' stri_match_all_regex stri_order stri_opts_collator stri_pad_left -#' stri_rand_strings stri_read_lines stri_replace_all_fixed -#' stri_split_fixed stri_split_regex stri_sub stri_unescape_unicode -#' stri_unique +#' @importFrom stringi stri_c stri_encode stri_extract_all_regex +#' stri_isempty stri_join stri_match stri_match_all_regex stri_match_first_regex +#' stri_order stri_opts_collator stri_pad_left stri_rand_strings stri_read_lines +#' stri_replace_all_fixed stri_replace_all_regex stri_split_fixed +#' stri_split_regex stri_sub stri_unescape_unicode stri_unique #' @importFrom utils download.file head menu read.csv unzip #' @importFrom zip zip #' diff --git a/R/read.R b/R/read.R index 716ede98d..73679ffa6 100644 --- a/R/read.R +++ b/R/read.R @@ -424,9 +424,45 @@ wb_to_df <- function( } if (show_formula) { + + if (any(cc$f_t == "shared")) { + + # depending on the sheet, this might require updates to many cells + # TODO reduce this to cells, that are part of `cc`. Currently we + # might waste time, updating cells that are not visible to the user + cc_shared <- wb$worksheets[[sheet]]$sheet_data$cc + cc_shared <- cc_shared[cc_shared$f_t == "shared", ] + cc_shared <- cc_shared[order(as.integer(cc_shared$f_si)), ] + + # carry forward the shared formula + cc_shared$f <- ave2(cc_shared$f, cc_shared$f_si, carry_forward) + + # calculate differences from the formula cell, to the shared cells + cc_shared$cols <- ave2(col2int(cc_shared$c_r), cc_shared$f_si, calc_distance) + cc_shared$rows <- ave2(as.integer(cc_shared$row_r), cc_shared$f_si, calc_distance) + + # begin updating the formulas. find a1 notion, get the next cell, update formula + cells <- find_a1_notation(cc_shared$f) + repls <- vector("list", length = length(cells)) + + for (i in seq_along(cells)) { + repls[[i]] <- next_cell(cells[[i]], cc_shared$cols[i], cc_shared$rows[i]) + } + + cc_shared$f <- replace_a1_notation(cc_shared$f, repls) + cc_shared$cols <- NULL + cc_shared$rows <- NULL + + # reduce and assign + cc_shared <- cc_shared[which(cc_shared$r %in% cc$r), ] + cc[match(cc_shared$r, cc$r), ] <- cc_shared + + } + sel <- cc$f != "" cc$val[sel] <- replaceXMLEntities(cc$f[sel]) cc$typ[sel] <- "s" + } # convert "na_string" to missing diff --git a/R/utils.R b/R/utils.R index 121bd739f..52011ddc8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1274,3 +1274,110 @@ get_dims <- function(dims, check = FALSE, cols = FALSE, rows = FALSE) { return(rows) } + +#' the function to find the cell +#' this function is used with `show_formula` in [wb_to_df()] +#' @param string the formula +#' @family internal +#' @noRd +find_a1_notation <- function(string) { + pattern <- "\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?" + stringi::stri_extract_all_regex(string, pattern) +} + +#' the function to replace the next cell +#' this function is used with `show_formula` in [wb_to_df()] +#' @param cell the cell from a shared formula [find_a1_notation()] +#' @param cols,rows an integer where to move the cell +#' @family internal +#' @noRd +next_cell <- function(cell, cols = 0L, rows = 0L) { + + z <- vector("character", length(cell)) + for (i in seq_along(cell)) { + # Match individual cells and ranges + match <- stringi::stri_match_first_regex(cell[[i]], "^(\\$?)([A-Z]+)(\\$?)(\\d+)(:(\\$?)([A-Z]+)(\\$?)(\\d+))?$") + + # if shared formula contains no A1 reference + if (is.na(match[1, 1])) return(NA_character_) + + # Extract parts of the cell + fixed_col1 <- match[2] + col1 <- match[3] + fixed_row1 <- match[4] + row1 <- as.numeric(match[5]) + + fixed_col2 <- match[7] + col2 <- match[8] + fixed_row2 <- match[9] + row2 <- as.numeric(match[10]) + + if (is.na(col2)) { + + # Handle individual cell + next_col <- if (fixed_col1 == "") int2col(col2int(col1) + cols) else col1 + next_row <- if (fixed_row1 == "") row1 + rows else row1 + z[i] <- paste0(fixed_col1, next_col, fixed_row1, next_row) + + } else { + + # Handle range + next_col1 <- if (fixed_col1 == "") int2col(col2int(col1) + cols) else col1 + next_row1 <- if (fixed_row1 == "") row1 + rows else row1 + next_col2 <- if (fixed_col2 == "") int2col(col2int(col2) + cols) else col2 + next_row2 <- if (fixed_col2 == "") row2 + rows else row2 + z[i] <- paste0(fixed_col1, next_col1, fixed_row1, next_row1, ":", fixed_col2, next_col2, fixed_row2, next_row2) + + } + } + + z +} + +#' the replacement function for shared formulas +#' this function is used with `show_formula` in [wb_to_df()] +#' @param string the formula +#' @param replacements the replacements, from [next_cell()] +#' @family internal +#' @noRd +replace_a1_notation <- function(strings, replacements) { + + # create sprintf-able strings + strings <- stringi::stri_replace_all_regex( + strings, + "\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?", + "%s" + ) + + # insert replacements into string + repl_fun <- function(str, y) { + if (!anyNA(y)) # else keep str as is + str <- do.call(sprintf, c(str, as.list(y))) + str + } + + z <- vector("character", length(strings)) + for (i in seq_along(strings)) { + z[i] <- repl_fun(strings[[i]], replacements[[i]]) + } + + z +} + +# extend shared formula into all formula cells +carry_forward <- function(x) { + rep(x[1], length(x)) +} + +# calculate difference for each shared formula to the origin +calc_distance <- function(x) { + x - x[1] +} + +# ave function to avoid a dependency on stats. if we ever rely on stats, +# this can be replaced by stats::ave +ave2 <- function(x, y, FUN) { + g <- as.factor(y) + split(x, g) <- lapply(split(x, g), FUN) + x +} diff --git a/tests/testthat/test-read_from_created_wb.R b/tests/testthat/test-read_from_created_wb.R index 5dfd45664..4c778b34f 100644 --- a/tests/testthat/test-read_from_created_wb.R +++ b/tests/testthat/test-read_from_created_wb.R @@ -221,3 +221,57 @@ test_that("check_names works", { expect_equal(exp, got) }) + +test_that("shared formulas are handled", { + + wb <- wb_workbook()$ + add_worksheet()$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ + add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E12", shared = TRUE) + + exp <- structure( + c("SUM($A4:A4) + B$1", "SUM($A5:A5) + B$1", "SUM($A6:A6) + B$1", + "SUM($A4:B4) + C$1", "SUM($A5:B5) + C$1", "SUM($A6:B6) + C$1", + "SUM($A4:C4) + D$1", "SUM($A5:C5) + D$1", "SUM($A6:C6) + D$1"), + dim = c(3L, 3L), + dimnames = list(c("10", "11", "12"), c("A", "B", "C")) + ) + got <- as.matrix(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE)) + expect_equal(exp, got) + + # shared formula w/o A1 notation + wb <- wb_workbook()$ + add_worksheet()$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ + add_formula(x = "TODAY()", dims = "A8:E12", shared = TRUE) + + exp <- rep("TODAY()", 9) + got <- unname(unlist(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE))) + expect_equal(exp, got) + + # a bunch of mixed shared formulas + wb <- wb_workbook()$ + add_worksheet()$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ + add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E9", shared = TRUE)$ + add_formula(x = "SUM($A2:A2)", dims = "A10:E11", shared = TRUE)$ + add_formula(x = "A2", dims = "A12:E13", shared = TRUE) + + exp <- c("SUM($A2:B2) + C$1", "SUM($A3:B3) + C$1", "SUM($A2:B2)", "SUM($A3:B3)", + "B2", "B3", "SUM($A2:C2) + D$1", "SUM($A3:C3) + D$1", "SUM($A2:C2)", + "SUM($A3:C3)", "C2", "C3", "SUM($A2:D2) + E$1", "SUM($A3:D3) + E$1", + "SUM($A2:D2)", "SUM($A3:D3)", "D2", "D3") + got <- unname(unlist(wb_to_df(wb, dims = "B8:D13", col_names = FALSE, show_formula = TRUE))) + expect_equal(exp, got) + + # make sure that replacements work as expected + wb <- wb_workbook()$ + add_worksheet()$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ + add_formula(x = "A2 + B2", dims = "A12:E13", shared = TRUE) + + exp <- c("A2 + B2", "A3 + B3", "B2 + C2", "B3 + C3", "C2 + D2", "C3 + D3", "D2 + E2", "D3 + E3") + got <- unname(unlist(wb_to_df(wb, dims = "A12:D13", col_names = FALSE, show_formula = TRUE))) + expect_equal(exp, got) + +})