diff --git a/NAMESPACE b/NAMESPACE index 3e6deff24..1c99b6d29 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -182,16 +182,19 @@ importFrom(grDevices,tiff) importFrom(magrittr,"%>%") importFrom(stringi,stri_c) importFrom(stringi,stri_encode) +importFrom(stringi,stri_extract_all_regex) importFrom(stringi,stri_isempty) importFrom(stringi,stri_join) importFrom(stringi,stri_match) importFrom(stringi,stri_match_all_regex) +importFrom(stringi,stri_match_first_regex) importFrom(stringi,stri_opts_collator) importFrom(stringi,stri_order) importFrom(stringi,stri_pad_left) importFrom(stringi,stri_rand_strings) importFrom(stringi,stri_read_lines) importFrom(stringi,stri_replace_all_fixed) +importFrom(stringi,stri_replace_all_regex) importFrom(stringi,stri_split_fixed) importFrom(stringi,stri_split_regex) importFrom(stringi,stri_sub) diff --git a/R/openxlsx2-package.R b/R/openxlsx2-package.R index 6efe91cf0..4c4ca7389 100644 --- a/R/openxlsx2-package.R +++ b/R/openxlsx2-package.R @@ -12,11 +12,11 @@ #' @import R6 #' @importFrom grDevices bmp col2rgb colors dev.copy dev.list dev.off jpeg palette png rgb tiff #' @importFrom magrittr %>% -#' @importFrom stringi stri_c stri_encode stri_isempty stri_join stri_match -#' stri_match_all_regex stri_order stri_opts_collator stri_pad_left -#' stri_rand_strings stri_read_lines stri_replace_all_fixed -#' stri_split_fixed stri_split_regex stri_sub stri_unescape_unicode -#' stri_unique +#' @importFrom stringi stri_c stri_encode stri_extract_all_regex +#' stri_isempty stri_join stri_match stri_match_all_regex stri_match_first_regex +#' stri_order stri_opts_collator stri_pad_left stri_rand_strings stri_read_lines +#' stri_replace_all_fixed stri_replace_all_regex stri_split_fixed +#' stri_split_regex stri_sub stri_unescape_unicode stri_unique #' @importFrom utils download.file head menu read.csv unzip #' @importFrom zip zip #' diff --git a/R/read.R b/R/read.R index 13d38e0ea..73679ffa6 100644 --- a/R/read.R +++ b/R/read.R @@ -427,54 +427,29 @@ wb_to_df <- function( if (any(cc$f_t == "shared")) { + # depending on the sheet, this might require updates to many cells + # TODO reduce this to cells, that are part of `cc`. Currently we + # might waste time, updating cells that are not visible to the user cc_shared <- wb$worksheets[[sheet]]$sheet_data$cc cc_shared <- cc_shared[cc_shared$f_t == "shared", ] cc_shared <- cc_shared[order(as.integer(cc_shared$f_si)), ] - # extend shared formula into all formula cells - carry_forward <- function(x) { - last_val <- NA - for (i in seq_along(x)) { - if (x[i] != "") { - last_val <- x[i] - } else { - x[i] <- last_val - } - } - return(x) - } + # carry forward the shared formula + cc_shared$f <- ave2(cc_shared$f, cc_shared$f_si, carry_forward) - cc_shared$f <- ave( - cc_shared$f, - as.integer(cc_shared$f_si), - FUN = carry_forward - ) - - # calculate difference for each shared formula to the origin - calc_distance <- function(x) { - x - x[1] - } + # calculate differences from the formula cell, to the shared cells + cc_shared$cols <- ave2(col2int(cc_shared$c_r), cc_shared$f_si, calc_distance) + cc_shared$rows <- ave2(as.integer(cc_shared$row_r), cc_shared$f_si, calc_distance) - cc_shared$cols <- ave( - col2int(cc_shared$c_r), - as.integer(cc_shared$f_si), - FUN = calc_distance - ) - cc_shared$rows <- ave( - as.integer(cc_shared$row_r), - as.integer(cc_shared$f_si), - FUN = calc_distance - ) - - # TODO skip non A1 shared cells + # begin updating the formulas. find a1 notion, get the next cell, update formula cells <- find_a1_notation(cc_shared$f) - repls <- cells + repls <- vector("list", length = length(cells)) - for (i in seq_len(nrow(cells))) { - repls[i, ] <- next_cell(cells[i, ], cc_shared$cols[i], cc_shared$rows[i]) + for (i in seq_along(cells)) { + repls[[i]] <- next_cell(cells[[i]], cc_shared$cols[i], cc_shared$rows[i]) } - cc_shared$f <- replace_a1_notation(cc_shared["f"], cells, repls) + cc_shared$f <- replace_a1_notation(cc_shared$f, repls) cc_shared$cols <- NULL cc_shared$rows <- NULL diff --git a/R/utils.R b/R/utils.R index 1e11cc871..5741815fe 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1276,26 +1276,30 @@ get_dims <- function(dims, check = FALSE, cols = FALSE, rows = FALSE) { } #' the function to find the cell +#' this function is used with `show_formula` in [wb_to_df()] #' @param string the formula #' @family internal #' @noRd find_a1_notation <- function(string) { pattern <- "\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?" - as.data.frame(do.call("rbind", stringi::stri_extract_all_regex(string, pattern))) + stringi::stri_extract_all_regex(string, pattern) } #' the function to replace the next cell +#' this function is used with `show_formula` in [wb_to_df()] #' @param cell the cell from a shared formula [find_a1_notation()] #' @param cols,rows an integer where to move the cell #' @family internal #' @noRd next_cell <- function(cell, cols = 0L, rows = 0L) { - z <- NULL - for (i in cell) { + + z <- vector("character", length(cell)) + for (i in seq_along(cell)) { # Match individual cells and ranges - match <- stringi::stri_match_first_regex(i, "^(\\$?)([A-Z]+)(\\$?)(\\d+)(:(\\$?)([A-Z]+)(\\$?)(\\d+))?$") + match <- stringi::stri_match_first_regex(cell[[i]], "^(\\$?)([A-Z]+)(\\$?)(\\d+)(:(\\$?)([A-Z]+)(\\$?)(\\d+))?$") - if (is.na(match[1, 1])) stop("Invalid A1 notation") + # if shared formula contains no A1 reference + if (is.na(match[1, 1])) return(NA_character_) # Extract parts of the cell fixed_col1 <- match[2] @@ -1313,7 +1317,7 @@ next_cell <- function(cell, cols = 0L, rows = 0L) { # Handle individual cell next_col <- if (fixed_col1 == "") int2col(col2int(col1) + cols) else col1 next_row <- if (fixed_row1 == "") row1 + rows else row1 - z <- c(z, paste0(fixed_col1, next_col, fixed_row1, next_row)) + z[i] <- paste0(fixed_col1, next_col, fixed_row1, next_row) } else { @@ -1322,39 +1326,58 @@ next_cell <- function(cell, cols = 0L, rows = 0L) { next_row1 <- if (fixed_row1 == "") row1 + rows else row1 next_col2 <- if (fixed_col2 == "") int2col(col2int(col2) + cols) else col2 next_row2 <- if (fixed_col2 == "") row2 + rows else row2 - z <- c(z, paste0(fixed_col1, next_col1, fixed_row1, next_row1, ":", fixed_col2, next_col2, fixed_row2, next_row2)) + z[i] <- paste0(fixed_col1, next_col1, fixed_row1, next_row1, ":", fixed_col2, next_col2, fixed_row2, next_row2) } } - as.data.frame(matrix(z, ncol = 2)) + z } #' the replacement function for shared formulas +#' this function is used with `show_formula` in [wb_to_df()] #' @param string the formula -#' @param matches the matches, obtained via [find_a1_notation()] #' @param replacements the replacements, from [next_cell()] #' @family internal #' @noRd -replace_a1_notation <- function(strings, matches, replacements) { - - # strings <- data.frame(rep(string, NROW(replacements))) +replace_a1_notation <- function(strings, replacements) { - # matches <- as.data.frame( - # matrix(matches, nrow = NROW(replacements), ncol = length(matches), byrow = TRUE) - # ) + # create sprintf-able strings + strings <- stringi::stri_replace_all_regex( + strings, + "\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?", + "%s" + ) - repl_fun <- function(str, x, y) { - for (i in seq_along(x)) { - str <- stringi::stri_replace_first_fixed(str, x[i], y[i]) - } + # insert replacements into string + repl_fun <- function(str, y) { + if (!anyNA(y)) # else keep str as is + str <- do.call(sprintf, c(str, as.list(y))) str } - z <- NULL - for (i in seq_len(NROW(strings))) { - z <- c(z, repl_fun(strings[i, ], matches[i, ], replacements[i, ])) + z <- vector("character", length(strings)) + for (i in seq_along(strings)) { + z[i] <- repl_fun(strings[[i]], replacements[[i]]) } z } + +# extend shared formula into all formula cells +carry_forward <- function(x) { + rep(x[1], length(x)) +} + +# calculate difference for each shared formula to the origin +calc_distance <- function(x) { + x - x[1] +} + +# ave function to avoid a dependency on stats. if we ever rely on stats, +# this can be replaced by stats::ave +ave2 <- function (x, y, FUN) { + g <- as.factor(y) + split(x, g) <- lapply(split(x, g), FUN) + x +} diff --git a/tests/testthat/test-read_from_created_wb.R b/tests/testthat/test-read_from_created_wb.R index 2e17a68d3..4c778b34f 100644 --- a/tests/testthat/test-read_from_created_wb.R +++ b/tests/testthat/test-read_from_created_wb.R @@ -223,9 +223,10 @@ test_that("check_names works", { }) test_that("shared formulas are handled", { + wb <- wb_workbook()$ add_worksheet()$ - add_data(x = matrix(rnorm(5*5), ncol = 5, nrow = 5))$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E12", shared = TRUE) exp <- structure( @@ -237,4 +238,40 @@ test_that("shared formulas are handled", { ) got <- as.matrix(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE)) expect_equal(exp, got) + + # shared formula w/o A1 notation + wb <- wb_workbook()$ + add_worksheet()$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ + add_formula(x = "TODAY()", dims = "A8:E12", shared = TRUE) + + exp <- rep("TODAY()", 9) + got <- unname(unlist(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE))) + expect_equal(exp, got) + + # a bunch of mixed shared formulas + wb <- wb_workbook()$ + add_worksheet()$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ + add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E9", shared = TRUE)$ + add_formula(x = "SUM($A2:A2)", dims = "A10:E11", shared = TRUE)$ + add_formula(x = "A2", dims = "A12:E13", shared = TRUE) + + exp <- c("SUM($A2:B2) + C$1", "SUM($A3:B3) + C$1", "SUM($A2:B2)", "SUM($A3:B3)", + "B2", "B3", "SUM($A2:C2) + D$1", "SUM($A3:C3) + D$1", "SUM($A2:C2)", + "SUM($A3:C3)", "C2", "C3", "SUM($A2:D2) + E$1", "SUM($A3:D3) + E$1", + "SUM($A2:D2)", "SUM($A3:D3)", "D2", "D3") + got <- unname(unlist(wb_to_df(wb, dims = "B8:D13", col_names = FALSE, show_formula = TRUE))) + expect_equal(exp, got) + + # make sure that replacements work as expected + wb <- wb_workbook()$ + add_worksheet()$ + add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$ + add_formula(x = "A2 + B2", dims = "A12:E13", shared = TRUE) + + exp <- c("A2 + B2", "A3 + B3", "B2 + C2", "B3 + C3", "C2 + D2", "C3 + D3", "D2 + E2", "D3 + E3") + got <- unname(unlist(wb_to_df(wb, dims = "A12:D13", col_names = FALSE, show_formula = TRUE))) + expect_equal(exp, got) + })