Skip to content

Commit

Permalink
[read] read shared formulas. closes #1072 (#1091)
Browse files Browse the repository at this point in the history
* [read] read shared formulas

* [read] handle wb_to_df() where shared cells origin is outside of the selected dims region

* [read] handle all kinds of shared formulas

* update NEWS
  • Loading branch information
JanMarvin authored Jul 24, 2024
1 parent 228c51f commit d058b8a
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 5 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -182,16 +182,19 @@ importFrom(grDevices,tiff)
importFrom(magrittr,"%>%")
importFrom(stringi,stri_c)
importFrom(stringi,stri_encode)
importFrom(stringi,stri_extract_all_regex)
importFrom(stringi,stri_isempty)
importFrom(stringi,stri_join)
importFrom(stringi,stri_match)
importFrom(stringi,stri_match_all_regex)
importFrom(stringi,stri_match_first_regex)
importFrom(stringi,stri_opts_collator)
importFrom(stringi,stri_order)
importFrom(stringi,stri_pad_left)
importFrom(stringi,stri_rand_strings)
importFrom(stringi,stri_read_lines)
importFrom(stringi,stri_replace_all_fixed)
importFrom(stringi,stri_replace_all_regex)
importFrom(stringi,stri_split_fixed)
importFrom(stringi,stri_split_regex)
importFrom(stringi,stri_sub)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

* Experimental support for shared formulas. Similar to spreadsheet software, when a cell is dragged to horizontally or vertically. This requires the formula to be written only for a single cell and it is filled by spreadsheet software for the remaining dimensions. `wb_add_formula()` gained a new argument `shared`. [1074](https://github.com/JanMarvin/openxlsx2/pull/1074)

* Experimental support for reading shared formulas. If `show_formula` is used with `wb_to_df()`, we try to show the value that is shown in spreadsheet software. [1091](https://github.com/JanMarvin/openxlsx2/pull/1091)


***************************************************************************

Expand Down
10 changes: 5 additions & 5 deletions R/openxlsx2-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
#' @import R6
#' @importFrom grDevices bmp col2rgb colors dev.copy dev.list dev.off jpeg palette png rgb tiff
#' @importFrom magrittr %>%
#' @importFrom stringi stri_c stri_encode stri_isempty stri_join stri_match
#' stri_match_all_regex stri_order stri_opts_collator stri_pad_left
#' stri_rand_strings stri_read_lines stri_replace_all_fixed
#' stri_split_fixed stri_split_regex stri_sub stri_unescape_unicode
#' stri_unique
#' @importFrom stringi stri_c stri_encode stri_extract_all_regex
#' stri_isempty stri_join stri_match stri_match_all_regex stri_match_first_regex
#' stri_order stri_opts_collator stri_pad_left stri_rand_strings stri_read_lines
#' stri_replace_all_fixed stri_replace_all_regex stri_split_fixed
#' stri_split_regex stri_sub stri_unescape_unicode stri_unique
#' @importFrom utils download.file head menu read.csv unzip
#' @importFrom zip zip
#'
Expand Down
36 changes: 36 additions & 0 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,45 @@ wb_to_df <- function(
}

if (show_formula) {

if (any(cc$f_t == "shared")) {

# depending on the sheet, this might require updates to many cells
# TODO reduce this to cells, that are part of `cc`. Currently we
# might waste time, updating cells that are not visible to the user
cc_shared <- wb$worksheets[[sheet]]$sheet_data$cc
cc_shared <- cc_shared[cc_shared$f_t == "shared", ]
cc_shared <- cc_shared[order(as.integer(cc_shared$f_si)), ]

# carry forward the shared formula
cc_shared$f <- ave2(cc_shared$f, cc_shared$f_si, carry_forward)

# calculate differences from the formula cell, to the shared cells
cc_shared$cols <- ave2(col2int(cc_shared$c_r), cc_shared$f_si, calc_distance)
cc_shared$rows <- ave2(as.integer(cc_shared$row_r), cc_shared$f_si, calc_distance)

# begin updating the formulas. find a1 notion, get the next cell, update formula
cells <- find_a1_notation(cc_shared$f)
repls <- vector("list", length = length(cells))

for (i in seq_along(cells)) {
repls[[i]] <- next_cell(cells[[i]], cc_shared$cols[i], cc_shared$rows[i])
}

cc_shared$f <- replace_a1_notation(cc_shared$f, repls)
cc_shared$cols <- NULL
cc_shared$rows <- NULL

# reduce and assign
cc_shared <- cc_shared[which(cc_shared$r %in% cc$r), ]
cc[match(cc_shared$r, cc$r), ] <- cc_shared

}

sel <- cc$f != ""
cc$val[sel] <- replaceXMLEntities(cc$f[sel])
cc$typ[sel] <- "s"

}

# convert "na_string" to missing
Expand Down
107 changes: 107 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1274,3 +1274,110 @@ get_dims <- function(dims, check = FALSE, cols = FALSE, rows = FALSE) {
return(rows)

}

#' the function to find the cell
#' this function is used with `show_formula` in [wb_to_df()]
#' @param string the formula
#' @family internal
#' @noRd
find_a1_notation <- function(string) {
pattern <- "\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?"
stringi::stri_extract_all_regex(string, pattern)
}

#' the function to replace the next cell
#' this function is used with `show_formula` in [wb_to_df()]
#' @param cell the cell from a shared formula [find_a1_notation()]
#' @param cols,rows an integer where to move the cell
#' @family internal
#' @noRd
next_cell <- function(cell, cols = 0L, rows = 0L) {

z <- vector("character", length(cell))
for (i in seq_along(cell)) {
# Match individual cells and ranges
match <- stringi::stri_match_first_regex(cell[[i]], "^(\\$?)([A-Z]+)(\\$?)(\\d+)(:(\\$?)([A-Z]+)(\\$?)(\\d+))?$")

# if shared formula contains no A1 reference
if (is.na(match[1, 1])) return(NA_character_)

# Extract parts of the cell
fixed_col1 <- match[2]
col1 <- match[3]
fixed_row1 <- match[4]
row1 <- as.numeric(match[5])

fixed_col2 <- match[7]
col2 <- match[8]
fixed_row2 <- match[9]
row2 <- as.numeric(match[10])

if (is.na(col2)) {

# Handle individual cell
next_col <- if (fixed_col1 == "") int2col(col2int(col1) + cols) else col1
next_row <- if (fixed_row1 == "") row1 + rows else row1
z[i] <- paste0(fixed_col1, next_col, fixed_row1, next_row)

} else {

# Handle range
next_col1 <- if (fixed_col1 == "") int2col(col2int(col1) + cols) else col1
next_row1 <- if (fixed_row1 == "") row1 + rows else row1
next_col2 <- if (fixed_col2 == "") int2col(col2int(col2) + cols) else col2
next_row2 <- if (fixed_col2 == "") row2 + rows else row2
z[i] <- paste0(fixed_col1, next_col1, fixed_row1, next_row1, ":", fixed_col2, next_col2, fixed_row2, next_row2)

}
}

z
}

#' the replacement function for shared formulas
#' this function is used with `show_formula` in [wb_to_df()]
#' @param string the formula
#' @param replacements the replacements, from [next_cell()]
#' @family internal
#' @noRd
replace_a1_notation <- function(strings, replacements) {

# create sprintf-able strings
strings <- stringi::stri_replace_all_regex(
strings,
"\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?",
"%s"
)

# insert replacements into string
repl_fun <- function(str, y) {
if (!anyNA(y)) # else keep str as is
str <- do.call(sprintf, c(str, as.list(y)))
str
}

z <- vector("character", length(strings))
for (i in seq_along(strings)) {
z[i] <- repl_fun(strings[[i]], replacements[[i]])
}

z
}

# extend shared formula into all formula cells
carry_forward <- function(x) {
rep(x[1], length(x))
}

# calculate difference for each shared formula to the origin
calc_distance <- function(x) {
x - x[1]
}

# ave function to avoid a dependency on stats. if we ever rely on stats,
# this can be replaced by stats::ave
ave2 <- function(x, y, FUN) {
g <- as.factor(y)
split(x, g) <- lapply(split(x, g), FUN)
x
}
54 changes: 54 additions & 0 deletions tests/testthat/test-read_from_created_wb.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,57 @@ test_that("check_names works", {
expect_equal(exp, got)

})

test_that("shared formulas are handled", {

wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E12", shared = TRUE)

exp <- structure(
c("SUM($A4:A4) + B$1", "SUM($A5:A5) + B$1", "SUM($A6:A6) + B$1",
"SUM($A4:B4) + C$1", "SUM($A5:B5) + C$1", "SUM($A6:B6) + C$1",
"SUM($A4:C4) + D$1", "SUM($A5:C5) + D$1", "SUM($A6:C6) + D$1"),
dim = c(3L, 3L),
dimnames = list(c("10", "11", "12"), c("A", "B", "C"))
)
got <- as.matrix(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE))
expect_equal(exp, got)

# shared formula w/o A1 notation
wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "TODAY()", dims = "A8:E12", shared = TRUE)

exp <- rep("TODAY()", 9)
got <- unname(unlist(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE)))
expect_equal(exp, got)

# a bunch of mixed shared formulas
wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E9", shared = TRUE)$
add_formula(x = "SUM($A2:A2)", dims = "A10:E11", shared = TRUE)$
add_formula(x = "A2", dims = "A12:E13", shared = TRUE)

exp <- c("SUM($A2:B2) + C$1", "SUM($A3:B3) + C$1", "SUM($A2:B2)", "SUM($A3:B3)",
"B2", "B3", "SUM($A2:C2) + D$1", "SUM($A3:C3) + D$1", "SUM($A2:C2)",
"SUM($A3:C3)", "C2", "C3", "SUM($A2:D2) + E$1", "SUM($A3:D3) + E$1",
"SUM($A2:D2)", "SUM($A3:D3)", "D2", "D3")
got <- unname(unlist(wb_to_df(wb, dims = "B8:D13", col_names = FALSE, show_formula = TRUE)))
expect_equal(exp, got)

# make sure that replacements work as expected
wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "A2 + B2", dims = "A12:E13", shared = TRUE)

exp <- c("A2 + B2", "A3 + B3", "B2 + C2", "B3 + C3", "C2 + D2", "C3 + D3", "D2 + E2", "D3 + E3")
got <- unname(unlist(wb_to_df(wb, dims = "A12:D13", col_names = FALSE, show_formula = TRUE)))
expect_equal(exp, got)

})

0 comments on commit d058b8a

Please sign in to comment.