Skip to content

Commit

Permalink
[read] handle all kinds of shared formulas
Browse files Browse the repository at this point in the history
  • Loading branch information
JanMarvin committed Jul 24, 2024
1 parent e167661 commit 24c2541
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 66 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -182,16 +182,19 @@ importFrom(grDevices,tiff)
importFrom(magrittr,"%>%")
importFrom(stringi,stri_c)
importFrom(stringi,stri_encode)
importFrom(stringi,stri_extract_all_regex)
importFrom(stringi,stri_isempty)
importFrom(stringi,stri_join)
importFrom(stringi,stri_match)
importFrom(stringi,stri_match_all_regex)
importFrom(stringi,stri_match_first_regex)
importFrom(stringi,stri_opts_collator)
importFrom(stringi,stri_order)
importFrom(stringi,stri_pad_left)
importFrom(stringi,stri_rand_strings)
importFrom(stringi,stri_read_lines)
importFrom(stringi,stri_replace_all_fixed)
importFrom(stringi,stri_replace_all_regex)
importFrom(stringi,stri_split_fixed)
importFrom(stringi,stri_split_regex)
importFrom(stringi,stri_sub)
Expand Down
10 changes: 5 additions & 5 deletions R/openxlsx2-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
#' @import R6
#' @importFrom grDevices bmp col2rgb colors dev.copy dev.list dev.off jpeg palette png rgb tiff
#' @importFrom magrittr %>%
#' @importFrom stringi stri_c stri_encode stri_isempty stri_join stri_match
#' stri_match_all_regex stri_order stri_opts_collator stri_pad_left
#' stri_rand_strings stri_read_lines stri_replace_all_fixed
#' stri_split_fixed stri_split_regex stri_sub stri_unescape_unicode
#' stri_unique
#' @importFrom stringi stri_c stri_encode stri_extract_all_regex
#' stri_isempty stri_join stri_match stri_match_all_regex stri_match_first_regex
#' stri_order stri_opts_collator stri_pad_left stri_rand_strings stri_read_lines
#' stri_replace_all_fixed stri_replace_all_regex stri_split_fixed
#' stri_split_regex stri_sub stri_unescape_unicode stri_unique
#' @importFrom utils download.file head menu read.csv unzip
#' @importFrom zip zip
#'
Expand Down
51 changes: 13 additions & 38 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -427,54 +427,29 @@ wb_to_df <- function(

if (any(cc$f_t == "shared")) {

# depending on the sheet, this might require updates to many cells
# TODO reduce this to cells, that are part of `cc`. Currently we
# might waste time, updating cells that are not visible to the user
cc_shared <- wb$worksheets[[sheet]]$sheet_data$cc
cc_shared <- cc_shared[cc_shared$f_t == "shared", ]
cc_shared <- cc_shared[order(as.integer(cc_shared$f_si)), ]

# extend shared formula into all formula cells
carry_forward <- function(x) {
last_val <- NA
for (i in seq_along(x)) {
if (x[i] != "") {
last_val <- x[i]
} else {
x[i] <- last_val
}
}
return(x)
}
# carry forward the shared formula
cc_shared$f <- ave2(cc_shared$f, cc_shared$f_si, carry_forward)

cc_shared$f <- ave(
cc_shared$f,
as.integer(cc_shared$f_si),
FUN = carry_forward
)

# calculate difference for each shared formula to the origin
calc_distance <- function(x) {
x - x[1]
}
# calculate differences from the formula cell, to the shared cells
cc_shared$cols <- ave2(col2int(cc_shared$c_r), cc_shared$f_si, calc_distance)
cc_shared$rows <- ave2(as.integer(cc_shared$row_r), cc_shared$f_si, calc_distance)

cc_shared$cols <- ave(
col2int(cc_shared$c_r),
as.integer(cc_shared$f_si),
FUN = calc_distance
)
cc_shared$rows <- ave(
as.integer(cc_shared$row_r),
as.integer(cc_shared$f_si),
FUN = calc_distance
)

# TODO skip non A1 shared cells
# begin updating the formulas. find a1 notion, get the next cell, update formula
cells <- find_a1_notation(cc_shared$f)
repls <- cells
repls <- vector("list", length = length(cells))

for (i in seq_len(nrow(cells))) {
repls[i, ] <- next_cell(cells[i, ], cc_shared$cols[i], cc_shared$rows[i])
for (i in seq_along(cells)) {
repls[[i]] <- next_cell(cells[[i]], cc_shared$cols[i], cc_shared$rows[i])
}

cc_shared$f <- replace_a1_notation(cc_shared["f"], cells, repls)
cc_shared$f <- replace_a1_notation(cc_shared$f, repls)
cc_shared$cols <- NULL
cc_shared$rows <- NULL

Expand Down
67 changes: 45 additions & 22 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1276,26 +1276,30 @@ get_dims <- function(dims, check = FALSE, cols = FALSE, rows = FALSE) {
}

#' the function to find the cell
#' this function is used with `show_formula` in [wb_to_df()]
#' @param string the formula
#' @family internal
#' @noRd
find_a1_notation <- function(string) {
pattern <- "\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?"
as.data.frame(do.call("rbind", stringi::stri_extract_all_regex(string, pattern)))
stringi::stri_extract_all_regex(string, pattern)
}

#' the function to replace the next cell
#' this function is used with `show_formula` in [wb_to_df()]
#' @param cell the cell from a shared formula [find_a1_notation()]
#' @param cols,rows an integer where to move the cell
#' @family internal
#' @noRd
next_cell <- function(cell, cols = 0L, rows = 0L) {
z <- NULL
for (i in cell) {

z <- vector("character", length(cell))
for (i in seq_along(cell)) {
# Match individual cells and ranges
match <- stringi::stri_match_first_regex(i, "^(\\$?)([A-Z]+)(\\$?)(\\d+)(:(\\$?)([A-Z]+)(\\$?)(\\d+))?$")
match <- stringi::stri_match_first_regex(cell[[i]], "^(\\$?)([A-Z]+)(\\$?)(\\d+)(:(\\$?)([A-Z]+)(\\$?)(\\d+))?$")

if (is.na(match[1, 1])) stop("Invalid A1 notation")
# if shared formula contains no A1 reference
if (is.na(match[1, 1])) return(NA_character_)

# Extract parts of the cell
fixed_col1 <- match[2]
Expand All @@ -1313,7 +1317,7 @@ next_cell <- function(cell, cols = 0L, rows = 0L) {
# Handle individual cell
next_col <- if (fixed_col1 == "") int2col(col2int(col1) + cols) else col1
next_row <- if (fixed_row1 == "") row1 + rows else row1
z <- c(z, paste0(fixed_col1, next_col, fixed_row1, next_row))
z[i] <- paste0(fixed_col1, next_col, fixed_row1, next_row)

} else {

Expand All @@ -1322,39 +1326,58 @@ next_cell <- function(cell, cols = 0L, rows = 0L) {
next_row1 <- if (fixed_row1 == "") row1 + rows else row1
next_col2 <- if (fixed_col2 == "") int2col(col2int(col2) + cols) else col2
next_row2 <- if (fixed_col2 == "") row2 + rows else row2
z <- c(z, paste0(fixed_col1, next_col1, fixed_row1, next_row1, ":", fixed_col2, next_col2, fixed_row2, next_row2))
z[i] <- paste0(fixed_col1, next_col1, fixed_row1, next_row1, ":", fixed_col2, next_col2, fixed_row2, next_row2)

}
}

as.data.frame(matrix(z, ncol = 2))
z
}

#' the replacement function for shared formulas
#' this function is used with `show_formula` in [wb_to_df()]
#' @param string the formula
#' @param matches the matches, obtained via [find_a1_notation()]
#' @param replacements the replacements, from [next_cell()]
#' @family internal
#' @noRd
replace_a1_notation <- function(strings, matches, replacements) {

# strings <- data.frame(rep(string, NROW(replacements)))
replace_a1_notation <- function(strings, replacements) {

# matches <- as.data.frame(
# matrix(matches, nrow = NROW(replacements), ncol = length(matches), byrow = TRUE)
# )
# create sprintf-able strings
strings <- stringi::stri_replace_all_regex(
strings,
"\\$?[A-Z]\\$?[0-9]+(:\\$?[A-Z]\\$?[0-9]+)?",
"%s"
)

repl_fun <- function(str, x, y) {
for (i in seq_along(x)) {
str <- stringi::stri_replace_first_fixed(str, x[i], y[i])
}
# insert replacements into string
repl_fun <- function(str, y) {
if (!anyNA(y)) # else keep str as is
str <- do.call(sprintf, c(str, as.list(y)))
str
}

z <- NULL
for (i in seq_len(NROW(strings))) {
z <- c(z, repl_fun(strings[i, ], matches[i, ], replacements[i, ]))
z <- vector("character", length(strings))
for (i in seq_along(strings)) {
z[i] <- repl_fun(strings[[i]], replacements[[i]])
}

z
}

# extend shared formula into all formula cells
carry_forward <- function(x) {
rep(x[1], length(x))
}

# calculate difference for each shared formula to the origin
calc_distance <- function(x) {
x - x[1]
}

# ave function to avoid a dependency on stats. if we ever rely on stats,
# this can be replaced by stats::ave
ave2 <- function (x, y, FUN) {

Check warning on line 1379 in R/utils.R

View workflow job for this annotation

GitHub Actions / lint

file=R/utils.R,line=1379,col=17,[function_left_parentheses_linter] Remove spaces before the left parenthesis in a function definition.
g <- as.factor(y)
split(x, g) <- lapply(split(x, g), FUN)
x
}
39 changes: 38 additions & 1 deletion tests/testthat/test-read_from_created_wb.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,10 @@ test_that("check_names works", {
})

test_that("shared formulas are handled", {

wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5*5), ncol = 5, nrow = 5))$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E12", shared = TRUE)

exp <- structure(
Expand All @@ -237,4 +238,40 @@ test_that("shared formulas are handled", {
)
got <- as.matrix(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE))
expect_equal(exp, got)

# shared formula w/o A1 notation
wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "TODAY()", dims = "A8:E12", shared = TRUE)

exp <- rep("TODAY()", 9)
got <- unname(unlist(wb_to_df(wb, dims = "A10:C12", col_names = FALSE, show_formula = TRUE)))
expect_equal(exp, got)

# a bunch of mixed shared formulas
wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "SUM($A2:A2) + B$1", dims = "A8:E9", shared = TRUE)$
add_formula(x = "SUM($A2:A2)", dims = "A10:E11", shared = TRUE)$
add_formula(x = "A2", dims = "A12:E13", shared = TRUE)

exp <- c("SUM($A2:B2) + C$1", "SUM($A3:B3) + C$1", "SUM($A2:B2)", "SUM($A3:B3)",
"B2", "B3", "SUM($A2:C2) + D$1", "SUM($A3:C3) + D$1", "SUM($A2:C2)",
"SUM($A3:C3)", "C2", "C3", "SUM($A2:D2) + E$1", "SUM($A3:D3) + E$1",
"SUM($A2:D2)", "SUM($A3:D3)", "D2", "D3")
got <- unname(unlist(wb_to_df(wb, dims = "B8:D13", col_names = FALSE, show_formula = TRUE)))
expect_equal(exp, got)

# make sure that replacements work as expected
wb <- wb_workbook()$
add_worksheet()$
add_data(x = matrix(rnorm(5 * 5), ncol = 5, nrow = 5))$
add_formula(x = "A2 + B2", dims = "A12:E13", shared = TRUE)

exp <- c("A2 + B2", "A3 + B3", "B2 + C2", "B3 + C3", "C2 + D2", "C3 + D3", "D2 + E2", "D3 + E3")
got <- unname(unlist(wb_to_df(wb, dims = "A12:D13", col_names = FALSE, show_formula = TRUE)))
expect_equal(exp, got)

})

0 comments on commit 24c2541

Please sign in to comment.