Skip to content

Commit

Permalink
fixes for wb_load() from #688 (#732)
Browse files Browse the repository at this point in the history
  • Loading branch information
JanMarvin authored Aug 12, 2023
1 parent 77f0e94 commit b2b2aeb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 41 deletions.
10 changes: 6 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
## Fixes

* corrections in vignettes
* fixes for loading workbooks with threaded comments
* fixes for loading workbooks with embeddings other than docx


***************************************************************************
Expand All @@ -24,19 +26,19 @@

* Cleanups
* remove deprecated functions

* remove deprecated arguments
* `xy` argument
* arguments `col`, `row`, `cols`, `rows`, `start_col`, `start_row` and `gridExpand` were deprecated in favor of `dims`. Row and column vectors can be converted to `dims` using `wb_dims()`.
* `xlsx_file` in favor of `file` in `wb_to_df()`

* deprecating functions
* `convertToExcelDate()` for `convert_to_excel_date()`
* `convertToExcelDate()` for `convert_to_excel_date()`
* `wb_grid_lines()` for `wb_set_grid_lines()`

* make `get_cell_refs()`, `get_date_origin()`, `guess_col_type()`, `write_file()`, `dataframe_to_dims()`, `dims_to_dataframe()`, and `wb_get_sheet_name()` internal functions
* make classes `styles_mgr()`, `wbSheetData`, `wbWorksheet`, `wbChartSheet`, `wbComment`, `wbHyperlink` internal

## New features

* `wb_dims()` was added as a more convenient replacement for `rowcol_to_dims()`. ([691](https://github.com/JanMarvin/openxlsx2/pull/691) and [702](https://github.com/JanMarvin/openxlsx2/pull/702), @olivroy) The new function can take either numeric (for rows or columns) or character (column) vectors. In addition, it is able to create dimensions for R objects that are coercible to a data frame. This allows the following variants:
Expand Down
2 changes: 1 addition & 1 deletion R/class-workbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -1630,7 +1630,7 @@ wbWorkbook <- R6::R6Class(
if (nThreadComments > 0) {
xlThreadComments <- dir_create(tmpDir, "xl", "threadedComments")

for (i in seq_len(nSheets)) {
for (i in seq_along(self$threadComments)) {
if (length(self$threadComments[[i]])) {
write_file(
head = "<ThreadedComments xmlns=\"http://schemas.microsoft.com/office/spreadsheetml/2018/threadedcomments\" xmlns:x=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">",
Expand Down
56 changes: 20 additions & 36 deletions R/wb_load.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,24 +216,17 @@ wb_load <- function(
}

##
chartSheetRIds <- NULL
if (!data_only && length(chartSheetsXML)) {
workbookRelsXML <- grep("chartsheets/sheet", workbookRelsXML, fixed = TRUE, value = TRUE)

chartSheetRIds <- unlist(getId(workbookRelsXML))
chartsheet_rId_mapping <- unlist(regmatches(workbookRelsXML, gregexpr("sheet[0-9]+\\.xml", workbookRelsXML, perl = TRUE, ignore.case = TRUE)))

sheetNo <- as.integer(regmatches(chartSheetsXML, regexpr("(?<=sheet)[0-9]+(?=\\.xml)", chartSheetsXML, perl = TRUE)))
chartSheetsXML <- chartSheetsXML[order(sheetNo)]

chartSheetsRelsXML <- grep_xml("xl/chartsheets/_rels")
sheetNo2 <- as.integer(regmatches(chartSheetsRelsXML, regexpr("(?<=sheet)[0-9]+(?=\\.xml\\.rels)", chartSheetsRelsXML, perl = TRUE)))
chartSheetsRelsXML <- chartSheetsRelsXML[order(sheetNo2)]

chartSheetsRelsDir <- dirname(chartSheetsRelsXML[1])
}


## xl\
## xl\workbook
if (length(workbookXML)) {
Expand Down Expand Up @@ -308,7 +301,7 @@ wb_load <- function(
} else if (sheets$typ[i] == "worksheet") {
content_type <- read_xml(ContentTypesXML)
override <- xml_attr(content_type, "Types", "Override")
overrideAttr <- as.data.frame(do.call("rbind", override))
overrideAttr <- as.data.frame(do.call("rbind", override), stringsAsFactors = FALSE)
xmls <- basename(unlist(overrideAttr$PartName))
drawings <- grep("drawing", xmls, value = TRUE)
wb$add_worksheet(sheets$name[i], visible = is_visible[i], has_drawing = !is.na(drawings[i]))
Expand Down Expand Up @@ -810,10 +803,8 @@ wb_load <- function(

for (i in seq_len(nSheets)) {
if (sheets$typ[i] == "chartsheet") {
ind <- which(chartSheetRIds == sheets$`r:id`[i])
rels_file <- file.path(chartSheetsRelsDir, paste0(chartsheet_rId_mapping[ind], ".rels"))
rels_file <- file.path(xmlDir, "xl", "chartsheets", "_rels", paste0(file_names[i], ".rels"))
} else {
ind <- sheets$`r:id`[i]
rels_file <- file.path(xmlDir, "xl", "worksheets", "_rels", paste0(file_names[i], ".rels"))
}
if (file.exists(rels_file)) {
Expand Down Expand Up @@ -852,15 +843,6 @@ wb_load <- function(

wb$worksheets_rels <- xml

xml <- lapply(seq_along(allRels), function(i) {
if (haveRels[i]) {
xml <- xml_node(allRels[[i]], "Relationships", "Relationship")
} else {
xml <- character()
}
return(xml)
})

for (ws in seq_along(wb$worksheets)) {

# This relships tracks the file numbering. drawing1.xml or comments2.xml
Expand Down Expand Up @@ -955,13 +937,6 @@ wb_load <- function(
tables_xml <- vapply(tablesXML, FUN = read_xml, pointer = FALSE, FUN.VALUE = NA_character_)
tabs <- rbindlist(xml_attr(tables_xml, "table"))

wb$append("Content_Types", sprintf('<Override PartName="/xl/tables/table%s.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml"/>', nrow(wb$tables)))

# # TODO When does this happen?
# if (length(tabs["displayName"]) != length(tablesXML)) {
# tabs[["displayName"]] <- paste0("Table", seq_along(tablesXML))
# }

wb$tables <- data.frame(
tab_name = tabs[["displayName"]],
tab_sheet = tableSheets,
Expand All @@ -971,10 +946,7 @@ wb_load <- function(
stringsAsFactors = FALSE
)

# ## relabel ids
# for (i in seq_len(nrow(wb$tables))) {
# wb$tables$tab_xml[i] <- xml_attr_mod(wb$tables$tab_xml[i], xml_attributes = c(id = as.character(i + 2)))
# }
wb$append("Content_Types", sprintf('<Override PartName="/xl/tables/table%s.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml"/>', nrow(wb$tables)))

## every worksheet containing a table, has a table part. this references
## the display name, so that we know which tables are still around.
Expand Down Expand Up @@ -1124,10 +1096,10 @@ wb_load <- function(
}

## Threaded comments
if (length(threadCommentsXML) > 0) {
if (any(length(threadCommentsXML) > 0)) {

if (lengths(threadCommentsXML)) {
wb$threadComments <- lapply(threadCommentsXML, read_xml, pointer = FALSE)
if (any(lengths(threadCommentsXML))) {
wb$threadComments <- lapply(threadCommentsXML, function(x) xml_node(x, "ThreadedComments", "threadedComment"))
}

wb$append(
Expand All @@ -1152,8 +1124,20 @@ wb_load <- function(

## Embedded docx
if (length(embeddings) > 0) {
# TODO only valid for docx. need to check xls and doc?
wb$append("Content_Types", '<Default Extension="docx" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>')

# get the embedded files extensions
files <- unique(gsub(".+\\.(\\w+)$", "\\1", embeddings))

# get the required ContentTypes
content_type <- read_xml(ContentTypesXML)
extensions <- rbindlist(xml_attr(content_type, "Types", "Default"))
extensions <- extensions[extensions$Extension %in% files, ]

# append the content types
default <- sprintf('<Default Extension="%s" ContentType="%s"/>',
extensions$Extension, extensions$ContentType)
wb$append("Content_Types", default)

wb$embeddings <- embeddings
}

Expand Down

0 comments on commit b2b2aeb

Please sign in to comment.