Skip to content

Commit

Permalink
[writing] provide openxlsx2.export_with_pugi = FALSE option
Browse files Browse the repository at this point in the history
  • Loading branch information
JanMarvin committed Jul 20, 2024
1 parent 228c51f commit 15daf92
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 8 deletions.
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,10 @@ set_sst <- function(sharedStrings) {
.Call(`_openxlsx2_set_sst`, sharedStrings)
}

# Thin R wrapper around the compiled routine `_openxlsx2_write_worksheet_slim`.
# Passes `sheet_data`, `prior`, `post` and `fl` through unchanged via .Call().
# The result of the call is wrapped in invisible(), so nothing is auto-printed
# when the function is called at top level.
write_worksheet_slim <- function(sheet_data, prior, post, fl) {
invisible(.Call(`_openxlsx2_write_worksheet_slim`, sheet_data, prior, post, fl))
}

# Thin R wrapper around the compiled routine `_openxlsx2_write_worksheet`.
# Passes `prior`, `post` and `sheet_data` through unchanged via .Call() and
# returns whatever the compiled routine returns.
write_worksheet <- function(prior, post, sheet_data) {
.Call(`_openxlsx2_write_worksheet`, prior, post, sheet_data)
}
Expand Down
33 changes: 26 additions & 7 deletions R/class-workbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -9318,14 +9318,33 @@ wbWorkbook <- R6::R6Class(
ws$sheet_data$cc_out <- NULL
}

# create entire sheet prior to writing it
sheet_xml <- write_worksheet(
prior = prior,
post = post,
sheet_data = ws$sheet_data
)
ws_file <- file.path(xlworksheetsDir, sprintf("sheet%s.xml", i))
write_xmlPtr(doc = sheet_xml, fl = ws_file)

use_pugixml_export <- getOption("openxlsx2.export_with_pugi", default = TRUE)

if (use_pugixml_export) {

Check warning on line 9326 in R/class-workbook.R

View workflow job for this annotation

GitHub Actions / lint

file=R/class-workbook.R,line=9326,col=1,[trailing_whitespace_linter] Trailing whitespace is superfluous.
# create entire sheet prior to writing it
sheet_xml <- write_worksheet(
prior = prior,
post = post,
sheet_data = ws$sheet_data
)
write_xmlPtr(doc = sheet_xml, fl = ws_file)

Check warning on line 9334 in R/class-workbook.R

View workflow job for this annotation

GitHub Actions / lint

file=R/class-workbook.R,line=9334,col=1,[trailing_whitespace_linter] Trailing whitespace is superfluous.
} else {

if (grepl("</worksheet>", prior))
prior <- substr(prior, 1, nchar(prior) - 13) # remove " </worksheet>"

write_worksheet_slim(
sheet_data = ws$sheet_data,
prior = prior,
post = post,
fl = ws_file
)

}

## write worksheet rels
if (length(self$worksheets_rels[[i]]) || hasHL) {
Expand Down
14 changes: 14 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,19 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// write_worksheet_slim
// NOTE(review): this looks like Rcpp-attributes generated glue; comments added
// here may be lost when the file is regenerated -- confirm before relying on them.
// Forward declaration of the C++ implementation that the wrapper below calls.
void write_worksheet_slim(Rcpp::Environment sheet_data, std::string prior, std::string post, std::string fl);
// .Call entry point: converts each SEXP argument to the C++ type expected by
// write_worksheet_slim(), invokes it, and returns R_NilValue because the
// implementation is declared void.
RcppExport SEXP _openxlsx2_write_worksheet_slim(SEXP sheet_dataSEXP, SEXP priorSEXP, SEXP postSEXP, SEXP flSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::Environment >::type sheet_data(sheet_dataSEXP);
Rcpp::traits::input_parameter< std::string >::type prior(priorSEXP);
Rcpp::traits::input_parameter< std::string >::type post(postSEXP);
Rcpp::traits::input_parameter< std::string >::type fl(flSEXP);
write_worksheet_slim(sheet_data, prior, post, fl);
return R_NilValue;
END_RCPP
}
// write_worksheet
XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment sheet_data);
RcppExport SEXP _openxlsx2_write_worksheet(SEXP priorSEXP, SEXP postSEXP, SEXP sheet_dataSEXP) {
Expand Down Expand Up @@ -1068,6 +1081,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_openxlsx2_read_colors", (DL_FUNC) &_openxlsx2_read_colors, 1},
{"_openxlsx2_write_colors", (DL_FUNC) &_openxlsx2_write_colors, 1},
{"_openxlsx2_set_sst", (DL_FUNC) &_openxlsx2_set_sst, 1},
{"_openxlsx2_write_worksheet_slim", (DL_FUNC) &_openxlsx2_write_worksheet_slim, 4},
{"_openxlsx2_write_worksheet", (DL_FUNC) &_openxlsx2_write_worksheet, 3},
{"_openxlsx2_write_xmlPtr", (DL_FUNC) &_openxlsx2_write_xmlPtr, 2},
{"_openxlsx2_styles_bin", (DL_FUNC) &_openxlsx2_styles_bin, 3},
Expand Down
194 changes: 193 additions & 1 deletion src/write_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,199 @@ Rcpp::CharacterVector set_sst(Rcpp::CharacterVector sharedStrings) {
return sst;
}

// Write a worksheet as XML by streaming it straight into the file `fl`.
// Whatever input is provided is dumped into the file as is: no XML checking,
// no escaping and no unicode checking is performed.
//
// Output layout, in this order:
//   XML declaration, `prior`, the <sheetData> node (or <sheetData/> when `cc`
//   has no rows or no columns), `post`, "</worksheet>".
// The closing </worksheet> tag is always appended here, so `prior` and `post`
// must not contain it themselves.
//
// row_attr  one line per <row>. Every non-empty field is written as an
//           attribute named after its column. Column "r" is the row number.
// cc        one line per cell <c>. Uses the columns row_r, r, v, c_t, c_s,
//           c_cm, c_ph, c_vm, f, f_t, f_ref, f_ca, f_si and is.
//           The row matching logic only ever moves forward, so the cells are
//           expected to arrive ordered by row -- NOTE(review): confirm that
//           every caller guarantees this.
// prior     text written in front of the sheet data
// post      text written behind the sheet data
// fl        path of the file to create
//
// Raises an R error if `fl` cannot be opened or written, or if `cc` refers to
// a row for which `row_attr` has no (further) entry.
void xml_sheet_data_slim(
    Rcpp::DataFrame row_attr,
    Rcpp::DataFrame cc,
    std::string prior,
    std::string post,
    std::string fl
) {

  std::ofstream file(fl);
  if (!file.is_open()) {
    // without this check an unwritable path silently produced no output
    Rcpp::stop("Could not open file for writing: " + fl);
  }

  // writes ` name="value"`, but only if there is a value to write
  auto write_attr = [&file](const char* name, const std::string& value) {
    if (!value.empty()) file << " " << name << "=\"" << value << "\"";
  };

  // a cell value consisting of a single blank needs xml:space="preserve"
  const std::string xml_preserver = " ";

  file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
  file << prior;

  if (cc.nrow() && cc.ncol()) {
    // we cannot access rows directly in the dataframe.
    // Have to extract the columns and use these
    Rcpp::CharacterVector cc_row_r = cc["row_r"]; // 1
    Rcpp::CharacterVector cc_r     = cc["r"];     // A1
    Rcpp::CharacterVector cc_v     = cc["v"];
    Rcpp::CharacterVector cc_c_t   = cc["c_t"];
    Rcpp::CharacterVector cc_c_s   = cc["c_s"];
    Rcpp::CharacterVector cc_c_cm  = cc["c_cm"];
    Rcpp::CharacterVector cc_c_ph  = cc["c_ph"];
    Rcpp::CharacterVector cc_c_vm  = cc["c_vm"];
    Rcpp::CharacterVector cc_f     = cc["f"];
    Rcpp::CharacterVector cc_f_t   = cc["f_t"];
    Rcpp::CharacterVector cc_f_ref = cc["f_ref"];
    Rcpp::CharacterVector cc_f_ca  = cc["f_ca"];
    Rcpp::CharacterVector cc_f_si  = cc["f_si"];
    Rcpp::CharacterVector cc_is    = cc["is"];

    Rcpp::CharacterVector row_r    = row_attr["r"];
    // the attribute names do not change from row to row, fetch them once
    Rcpp::CharacterVector attrnams = row_attr.names();

    const R_xlen_t n_cells     = cc.nrow();
    const R_xlen_t n_row_attr  = row_r.size();
    const R_xlen_t n_attr_cols = row_attr.ncol();

    int lastrow = 0;      // integer value of the last row with column data
    int thisrow = 0;      // integer value of the current row with column data
    int rowid   = 0;      // integer value of the r field in row_attr
    R_xlen_t row_idx = 0; // index into row_attr. this is != rowid

    file << "<sheetData>";
    for (R_xlen_t i = 0; i < n_cells; ++i) {

      thisrow = std::stoi(Rcpp::as<std::string>(cc_row_r[i]));

      if (lastrow < thisrow) {

        // there might be entirely empty rows in between. Write every row of
        // row_attr up to and including the row of the current cell, and only
        // then continue with the cell itself
        while (rowid < thisrow) {

          if (row_idx >= n_row_attr) {
            // reading past the end of row_attr would be undefined behavior
            Rcpp::stop("row_attr has no entry for row " + std::to_string(thisrow));
          }

          rowid = std::stoi(Rcpp::as<std::string>(row_r[row_idx]));

          // close the previous row before opening the next one
          if (row_idx) file << "</row>";
          file << "<row";

          for (R_xlen_t j = 0; j < n_attr_cols; ++j) {
            const std::string val = Rcpp::as<std::string>(
              Rcpp::as<Rcpp::CharacterVector>(row_attr[j])[row_idx]
            );
            if (!val.empty()) {
              file << " " << attrnams[j] << "=\"" << val << "\"";
            }
          }
          file << ">"; // end <row ...>

          // read the next row_idx when visiting again
          ++row_idx;
        }
      }

      // cell type, needed for the attribute and for the inline string check
      const std::string c_t = to_string(cc_c_t[i]);

      // create node <c>, the reference is always written: <c r="A1" ...>
      file << "<c";
      file << " r" << "=\"" << to_string(cc_r[i]) << "\"";

      write_attr("s",  to_string(cc_c_s[i]));  // style
      write_attr("t",  c_t);                   // type, absent for numerics
      write_attr("cm", to_string(cc_c_cm[i])); // CellMetaIndex
      write_attr("ph", to_string(cc_c_ph[i])); // phonetics spelling
      write_attr("vm", to_string(cc_c_vm[i])); // value metadata

      file << ">"; // end <c ...>

      // <f> ... </f>
      // f node: formula to be evaluated
      const std::string f        = to_string(cc_f[i]);
      const std::string f_t      = to_string(cc_f_t[i]);
      const std::string f_si_val = to_string(cc_f_si[i]);
      const bool f_si = !f_si_val.empty();

      if (!f.empty() || !f_t.empty() || f_si) {
        file << "<f";
        write_attr("t",   f_t);
        write_attr("ref", to_string(cc_f_ref[i]));
        write_attr("ca",  to_string(cc_f_ca[i]));
        write_attr("si",  f_si_val);
        file << ">";

        file << f;

        file << "</f>";
      }

      // v node: value stored from evaluated formula
      const std::string v = to_string(cc_v[i]);
      if (!v.empty()) {
        if (!f_si && v == xml_preserver) {
          // a lone blank would otherwise be dropped by XML readers
          file << "<v xml:space=\"preserve\">";
          file << " ";
          file << "</v>";
        } else {
          file << "<v>" << v << "</v>";
        }
      }

      // <is><t> ... </t></is>, only for inline strings
      if (c_t == "inlineStr") {
        file << to_string(cc_is[i]);
      }

      file << "</c>";

      // update lastrow
      lastrow = thisrow;
    }

    file << "</row>";
    file << "</sheetData>";
  } else {
    file << "<sheetData/>";
  }

  file << post;
  file << "</worksheet>";

  file.close();
  if (file.fail()) {
    // e.g. disk full: make sure a truncated sheet does not go unnoticed
    Rcpp::stop("Failed to write file: " + fl);
  }

}

// Worksheet export that bypasses pugixml: the row attributes ("row_attr") and
// the cell table ("cc_out") are pulled out of `sheet_data` and passed, together
// with `prior`, `post` and the target path `fl`, to xml_sheet_data_slim().
// This should be way quicker and use far less memory than the pugixml export,
// but it also skips all of the checks pugi does.
//
// [[Rcpp::export]]
void write_worksheet_slim(
    Rcpp::Environment sheet_data,
    std::string prior,
    std::string post,
    std::string fl
) {
  // the sheet data is expected to be in order already
  Rcpp::DataFrame rows  = Rcpp::as<Rcpp::DataFrame>(sheet_data["row_attr"]);
  Rcpp::DataFrame cells = Rcpp::as<Rcpp::DataFrame>(sheet_data["cc_out"]);

  xml_sheet_data_slim(rows, cells, prior, post, fl);
}


// creates an xml row
// data in xml is ordered row wise. therefore we need the row attributes and
Expand Down Expand Up @@ -178,7 +371,6 @@ pugi::xml_document xml_sheet_data(Rcpp::DataFrame row_attr, Rcpp::DataFrame cc)
return doc;
}


// TODO: convert to pugi
// function that creates the xml worksheet
// uses preparated data and writes it. It passes data to set_row() which will
Expand Down
23 changes: 23 additions & 0 deletions tests/testthat/test-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -1239,3 +1239,26 @@ test_that("sheet is a valid argument in write_xlsx", {
wb2 <- write_xlsx(x = mtcars, sheet = "data")
expect_equal(wb1$get_sheet_names(), wb2$get_sheet_names())
})

test_that("writing without pugixml works", {

  # write mtcars to a fresh temporary file and load it again,
  # neither step may emit output, messages or warnings
  roundtrip_silently <- function() {
    temp <- temp_xlsx()
    expect_silent(write_xlsx(x = mtcars, file = temp))
    expect_silent(wb <- wb_load(temp))
  }

  # option as currently set in the session
  roundtrip_silently()

  # streaming export without pugixml
  options("openxlsx2.export_with_pugi" = FALSE)
  roundtrip_silently()

  # explicit pugixml export
  options("openxlsx2.export_with_pugi" = TRUE)
  roundtrip_silently()

  # option removed again
  options("openxlsx2.export_with_pugi" = NULL)
  roundtrip_silently()

})

0 comments on commit 15daf92

Please sign in to comment.