Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[writing] provide openxlsx2.export_with_pugi = FALSE option #1090

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,10 @@ set_sst <- function(sharedStrings) {
.Call(`_openxlsx2_set_sst`, sharedStrings)
}

# Thin R binding for the native `_openxlsx2_write_worksheet_slim` routine.
# The call is made for its side effect on `fl`; the result is returned
# invisibly so nothing is printed at the console.
write_worksheet_slim <- function(sheet_data, prior, post, fl) {
    native_result <- .Call(`_openxlsx2_write_worksheet_slim`, sheet_data, prior, post, fl)
    invisible(native_result)
}

# Thin R binding for the native `_openxlsx2_write_worksheet` routine.
# Whatever the native routine produces is handed back to the caller unchanged.
write_worksheet <- function(prior, post, sheet_data) {
    sheet_doc <- .Call(`_openxlsx2_write_worksheet`, prior, post, sheet_data)
    sheet_doc
}
Expand Down
33 changes: 26 additions & 7 deletions R/class-workbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -9318,14 +9318,33 @@
ws$sheet_data$cc_out <- NULL
}

# create entire sheet prior to writing it
sheet_xml <- write_worksheet(
prior = prior,
post = post,
sheet_data = ws$sheet_data
)
ws_file <- file.path(xlworksheetsDir, sprintf("sheet%s.xml", i))
write_xmlPtr(doc = sheet_xml, fl = ws_file)

use_pugixml_export <- getOption("openxlsx2.export_with_pugi", default = TRUE)

if (use_pugixml_export) {

Check warning on line 9326 in R/class-workbook.R

View workflow job for this annotation

GitHub Actions / lint

file=R/class-workbook.R,line=9326,col=1,[trailing_whitespace_linter] Trailing whitespace is superfluous.
# create entire sheet prior to writing it
sheet_xml <- write_worksheet(
prior = prior,
post = post,
sheet_data = ws$sheet_data
)
write_xmlPtr(doc = sheet_xml, fl = ws_file)

Check warning on line 9334 in R/class-workbook.R

View workflow job for this annotation

GitHub Actions / lint

file=R/class-workbook.R,line=9334,col=1,[trailing_whitespace_linter] Trailing whitespace is superfluous.
} else {

# The streaming writer appends "</worksheet>" itself, so drop the closing
# tag (and the single separator character in front of it) from `prior`.
# Anchoring the pattern at the end of the string removes exactly the 13
# characters " </worksheet>" when they are present and leaves `prior`
# untouched otherwise, instead of blindly chopping a fixed number of
# characters whenever the tag occurs anywhere in the string.
prior <- sub("[[:space:]]?</worksheet>$", "", prior)

write_worksheet_slim(
sheet_data = ws$sheet_data,
prior = prior,
post = post,
fl = ws_file
)

}

## write worksheet rels
if (length(self$worksheets_rels[[i]]) || hasHL) {
Expand Down
14 changes: 14 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,19 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// write_worksheet_slim
// Forward declaration of the C++ implementation that this glue code wraps.
void write_worksheet_slim(Rcpp::Environment sheet_data, std::string prior, std::string post, std::string fl);
// .Call entry point: converts the four incoming SEXP arguments to the C++
// parameter types of write_worksheet_slim() and invokes it.
RcppExport SEXP _openxlsx2_write_worksheet_slim(SEXP sheet_dataSEXP, SEXP priorSEXP, SEXP postSEXP, SEXP flSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::Environment >::type sheet_data(sheet_dataSEXP);
Rcpp::traits::input_parameter< std::string >::type prior(priorSEXP);
Rcpp::traits::input_parameter< std::string >::type post(postSEXP);
Rcpp::traits::input_parameter< std::string >::type fl(flSEXP);
write_worksheet_slim(sheet_data, prior, post, fl);
// the wrapped function is void, so NULL is handed back to R
return R_NilValue;
END_RCPP
}
// write_worksheet
XPtrXML write_worksheet(std::string prior, std::string post, Rcpp::Environment sheet_data);
RcppExport SEXP _openxlsx2_write_worksheet(SEXP priorSEXP, SEXP postSEXP, SEXP sheet_dataSEXP) {
Expand Down Expand Up @@ -1068,6 +1081,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_openxlsx2_read_colors", (DL_FUNC) &_openxlsx2_read_colors, 1},
{"_openxlsx2_write_colors", (DL_FUNC) &_openxlsx2_write_colors, 1},
{"_openxlsx2_set_sst", (DL_FUNC) &_openxlsx2_set_sst, 1},
{"_openxlsx2_write_worksheet_slim", (DL_FUNC) &_openxlsx2_write_worksheet_slim, 4},
{"_openxlsx2_write_worksheet", (DL_FUNC) &_openxlsx2_write_worksheet, 3},
{"_openxlsx2_write_xmlPtr", (DL_FUNC) &_openxlsx2_write_xmlPtr, 2},
{"_openxlsx2_styles_bin", (DL_FUNC) &_openxlsx2_styles_bin, 3},
Expand Down
194 changes: 193 additions & 1 deletion src/write_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,199 @@ Rcpp::CharacterVector set_sst(Rcpp::CharacterVector sharedStrings) {
return sst;
}

// Write a worksheet xml file by streaming it straight to disk. Whatever input
// is provided is dumped into the file as is: no xml checking, no escaping and
// no unicode checking is performed.
//
// row_attr: one line per <row>, every non-empty column becomes a row attribute.
//           The column "r" holds the row number.
// cc:       one line per cell, ordered by row. Must provide the columns
//           row_r, r, v, c_t, c_s, c_cm, c_ph, c_vm, f, f_t, f_ref, f_ca,
//           f_si and is.
// prior:    everything written in front of <sheetData> (after the xml
//           declaration). Must not contain the closing </worksheet> tag.
// post:     everything written between </sheetData> and </worksheet>.
// fl:       path of the output file.
//
// Stops with an R error if the file cannot be opened or written, or if cc
// references a row that has no matching entry in row_attr.
void xml_sheet_data_slim(
    Rcpp::DataFrame row_attr,
    Rcpp::DataFrame cc,
    std::string prior,
    std::string post,
    std::string fl
) {

  std::ofstream file(fl);
  if (!file.is_open()) {
    Rcpp::stop("failed to open file for writing: " + fl);
  }

  auto lastrow = 0; // integer value of the last row with column data
  auto thisrow = 0; // integer value of the current row with column data
  auto row_idx = 0; // the index of the row_attr file. this is != rowid
  auto rowid   = 0; // integer value of the r field in row_attr

  // a <v> consisting of a single blank needs xml:space="preserve"
  const std::string xml_preserver = " ";

  // writes ` name="value"` and skips attributes without a value
  auto write_attr = [&file](const char* name, const std::string& value) {
    if (!value.empty()) {
      file << " " << name << "=\"" << value << "\"";
    }
  };

  file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
  file << prior;

  if (cc.nrow() && cc.ncol()) {
    // we cannot access rows directly in the dataframe.
    // Have to extract the columns and use these
    Rcpp::CharacterVector cc_row_r = cc["row_r"]; // 1
    Rcpp::CharacterVector cc_r     = cc["r"];     // A1
    Rcpp::CharacterVector cc_v     = cc["v"];
    Rcpp::CharacterVector cc_c_t   = cc["c_t"];
    Rcpp::CharacterVector cc_c_s   = cc["c_s"];
    Rcpp::CharacterVector cc_c_cm  = cc["c_cm"];
    Rcpp::CharacterVector cc_c_ph  = cc["c_ph"];
    Rcpp::CharacterVector cc_c_vm  = cc["c_vm"];
    Rcpp::CharacterVector cc_f     = cc["f"];
    Rcpp::CharacterVector cc_f_t   = cc["f_t"];
    Rcpp::CharacterVector cc_f_ref = cc["f_ref"];
    Rcpp::CharacterVector cc_f_ca  = cc["f_ca"];
    Rcpp::CharacterVector cc_f_si  = cc["f_si"];
    Rcpp::CharacterVector cc_is    = cc["is"];

    Rcpp::CharacterVector row_r    = row_attr["r"];

    // the attribute names do not change between rows, fetch them only once
    Rcpp::CharacterVector attrnams = row_attr.names();

    file << "<sheetData>";
    for (auto i = 0; i < cc.nrow(); ++i) {

      thisrow = std::stoi(Rcpp::as<std::string>(cc_row_r[i]));

      if (lastrow < thisrow) {

        // there might be entirely empty rows in between. this is the case for
        // loadExample. We check the rowid and write the line and skip until we
        // have every row and only then continue writing the column
        while (rowid < thisrow) {

          // never read past the end of row_attr: a cell that references a row
          // without row attributes indicates inconsistent input
          if (row_idx >= row_attr.nrow()) {
            Rcpp::stop("row_attr has no entry for row " + std::to_string(thisrow));
          }

          rowid = std::stoi(Rcpp::as<std::string>(row_r[row_idx]));

          // close the previous row before opening the next one
          if (row_idx) file << "</row>";
          file << "<row";

          for (auto j = 0; j < row_attr.ncol(); ++j) {

            Rcpp::CharacterVector cv_s = "";
            cv_s = Rcpp::as<Rcpp::CharacterVector>(row_attr[j])[row_idx];

            if (cv_s[0] != "") {
              const std::string val_strl = Rcpp::as<std::string>(cv_s);
              file << " " << attrnams[j] << "=\"" << val_strl << "\"";
            }
          }
          file << ">"; // end <row ...>

          // read the next row_idx when visiting again
          ++row_idx;
        }
      }

      // convert every field that is inspected more than once only a single time
      const std::string c_t  = to_string(cc_c_t[i]);
      const std::string f    = to_string(cc_f[i]);
      const std::string f_t  = to_string(cc_f_t[i]);
      const std::string f_si = to_string(cc_f_si[i]);
      const std::string v    = to_string(cc_v[i]);

      // create node <c>
      file << "<c";

      // append attributes <c r="A1" ...>. r is always written
      file << " r" << "=\"" << to_string(cc_r[i]) << "\"";

      write_attr("s", to_string(cc_c_s[i]));

      // assign type if not <v> aka numeric
      write_attr("t", c_t);

      // CellMetaIndex: suppress curly brackets in spreadsheet software
      write_attr("cm", to_string(cc_c_cm[i]));

      // phonetics spelling
      write_attr("ph", to_string(cc_c_ph[i]));

      // suppress curly brackets in spreadsheet software
      write_attr("vm", to_string(cc_c_vm[i]));

      file << ">"; // end <c ...>

      // <f> ... </f>
      // f node: formula to be evaluated
      if (!f.empty() || !f_t.empty() || !f_si.empty()) {
        file << "<f";
        write_attr("t", f_t);
        write_attr("ref", to_string(cc_f_ref[i]));
        write_attr("ca", to_string(cc_f_ca[i]));
        write_attr("si", f_si);
        file << ">";

        file << f;

        file << "</f>";
      }

      // v node: value stored from evaluated formula
      if (!v.empty()) {
        // a blank value is only preserved if the cell has no shared formula
        if (f_si.empty() && v == xml_preserver) {
          // this looks strange
          file << "<v xml:space=\"preserve\">";
          file << " ";
          file << "</v>";
        } else {
          file << "<v>" << v << "</v>";
        }
      }

      // <is><t> ... </t></is>
      if (c_t == "inlineStr") {
        file << to_string(cc_is[i]);
      }

      file << "</c>";

      // update lastrow
      lastrow = thisrow;
    }

    file << "</row>";
    file << "</sheetData>";
  } else {
    file << "<sheetData/>";
  }

  file << post;
  file << "</worksheet>";

  file.close();

  // report write errors (e.g. disk full) instead of leaving a truncated file
  // behind silently
  if (file.fail()) {
    Rcpp::stop("failed to write file: " + fl);
  }

}

// Exports a worksheet without building a pugixml document first.
// Intended to be quicker and to need far less memory, at the price of
// skipping every check pugi would otherwise perform.
//
// Pulls the "row_attr" and "cc_out" data frames out of the sheet_data
// environment and passes them, together with prior, post and the target file
// fl, to xml_sheet_data_slim().
//
// [[Rcpp::export]]
void write_worksheet_slim(
    Rcpp::Environment sheet_data,
    std::string prior,
    std::string post,
    std::string fl
) {
  // one line per <row>
  Rcpp::DataFrame rows  = Rcpp::as<Rcpp::DataFrame>(sheet_data["row_attr"]);
  // one line per cell
  Rcpp::DataFrame cells = Rcpp::as<Rcpp::DataFrame>(sheet_data["cc_out"]);

  xml_sheet_data_slim(rows, cells, prior, post, fl);
}


// creates an xml row
// data in xml is ordered row wise. therefore we need the row attributes and
Expand Down Expand Up @@ -178,7 +371,6 @@ pugi::xml_document xml_sheet_data(Rcpp::DataFrame row_attr, Rcpp::DataFrame cc)
return doc;
}


// TODO: convert to pugi
// function that creates the xml worksheet
// uses preparated data and writes it. It passes data to set_row() which will
Expand Down
23 changes: 23 additions & 0 deletions tests/testthat/test-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -1239,3 +1239,26 @@ test_that("sheet is a valid argument in write_xlsx", {
wb2 <- write_xlsx(x = mtcars, sheet = "data")
expect_equal(wb1$get_sheet_names(), wb2$get_sheet_names())
})

test_that("writing without pugixml works", {

  # writes mtcars to a fresh temporary file and loads it again; neither step
  # may produce output, messages or warnings
  roundtrip_is_silent <- function() {
    temp <- temp_xlsx()
    expect_silent(write_xlsx(x = mtcars, file = temp))
    expect_silent(wb <- wb_load(temp))
  }

  # option not touched
  roundtrip_is_silent()

  # streaming export without pugixml
  options("openxlsx2.export_with_pugi" = FALSE)
  roundtrip_is_silent()

  # export with pugixml requested explicitly
  options("openxlsx2.export_with_pugi" = TRUE)
  roundtrip_is_silent()

  # option removed again
  options("openxlsx2.export_with_pugi" = NULL)
  roundtrip_is_silent()

})
Loading