Skip to content

Commit

Permalink
[dims] code cleanup (#1068)
Browse files Browse the repository at this point in the history
* [dims] code cleanup

* [dims] speed up non consecutive column selection
  • Loading branch information
JanMarvin authored Jun 29, 2024
1 parent bad89ff commit 7c94ede
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 51 deletions.
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ needed_cells <- function(range) {
.Call(`_openxlsx2_needed_cells`, range)
}

dims_to_df <- function(rows, cols, filled, fill) {
.Call(`_openxlsx2_dims_to_df`, rows, cols, filled, fill)
dims_to_df <- function(rows, cols, filled, fill, fcols) {
.Call(`_openxlsx2_dims_to_df`, rows, cols, filled, fill, fcols)
}

long_to_wide <- function(z, tt, zz) {
Expand Down
30 changes: 30 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1236,3 +1236,33 @@ print.fmt_txt <- function(x, ...) {
is_dims <- function(x) {
grepl("^[A-Z]+[0-9]+(:[A-Z]+[0-9]+)?$", x)
}

#' check if non consecutive dims is equal sized: "A1:A4,B1:B4"
get_dims <- function(dims, check = TRUE, cols = TRUE) {

rows <- unique(
lapply(dims, FUN = function(dim) {
dimensions <- strsplit(dim, ":")[[1]]
as.integer(gsub("[[:upper:]]", "", dimensions))
})
)

if (check)
return(length(rows) == 1)

if (cols) {
cols <- unique(
lapply(dims, FUN = function(dim) {
dimensions <- strsplit(dim, ":")[[1]]
col2int(gsub("[[:digit:]]", "", dimensions))
})
)

cols <- lapply(cols, function(icols) seq.int(min(icols), max(icols)))

return(unique(unlist(cols)))
}

return(rows)

}
71 changes: 46 additions & 25 deletions R/wb_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,42 +23,62 @@ dims_to_dataframe <- function(dims, fill = FALSE, empty_rm = FALSE) {
has_dim_sep <- TRUE
}

rows_out <- NULL
cols_out <- NULL
filled <- NULL
for (dim in dims) {
# this is only required, if dims is not equal sized
rows_out <- NULL
cols_out <- NULL
filled <- NULL
full_cols <- NULL

if (!grepl(":", dim)) {
dim <- paste0(dim, ":", dim)
}
# condition 1) contains dims separator, but all dims are of
# equal size: "A1:A5,B1:B5"
# condition 2) either "A1:B5" or separator, but unequal size or "A1:A2,A4:A6,B1:B5"
if (has_dim_sep && get_dims(dims)) {

if (length(dims) > 1)
filled <- c(filled, needed_cells(dim))
full_rows <- get_dims(dims, check = FALSE, cols = FALSE)
full_cols <- sort(get_dims(dims, check = FALSE, cols = TRUE))

if (identical(dim, "Inf:-Inf")) {
# This should probably be fixed elsewhere?
stop("dims are inf:-inf")
} else {
dimensions <- strsplit(dim, ":")[[1]]
rows_out <- unlist(full_rows)
cols_out <- int2col(full_cols)
full_cols <- full_cols - min(full_cols) # is always a zero offset

} else {

for (dim in dims) {

if (!grepl(":", dim)) {
dim <- paste0(dim, ":", dim)
}

if (length(dims) > 1)
filled <- c(filled, needed_cells(dim))

if (identical(dim, "Inf:-Inf")) {
# This should probably be fixed elsewhere?
stop("dims are inf:-inf")
} else {
dimensions <- strsplit(dim, ":")[[1]]

rows <- as.numeric(gsub("[[:upper:]]", "", dimensions))
if (all(is.na(rows))) rows <- c(1, 1048576)
rows <- seq.int(rows[1], rows[2])
rows <- as.numeric(gsub("[[:upper:]]", "", dimensions))
if (all(is.na(rows))) rows <- c(1, 1048576)
rows <- seq.int(rows[1], rows[2])

rows_out <- unique(c(rows_out, rows))
rows_out <- unique(c(rows_out, rows))

# TODO seq.wb_columns? make a wb_cols vector?
cols <- gsub("[[:digit:]]", "", dimensions)
cols <- int2col(seq.int(col2int(cols[1]), col2int(cols[2])))
# TODO seq.wb_columns? make a wb_cols vector?
cols <- gsub("[[:digit:]]", "", dimensions)
cols <- int2col(seq.int(col2int(cols[1]), col2int(cols[2])))

cols_out <- unique(c(cols_out, cols))
cols_out <- unique(c(cols_out, cols))
}
}
}

if (has_dim_sep) {
if (empty_rm) {
cols_out <- int2col(sort(col2int(cols_out)))
rows_out <- sort(rows_out)
cols_out <- int2col(sort(col2int(cols_out)))
rows_out <- sort(rows_out)
# with empty_rm the dataframe will contain only needed columns
if (!is.null(full_cols)) full_cols <- seq_along(cols_out) - 1L
} else {
# somehow we have to make sure that all columns are covered
col_ints <- col2int(cols_out)
Expand All @@ -73,7 +93,8 @@ dims_to_dataframe <- function(dims, fill = FALSE, empty_rm = FALSE) {
rows = rows_out,
cols = cols_out,
filled = filled,
fill = fill
fill = fill,
fcols = full_cols
)
}

Expand Down
9 changes: 5 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,17 @@ BEGIN_RCPP
END_RCPP
}
// dims_to_df
SEXP dims_to_df(Rcpp::IntegerVector rows, Rcpp::CharacterVector cols, Rcpp::Nullable<Rcpp::CharacterVector> filled, bool fill);
RcppExport SEXP _openxlsx2_dims_to_df(SEXP rowsSEXP, SEXP colsSEXP, SEXP filledSEXP, SEXP fillSEXP) {
SEXP dims_to_df(Rcpp::IntegerVector rows, Rcpp::CharacterVector cols, Rcpp::Nullable<Rcpp::CharacterVector> filled, bool fill, Rcpp::Nullable<Rcpp::IntegerVector> fcols);
RcppExport SEXP _openxlsx2_dims_to_df(SEXP rowsSEXP, SEXP colsSEXP, SEXP filledSEXP, SEXP fillSEXP, SEXP fcolsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type rows(rowsSEXP);
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type cols(colsSEXP);
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::CharacterVector> >::type filled(filledSEXP);
Rcpp::traits::input_parameter< bool >::type fill(fillSEXP);
rcpp_result_gen = Rcpp::wrap(dims_to_df(rows, cols, filled, fill));
Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::IntegerVector> >::type fcols(fcolsSEXP);
rcpp_result_gen = Rcpp::wrap(dims_to_df(rows, cols, filled, fill, fcols));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -1005,7 +1006,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_openxlsx2_copy", (DL_FUNC) &_openxlsx2_copy, 1},
{"_openxlsx2_validate_dims", (DL_FUNC) &_openxlsx2_validate_dims, 1},
{"_openxlsx2_needed_cells", (DL_FUNC) &_openxlsx2_needed_cells, 1},
{"_openxlsx2_dims_to_df", (DL_FUNC) &_openxlsx2_dims_to_df, 4},
{"_openxlsx2_dims_to_df", (DL_FUNC) &_openxlsx2_dims_to_df, 5},
{"_openxlsx2_long_to_wide", (DL_FUNC) &_openxlsx2_long_to_wide, 3},
{"_openxlsx2_is_charnum", (DL_FUNC) &_openxlsx2_is_charnum, 1},
{"_openxlsx2_wide_to_long", (DL_FUNC) &_openxlsx2_wide_to_long, 14},
Expand Down
50 changes: 30 additions & 20 deletions src/helper_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,13 @@ bool has_cell(const std::string& str, const std::unordered_set<std::string>& vec

// provide a basic rbindlist for lists of named characters
// [[Rcpp::export]]
SEXP dims_to_df(Rcpp::IntegerVector rows, Rcpp::CharacterVector cols, Rcpp::Nullable<Rcpp::CharacterVector> filled, bool fill) {
SEXP dims_to_df(Rcpp::IntegerVector rows, Rcpp::CharacterVector cols, Rcpp::Nullable<Rcpp::CharacterVector> filled, bool fill,
Rcpp::Nullable<Rcpp::IntegerVector> fcols) {

size_t kk = cols.size();
size_t nn = rows.size();

bool has_fcols = fcols.isNotNull();
bool has_filled = filled.isNotNull();

// 1. create the list
Expand All @@ -353,30 +355,38 @@ SEXP dims_to_df(Rcpp::IntegerVector rows, Rcpp::CharacterVector cols, Rcpp::Null
SET_VECTOR_ELT(df, i, Rcpp::CharacterVector(nn, NA_STRING));
}

if (has_filled && fill) {
if (fill) {
if (has_filled) {

std::vector<std::string> flld = Rcpp::as<std::vector<std::string>>(filled.get());
std::unordered_set<std::string> flls(flld.begin(), flld.end());
std::vector<std::string> flld = Rcpp::as<std::vector<std::string>>(filled.get());
std::unordered_set<std::string> flls(flld.begin(), flld.end());

// with has_filled we always have to run this loop
for (size_t i = 0; i < kk; ++i) {
Rcpp::CharacterVector cvec = Rcpp::as<Rcpp::CharacterVector>(df[i]);
std::string coli = Rcpp::as<std::string>(cols[i]);
for (size_t j = 0; j < nn; ++j) {
std::string cell = coli + std::to_string(rows[j]);
if (has_cell(cell, flls))
cvec[j] = coli + std::to_string(rows[j]);
// else cvec[j] = "";
// with has_filled we always have to run this loop
for (size_t i = 0; i < kk; ++i) {
Rcpp::CharacterVector cvec = Rcpp::as<Rcpp::CharacterVector>(df[i]);
std::string coli = Rcpp::as<std::string>(cols[i]);
for (size_t j = 0; j < nn; ++j) {
std::string cell = coli + std::to_string(rows[j]);
if (has_cell(cell, flls))
cvec[j] = cell;
}
}
}

} else if (fill) { // insert cells into data frame
} else { // insert cells into data frame

for (size_t i = 0; i < kk; ++i) {
Rcpp::CharacterVector cvec = Rcpp::as<Rcpp::CharacterVector>(df[i]);
std::string coli = Rcpp::as<std::string>(cols[i]);
for (size_t j = 0; j < nn; ++j) {
cvec[j] = coli + std::to_string(rows[j]);
std::vector<size_t> fcls;
if (has_fcols) {
fcls = Rcpp::as<std::vector<size_t>>(fcols.get());
}

for (size_t i = 0; i < kk; ++i) {
if (has_fcols && std::find(fcls.begin(), fcls.end(), i) == fcls.end())
continue;
Rcpp::CharacterVector cvec = Rcpp::as<Rcpp::CharacterVector>(df[i]);
std::string coli = Rcpp::as<std::string>(cols[i]);
for (size_t j = 0; j < nn; ++j) {
cvec[j] = coli + std::to_string(rows[j]);
}
}
}

Expand Down

0 comments on commit 7c94ede

Please sign in to comment.