diff --git a/NEWS.md b/NEWS.md index 7f2debd7..92be8da2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # collapse 2.0.15 +* Some changes on the C-side to move the package closer to C API compliance (demanded by R-Core). One notable change is that `gsplit()` no longer supports S4 objects (because `SET_S4_OBJECT` is not part of the API and `asS4()` is too expensive for tight loops). I cannot think of a single example where it would be necessary to split an S4 object, but if you do have applications please file an issue. + * `pivot()` has new arguments `FUN = "last"` and `FUN.args = NULL`, allowing wide and recast pivots with aggregation (default last value as before). `FUN` currently supports a single function returning a scalar value. *Fast Statistical Functions* receive vectorized execution. `FUN.args` can be used to supply a list of function arguments, including data-length arguments such as weights. There are also a couple of internal functions callable using function strings: `"first"`, `"last"`, `"count"`, `"sum"`, `"mean"`, `"min"`, or `"max"`. These are built into the reshaping C-code and thus extremely fast. Thanks @AdrianAntico for the request (#582). * `join()` now provides enhanced verbosity, indicating the average order of the join between the two tables, e.g. diff --git a/man/join.Rd b/man/join.Rd index 4328200b..61c2ec97 100644 --- a/man/join.Rd +++ b/man/join.Rd @@ -29,7 +29,7 @@ join(x, y, \item{y}{a data frame-like object to join with \code{x}.} - \item{on}{character. vector of columns to join on. \code{NULL} uses \code{union(names(x), names(y))}. Use a named vector to match columns named differently in \code{x} and \code{y}, e.g. \code{c("x_id" = "y_id")}.} + \item{on}{character. vector of columns to join on. \code{NULL} uses \code{intersect(names(x), names(y))}. Use a named vector to match columns named differently in \code{x} and \code{y}, e.g. \code{c("x_id" = "y_id")}.} \item{how}{character. Join type: \code{"left"}, \code{"right"}, \code{"inner"}, \code{"full"}, \code{"semi"} or \code{"anti"}. The first letter suffices. } diff --git a/src/base_radixsort.h b/src/base_radixsort.h index 2d5283f3..b23e8d79 100644 --- a/src/base_radixsort.h +++ b/src/base_radixsort.h @@ -8,7 +8,7 @@ // #define IS_ASCII(x) ((x)->sxpinfo.gp & ASCII_MASK) // #define IS_ASCII(x) (LEVELS(x) & ASCII_MASK) -#define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // Replacing STRING_PTR +#define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // NOTE: All of this is copied from Defn.h: https://github.com/wch/r-source/blob/28de75af0541f93832c5899139b969d290bf422e/src/include/Defn.h // to avoid checking for ALTREP in TRUELENGTH, which slows down the code unnecessarily... @@ -25,6 +25,8 @@ #define MYLEV(x) (((SEXPREC_partial *)(x))->sxpinfo.gp) #define IS_ASCII(x) (MYLEV(x) & 64) // from data.table.h +#define SETTOF(x,v) ((((SEXPREC_partial *)(x))->sxpinfo.type)=(v)) + // NOTE: All of this is copied from Defn.h: https://github.com/wch/r-source/blob/28de75af0541f93832c5899139b969d290bf422e/src/include/Defn.h // to avoid checking for ALTREP in TRUELENGTH, which slows down the code unnecessarily... diff --git a/src/data.table_subset.c b/src/data.table_subset.c index 45d5e6fa..0645f52f 100644 --- a/src/data.table_subset.c +++ b/src/data.table_subset.c @@ -82,7 +82,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? SET_ATTRIB(newdt, shallow_duplicate(ATTRIB(dt))); SET_OBJECT(newdt, OBJECT(dt)); - IS_S4_OBJECT(dt) ? SET_S4_OBJECT(newdt) : UNSET_S4_OBJECT(newdt); // To support S4 objects that include data.table + if(IS_S4_OBJECT(dt)) newdt = asS4(newdt, TRUE, 1); // To support S4 objects that include data.table //SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // SHALLOW_DUPLICATE_ATTRIB would be a bit neater but is only available from R 3.3.0 // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It diff --git a/src/data.table_utils.c b/src/data.table_utils.c index 9d49c7df..0357051f 100644 --- a/src/data.table_utils.c +++ b/src/data.table_utils.c @@ -179,7 +179,7 @@ SEXP dt_na(SEXP x, SEXP cols, SEXP Rprop, SEXP Rcount) { } } if(count) { - SET_TYPEOF(ans, INTSXP); + SETTOF(ans, INTSXP); } else { // This computes the result if(prop < 1.0) { diff --git a/src/flag.cpp b/src/flag.cpp index 30e029a5..0cef506f 100644 --- a/src/flag.cpp +++ b/src/flag.cpp @@ -1,6 +1,9 @@ #include using namespace Rcpp; +LogicalVector intToLogical(IntegerVector x) { + return LogicalVector(x.begin(), x.end()); +} // 7th version: Irregular time series and panels supported ! template @@ -569,8 +572,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = while(i != np) outjp[i++] = ff; for( ; i != row; ++i) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(row); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; @@ -578,8 +580,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = while(i != st) outjp[--i] = ff; for( ; i--; ) outjp[i] = column[i - np]; SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; @@ -702,8 +703,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(os); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; @@ -715,8 +715,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; @@ -840,8 +839,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; @@ -855,8 +853,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; @@ -1009,8 +1006,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else if(np<0) { IntegerVector outjp = no_init_vector(gss); if(names) nam[pos] = "F" + nc[p] + "." + na[j]; @@ -1022,8 +1018,7 @@ List flagleadlCpp(const List& x, const IntegerVector& n = 1, const SEXP& fill = } } SHALLOW_DUPLICATE_ATTRIB(outjp, column); - if(txj == LGLSXP) SET_TYPEOF(outjp, LGLSXP); - out[pos] = outjp; + out[pos] = txj == LGLSXP ? intToLogical(outjp) : outjp; } else { if(names) nam[pos] = na[j]; out[pos] = x[j]; diff --git a/src/gsplit.c b/src/gsplit.c index 41041e5f..6c8d5c5f 100644 --- a/src/gsplit.c +++ b/src/gsplit.c @@ -24,12 +24,12 @@ SEXP gsplit(SEXP x, SEXP gobj, SEXP toint) { int ox = OBJECT(x); // FAZIT: Need to use SET_VECTOR_ELT!! pres[i] = allocVector() doesn't work!! if(TYPEOF(ax) != NILSXP && ox != 0) { - for(int i = 0, s4o = IS_S4_OBJECT(x); i != ng; ++i) { + for(int i = 0; i != ng; ++i) { // , s4o = IS_S4_OBJECT(x) SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); SET_ATTRIB(resi, ax); SET_OBJECT(resi, ox); - if(s4o) SET_S4_OBJECT(resi); + // if(s4o) SET_S4_OBJECT(resi); } } else if(TYPEOF(ax) != NILSXP) { for(int i = 0; i != ng; ++i) { @@ -38,11 +38,11 @@ SEXP gsplit(SEXP x, SEXP gobj, SEXP toint) { SET_ATTRIB(resi, ax); } } else if(ox != 0) { // Is this even possible? Object bits but no attributes? - for(int i = 0, s4o = IS_S4_OBJECT(x); i != ng; ++i) { + for(int i = 0; i != ng; ++i) { // , s4o = IS_S4_OBJECT(x) SEXP resi; SET_VECTOR_ELT(res, i, resi = allocVector(tx, pgs[i])); SET_OBJECT(resi, ox); - if(s4o) SET_S4_OBJECT(resi); + // if(s4o) SET_S4_OBJECT(resi); } } else { for(int i = 0; i != ng; ++i) SET_VECTOR_ELT(res, i, allocVector(tx, pgs[i])); diff --git a/src/kit.h b/src/kit.h index cf4e4103..1de2c883 100644 --- a/src/kit.h +++ b/src/kit.h @@ -55,7 +55,7 @@ #define C_ISNAN(x, y) (B_ISNAN(x, y) || (N_ISNAN(x, y) && x == y)) #define REQUAL(x, y) (N_ISNAN(x, y) ? (x == y) : (B_IsNA(x, y) || B_IsNaN(x, y))) #define CEQUAL(x, y) ((N_ISNAN(x.r, x.i) && N_ISNAN(y.r, y.i)) ? (x.r == y.r && x.i == y.i) : (C_IsNA(x) ? C_IsNA(y) : (C_IsNA(y) ? 0 : (C_ISNAN(x.r, y.r) && C_ISNAN(x.i, y.i))))) -#define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // To replace STRING_PTR +#define SEXPPTR(x) ((SEXP *)DATAPTR(x)) // #define STR_DF mkString("data.frame") // #define MAX(a,b) (((a)>(b))?(a):(b)) // #define IS_LOGICAL(x) (isLogical(x) && LENGTH(x)==1) diff --git a/src/programming.c b/src/programming.c index 8b88229f..b1152018 100644 --- a/src/programming.c +++ b/src/programming.c @@ -1,4 +1,5 @@ #include "collapse_c.h" +#include "base_radixsort.h" SEXP Cna_rm(SEXP x) { const int n = LENGTH(x); @@ -894,20 +895,20 @@ SEXP vtypes(SEXP x, SEXP isnum) { pans[i] = is_num; } } - SET_TYPEOF(ans, LGLSXP); + SETTOF(ans, LGLSXP); break; } case 2: // is.factor for(int i = 0; i != n; ++i) pans[i] = (int)isFactor(px[i]); - SET_TYPEOF(ans, LGLSXP); + SETTOF(ans, LGLSXP); break; case 3: // is.list, needed for list processing functions for(int i = 0; i != n; ++i) pans[i] = TYPEOF(px[i]) == VECSXP; - SET_TYPEOF(ans, LGLSXP); + SETTOF(ans, LGLSXP); break; case 4: // is.sublist, needed for list processing functions for(int i = 0; i != n; ++i) pans[i] = TYPEOF(px[i]) == VECSXP && !isFrame(px[i]); - SET_TYPEOF(ans, LGLSXP); + SETTOF(ans, LGLSXP); break; case 7: // is.atomic(x), needed in atomic_elem() // is.atomic: do_is with op = 200: https://github.com/wch/r-source/blob/9f9033e193071f256e21a181cb053cba983ed4a9/src/main/coerce.c @@ -927,7 +928,7 @@ SEXP vtypes(SEXP x, SEXP isnum) { pans[i] = 0; } } - SET_TYPEOF(ans, LGLSXP); + SETTOF(ans, LGLSXP); break; case 5: // is.atomic(x) || is.list(x), needed in reg_elem() and irreg_elem() for(int i = 0; i != n; ++i) { @@ -949,7 +950,7 @@ SEXP vtypes(SEXP x, SEXP isnum) { pans[i] = 0; } } - SET_TYPEOF(ans, LGLSXP); + SETTOF(ans, LGLSXP); break; case 6: // Faster object type identification, needed in unlist2d: