Skip to content

Commit

Permalink
Merge pull request #660 from SebKrantz/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
SebKrantz authored Nov 2, 2024
2 parents 15f2d3b + 68371dc commit 0d096f7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 31 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

* `qsu()` now adds a `WeightSum` column giving the sum of (non-zero or missing) weights if the `w` argument is used. Thanks @mayer79 for suggesting (#650). For panel data (`pid`) the 'Between' sum of weights is also simply the number of groups, and the 'Within' sum of weights is the 'Overall' sum of weights divided by the number of groups.

* Fixed an inaccuracy in `fquantile()/fnth()` with weights: As per documentation the target sum is `sumwp = (sum(w) - min(w)) * p`, however, in practice, the weight of the minimum element of `x` was used instead of the minimum weight. Since the smallest element in the sample usually has a small weight this was unnoticed for a long while, but thanks to @Jahnic-kb now reported and fixed (#659).

# collapse 2.0.16

* Fixes an installation bug on some Linux systems (conflicting types) (#613).
Expand Down
2 changes: 1 addition & 1 deletion man/fquantile.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ wquantile7R <- function(x, w, probs = c(0.25, 0.5, 0.75), na.rm = TRUE, names =
wo = w[o]
w_cs = cumsum(wo) # Cumulative sum
sumwp = sum(w) # Computing sum(w) - min(w)
sumwp = sumwp - wo[1L]
sumwp = sumwp - min(w)
sumwp = sumwp * probs # Target sums of weights for quantile type 7
res = sapply(sumwp, function(tsump) {
j = which.max(w_cs > tsump) # Lower order statistic
Expand Down
58 changes: 28 additions & 30 deletions src/fnth_fmedian_fquantile.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,38 +88,38 @@ case 4: // quantile type 4: not exact for weighted case, and also bad properties
break;
*/

// Weighted quantiles. Idea: make h dependent on sum of weights and average weight.
// Weighted quantiles. Idea: make h dependent on sum of weights and minimum weight.
#undef RETWQSWITCH
#define RETWQSWITCH(sumw, mu) \
#define RETWQSWITCH(sumw, minw) \
switch(ret) { \
case 7: /* quantile type 7 */ \
h = (sumw - mu) * Q; \
h = (sumw - minw) * Q; \
break; \
case 1: \
case 2: \
case 3: /* average, lower or upper element (adjust algorithm)*/\
h = sumw * Q; \
break; \
case 5: /* quantile type 5*/ \
h = sumw * Q - 0.5*mu; \
h = sumw * Q - 0.5*minw; \
if(h < 0.0) h = 0.0; \
break; \
case 6: /* quantile type 6*/ \
h = (sumw + mu)*Q - mu; \
h = (sumw + minw)*Q - minw; \
if(h < 0.0) h = 0.0; \
break; \
case 8: /* quantile type 8 (best according to H&F 1986)*/ \
h = (sumw + 1.0/3.0 * mu)*Q - 2.0/3.0 * mu; \
h = (sumw + 1.0/3.0 * minw)*Q - 2.0/3.0 * minw; \
if(h < 0.0) h = 0.0; \
break; \
case 9: /* quantile type 9*/ \
h = (sumw + 1.0/4.0 * mu)*Q - 5.0/8.0 * mu; \
h = (sumw + 1.0/4.0 * minw)*Q - 5.0/8.0 * minw; \
if(h < 0.0) h = 0.0; \
break; \
}
/*
case 4: // quantile type 4: does not give exact results
h = sumw * Q - mu;
h = sumw * Q - minw;
break;
*/
/* Redundant? -> yes!
Expand Down Expand Up @@ -229,7 +229,7 @@ double Q, h, a, wb, b; \
for(int i = 0, k = 0; i < np; ++i) { \
Q = probs[i]; \
if(Q > 0.0 && Q < 1.0) { \
RETWQSWITCH(sumw, mu); \
RETWQSWITCH(sumw, minw); \
a = h + eps; \
while(wsum <= a) wsum += pw[po[k++]]; \
a = px[po[k == 0 ? 0 : k-1]]; \
Expand Down Expand Up @@ -423,10 +423,14 @@ SEXP fquantileC(SEXP x, SEXP Rprobs, SEXP w, SEXP o, SEXP Rnarm, SEXP Rtype, SEX
}

} else {
double wsum = 0.0, sumw = 0.0; // wsum is running sum, sumw is the total sum
double wsum = 0.0, sumw = 0.0, minw = DBL_MAX; // wsum is running sum, sumw is the total sum

#pragma omp simd reduction(+:sumw)
for (int i = 0; i < l; ++i) sumw += pw[po[i]];
// #pragma omp simd reduction(+:sumw)
for (int i = 0; i < l; ++i) {
wsum = pw[po[i]];
sumw += wsum;
if(minw > wsum && wsum > 0.0) minw = wsum;
}

wsum = 0.0;
if(ISNAN(sumw)) error("Missing weights in order statistics are currently only supported if x is also missing");
Expand All @@ -435,9 +439,6 @@ SEXP fquantileC(SEXP x, SEXP Rprobs, SEXP w, SEXP o, SEXP Rnarm, SEXP Rtype, SEX
n = 0;
goto wall0;
}
double mu = pw[po[0]]; // = sumw / (l - nw0);
int i = 0;
while(mu <= 0.0) mu = pw[po[++i]];

if(tx == REALSXP) { // Numeric data
double *px = REAL(x)-1;
Expand Down Expand Up @@ -498,28 +499,25 @@ double iquickselect(int *x, const int n, const int ret, const double Q) {

// Expects pw and po to be consistent
double w_compute_h(const double *pw, const int *po, const int l, const int sorted, const int ret, const double Q) {
double sumw = 0.0, mu = 0.0, h = 0.0; /* To avoid -Wmaybe-uninitialized */
double sumw = 0.0, minw = DBL_MAX, h = 0.0; /* To avoid -Wmaybe-uninitialized */
if(sorted) {
#pragma omp simd reduction(+:sumw)
for(int i = 0; i < l; ++i) sumw += pw[i];
if(sumw > eps) {
int i = 0;
mu = pw[0];
while(mu <= 0.0) mu = pw[++i];
// #pragma omp simd reduction(+:sumw)
for(int i = 0; i < l; ++i) {
sumw += pw[i];
if(minw > pw[i] && pw[i] > 0.0) minw = pw[i];
}
} else {
#pragma omp simd reduction(+:sumw)
for(int i = 0; i < l; ++i) sumw += pw[po[i]];
if(sumw > eps) {
int i = 0;
mu = pw[po[0]];
while(mu <= 0.0) mu = pw[po[++i]];
// #pragma omp simd reduction(+:sumw)
for(int i = 0; i < l; ++i) {
h = pw[po[i]];
sumw += h;
if(minw > h && h > 0.0) minw = h;
}
}
if(ISNAN(sumw)) error("Missing weights in order statistics are currently only supported if x is also missing");
if(sumw < 0.0) error("Weights must be positive or zero");
if(mu == 0.0) return NA_REAL;
RETWQSWITCH(sumw, mu);
if(minw == DBL_MAX) return NA_REAL;
RETWQSWITCH(sumw, minw);
return h;
}

Expand Down

0 comments on commit 0d096f7

Please sign in to comment.