Skip to content

Commit

Permalink
3.0.9
Browse files Browse the repository at this point in the history
  • Loading branch information
liborty committed Mar 12, 2024
1 parent 0238e19 commit 81b989c
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 42 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Nonetheless, especially on large datasets, one should devote certain limited fra

* Linear complexity.
* Fast (in-place) iterative partitioning into three subranges (lesser,equal,greater), minimising data movements and memory management.
* Simple pivot selection on small datasets. We define the `middling` value of a sample of four as one of the middle pair of ordered items. This is found in only three comparisons. A `middling` pivot is enough to guarantee convergence of iterative search for the median. Really poor pivots occur only rarely.
* Simple pivot selection strategy: median of three samples (requires at most three comparisons). Really poor pivots occur only rarely during the iterative process. For longer data, we deploy the median of three medians, but again only on a small subsample of the data.

## Trait Medianf64

Expand Down
61 changes: 27 additions & 34 deletions src/algos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,19 @@ use std::ops::Range;
use indxvec::Mutops;
use crate::{Me,merror};

/// Subscript of the median of three referenced items.
///
/// `s` is a slice of references; `indx0`, `indx1` and `indx2` are the
/// subscripts (into `s`) of the three candidates, and `c` is the comparator.
/// The result is always one of the three subscripts passed in.
/// Costs two comparisons when the third item is not greater than the lesser
/// of the first two, otherwise three.
fn midof3<T>(
    s: &[&T],
    indx0: usize,
    indx1: usize,
    indx2: usize,
    c: &mut impl FnMut(&T, &T) -> Ordering,
) -> usize {
    // Order the first two candidates; on a tie `indx1` is taken as the lower one.
    let (lower, upper) = match c(s[indx0], s[indx1]) {
        Less => (indx0, indx1),
        _ => (indx1, indx0),
    };
    let third = s[indx2];
    if c(s[lower], third) != Less {
        // third <= lower <= upper, so `lower` sits in the middle
        lower
    } else if c(third, s[upper]) != Less {
        // lower < third and upper <= third, so `upper` sits in the middle
        upper
    } else {
        // lower < third < upper
        indx2
    }
}

/// Scan a slice of f64s for NANs
pub fn nans(v: &[f64]) -> bool {
for &f in v {
Expand Down Expand Up @@ -37,23 +50,6 @@ pub fn best1_k<T,F>(s: &[T], k: usize, rng: Range<usize>, c: F) -> &T
k_max
}

/// Selects the index of a `middling` item out of four, i.e. one that ranks
/// second or third among them.
///
/// The comparator `c` receives two indices and orders the items they denote.
/// The greater of each pair (`idx0`,`idx1`) and (`idx2`,`idx3`) is found first
/// and the lesser of those two winners is returned: three comparisons in total.
fn middling(
    idx0: usize,
    idx1: usize,
    idx2: usize,
    idx3: usize,
    c: &mut impl FnMut(usize, usize) -> Ordering,
) -> usize {
    // winner of the first pair; a tie keeps idx0
    let top_a = match c(idx0, idx1) {
        Less => idx1,
        _ => idx0,
    };
    // winner of the second pair; a tie keeps idx2
    let top_b = match c(idx2, idx3) {
        Less => idx3,
        _ => idx2,
    };
    // the lesser of the two winners; a tie yields the second pair's winner
    match c(top_a, top_b) {
        Less => top_a,
        _ => top_b,
    }
}

/// Minimum value within a range in a slice
/// Finds maximum, when arguments of c are swapped in the function call: `|a,b| c(b,a)`
pub fn min<'a, T>(s: &[&'a T], rng: Range<usize>, c: &mut impl FnMut(&T, &T) -> Ordering) -> &'a T {
Expand Down Expand Up @@ -185,17 +181,16 @@ pub(super) fn oddmedian_by<'a, T>(s: &mut [&'a T], c: &mut impl FnMut(&T, &T) ->
let mut rng = 0..s.len();
let need = s.len() / 2; // median target position in fully partitioned set
loop {
let pivotsub = middling(
rng.start,
rng.start + 1,
rng.end - 2,
rng.end - 1,
&mut |a, b| c(s[a], s[b]),
);
let mut pivotsub = midof3(s, rng.start, rng.start+need, rng.end-1, c);
if rng.len() == 3 { return s[pivotsub]; }
else if rng.len() > 100 {
let pivotsub2 = midof3(s, rng.start+1, rng.start+need+1, rng.end-2, c);
let pivotsub3 = midof3(s, rng.start+2, rng.start+need+2, rng.end-3, c);
pivotsub = midof3(s,pivotsub,pivotsub2,pivotsub3, c);
}
if pivotsub != rng.start {
s.swap(rng.start, pivotsub);
};
let pivotref = s[rng.start];
let (eqsub, gtsub) = <&mut [T]>::part(s, &rng, c);
// well inside lt partition, iterate on it
if need + 2 < eqsub {
Expand All @@ -214,7 +209,7 @@ pub(super) fn oddmedian_by<'a, T>(s: &mut [&'a T], c: &mut impl FnMut(&T, &T) ->
};
if need < gtsub {
// within equals partition, return the pivot
return pivotref;
return s[pivotsub];
};
// first place in gt partition, the solution is its minimum
if need == gtsub {
Expand All @@ -236,14 +231,12 @@ pub(super) fn evenmedian_by<'a, T>(
) -> (&'a T, &'a T) {
let mut rng = 0..s.len();
let need = s.len() / 2 - 1; // median target position in fully partitioned set
loop {
let pivotsub = middling(
rng.start,
rng.start + 1,
rng.end - 2,
rng.end - 1,
&mut |a, b| c(s[a], s[b]),
);
loop {
let mut pivotsub = midof3(s,rng.start,rng.start+need, rng.end-1, c);
if rng.len() > 100 {
let pivotsub2 = midof3(s, rng.start+1, rng.start+need+1, rng.end-2, c);
let pivotsub3 = midof3(s, rng.start+2, rng.start+need+2, rng.end-3, c);
pivotsub = midof3(s,pivotsub,pivotsub2,pivotsub3, c); };
if pivotsub != rng.start {
s.swap(rng.start, pivotsub);
};
Expand Down
17 changes: 17 additions & 0 deletions src/oldalgos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ use core::ops::{Deref, Neg};

const FSIGN: u64 = 0x8000_0000_0000_0000;

/// Selects the index of a `middling` item out of four, i.e. one that ranks
/// second or third among them.
///
/// The comparator `c` receives two indices and orders the items they denote.
/// The greater of each pair (`idx0`,`idx1`) and (`idx2`,`idx3`) is found first
/// and the lesser of those two winners is returned: three comparisons in total.
fn middling(
    idx0: usize,
    idx1: usize,
    idx2: usize,
    idx3: usize,
    c: &mut impl FnMut(usize, usize) -> Ordering,
) -> usize {
    // winner of the first pair; a tie keeps idx0
    let top_a = match c(idx0, idx1) {
        Less => idx1,
        _ => idx0,
    };
    // winner of the second pair; a tie keeps idx2
    let top_b = match c(idx2, idx3) {
        Less => idx3,
        _ => idx2,
    };
    // the lesser of the two winners; a tie yields the second pair's winner
    match c(top_a, top_b) {
        Less => top_a,
        _ => top_b,
    }
}

/// Copies a slice of f64s, removing any NANs from it.
/// It is advisable to test with `non_nans` first, as there may be none
pub fn scrub_nans(v: &[f64]) -> Vec<f64> {
Expand Down
15 changes: 8 additions & 7 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,22 +174,23 @@ fn errors() -> Result<(), Me> {
Ok(())
}

const NAMES: [&str; 3] = ["median_by", "best_k", "medf_unchecked"];
const NAMES: [&str; 2] = ["median_by","medf_unchecked"];

const CLOSURESF64: [fn(&[f64]); 3] = [
const CLOSURESF64: [fn(&[f64]); 2] = [
|v: &[_]| {
v.median_by(&mut <f64>::total_cmp)
.expect("even median closure failed");
},
|v: &[_]| {
v.medf_unchecked();
},
/*
|v: &[_]| {
let mut sorted: Vec<&f64> = v.iter().collect();
sorted.sort_unstable_by(|&a, &b| a.total_cmp(b));
// sorted[sorted.len()/2];
},
|v: &[_]| {
v.medf_unchecked();
},
/*
|v: &[_]| {
v.qmedian_by(&mut <f64>::total_cmp,|&x| x)
.expect("even median closure failed");
Expand All @@ -208,5 +209,5 @@ const CLOSURESF64: [fn(&[f64]); 3] = [
fn comparison() {
// set_seeds(0); // initialise random number generator
// Rnum encapsulates the type of random data to be generated
benchf64(3..100, 1, 10, &NAMES, &CLOSURESF64);
benchf64(93..110, 1, 10, &NAMES, &CLOSURESF64);
}

0 comments on commit 81b989c

Please sign in to comment.