From 81b989cb9a05ed523f400ce91bcffd352d7fb8aa Mon Sep 17 00:00:00 2001 From: L <457124+liborty@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:04:28 +1000 Subject: [PATCH] 3.0.9 --- README.md | 2 +- src/algos.rs | 61 ++++++++++++++++++++++--------------------------- src/oldalgos.rs | 17 ++++++++++++++ tests/tests.rs | 15 ++++++------ 4 files changed, 53 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 689a7bc..8c77a3a 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ Nonetheless, especially on large datasets, one should devote certain limited fra * Linear complexity. * Fast (in-place) iterative partitioning into three subranges (lesser,equal,greater), minimising data movements and memory management. -* Simple pivot selection on small datasets. We define the `middling` value of a sample of four as one of the middle pair of ordered items. This is found in only three comparisons. A `middling` pivot is enough to guarantee convergence of iterative search for the median. Really poor pivots occur only rarely. +* Simple pivot selection strategy: median of three samples (requires only three comparisons). Really poor pivots occur only rarely during the iterative process. For longer data, we do deploy median of three medians but again only on a small sub sample of data. ## Trait Medianf64 diff --git a/src/algos.rs b/src/algos.rs index 640323c..2c0e49f 100644 --- a/src/algos.rs +++ b/src/algos.rs @@ -3,6 +3,19 @@ use std::ops::Range; use indxvec::Mutops; use crate::{Me,merror}; +/// middle valued ref of three, using at most three comparisons +fn midof3(s: &[&T],indx0: usize, indx1: usize, indx2: usize,c: &mut impl FnMut(&T, &T) -> Ordering) -> usize { + let (min, max) = if c(s[indx0],s[indx1]) == Less { + (indx0,indx1) + } else { + (indx1,indx0) + }; + let lastref = s[indx2]; + if c(s[min],lastref) != Less { return min; }; + if c(lastref,s[max]) != Less { return max; }; + indx2 +} + /// Scan a slice of f64s for NANs pub fn nans(v: &[f64]) -> bool { for &f in v { @@ -37,23 +50,6 @@ pub fn best1_k(s: &[T], k: usize, rng: Range, c: F) -> &T k_max } -/// Index of the middling value of four refs. Makes only three comparisons -fn middling( - idx0: usize, - idx1: usize, - idx2: usize, - idx3: usize, - c: &mut impl FnMut(usize, usize) -> Ordering, -) -> usize { - let max1 = if c(idx0, idx1) == Less { idx1 } else { idx0 }; - let max2 = if c(idx2, idx3) == Less { idx3 } else { idx2 }; - if c(max1, max2) == Less { - max1 - } else { - max2 - } -} - /// Minimum value within a range in a slice /// Finds maximum, when arguments of c are swapped in the function call: `|a,b| c(b,a)` pub fn min<'a, T>(s: &[&'a T], rng: Range, c: &mut impl FnMut(&T, &T) -> Ordering) -> &'a T { @@ -185,17 +181,16 @@ pub(super) fn oddmedian_by<'a, T>(s: &mut [&'a T], c: &mut impl FnMut(&T, &T) -> let mut rng = 0..s.len(); let need = s.len() / 2; // median target position in fully partitioned set loop { - let pivotsub = middling( - rng.start, - rng.start + 1, - rng.end - 2, - rng.end - 1, - &mut |a, b| c(s[a], s[b]), - ); + let mut pivotsub = midof3(s, rng.start, rng.start+need, rng.end-1, c); + if rng.len() == 3 { return s[pivotsub]; } + else if rng.len() > 100 { + let pivotsub2 = midof3(s, rng.start+1, rng.start+need+1, rng.end-2, c); + let pivotsub3 = midof3(s, rng.start+2, rng.start+need+2, rng.end-3, c); + pivotsub = midof3(s,pivotsub,pivotsub2,pivotsub3, c); + } if pivotsub != rng.start { s.swap(rng.start, pivotsub); }; - let pivotref = s[rng.start]; let (eqsub, gtsub) = <&mut [T]>::part(s, &rng, c); // well inside lt partition, iterate on it if need + 2 < eqsub { @@ -214,7 +209,7 @@ pub(super) fn oddmedian_by<'a, T>(s: &mut [&'a T], c: &mut impl FnMut(&T, &T) -> }; if need < gtsub { // within equals partition, return the pivot - return pivotref; + return s[pivotsub]; }; // first place in gt partition, the solution is its minimum if need == gtsub { @@ -236,14 +231,12 @@ pub(super) fn evenmedian_by<'a, T>( ) -> (&'a T, &'a T) { let mut rng = 0..s.len(); let need = s.len() / 2 - 1; // median target position in fully partitioned set - loop { - let pivotsub = middling( - rng.start, - rng.start + 1, - rng.end - 2, - rng.end - 1, - &mut |a, b| c(s[a], s[b]), - ); + loop { + let mut pivotsub = midof3(s,rng.start,rng.start+need, rng.end-1, c); + if rng.len() > 100 { + let pivotsub2 = midof3(s, rng.start+1, rng.start+need+1, rng.end-2, c); + let pivotsub3 = midof3(s, rng.start+2, rng.start+need+2, rng.end-3, c); + pivotsub = midof3(s,pivotsub,pivotsub2,pivotsub3, c); }; if pivotsub != rng.start { s.swap(rng.start, pivotsub); }; diff --git a/src/oldalgos.rs b/src/oldalgos.rs index a346fe8..c0bc784 100644 --- a/src/oldalgos.rs +++ b/src/oldalgos.rs @@ -4,6 +4,23 @@ use core::ops::{Deref, Neg}; const FSIGN: u64 = 0x8000_0000_0000_0000; +/// Index of the middling value of four refs. Makes only three comparisons +fn middling( + idx0: usize, + idx1: usize, + idx2: usize, + idx3: usize, + c: &mut impl FnMut(usize, usize) -> Ordering, +) -> usize { + let max1 = if c(idx0, idx1) == Less { idx1 } else { idx0 }; + let max2 = if c(idx2, idx3) == Less { idx3 } else { idx2 }; + if c(max1, max2) == Less { + max1 + } else { + max2 + } +} + /// Copies a slice of f64s, removing any NANs from it. /// It is advisable to test with `non_nans` first, as there may be none pub fn scrub_nans(v: &[f64]) -> Vec { diff --git a/tests/tests.rs b/tests/tests.rs index 5955d07..b9dbca5 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -174,22 +174,23 @@ fn errors() -> Result<(), Me> { Ok(()) } -const NAMES: [&str; 3] = ["median_by", "best_k", "medf_unchecked"]; +const NAMES: [&str; 2] = ["median_by","medf_unchecked"]; -const CLOSURESF64: [fn(&[f64]); 3] = [ +const CLOSURESF64: [fn(&[f64]); 2] = [ |v: &[_]| { v.median_by(&mut ::total_cmp) .expect("even median closure failed"); }, + |v: &[_]| { + v.medf_unchecked(); + }, + /* |v: &[_]| { let mut sorted: Vec<&f64> = v.iter().collect(); sorted.sort_unstable_by(|&a, &b| a.total_cmp(b)); // sorted[sorted.len()/2]; }, - |v: &[_]| { - v.medf_unchecked(); - }, - /* + |v: &[_]| { v.qmedian_by(&mut ::total_cmp,|&x| x) .expect("even median closure failed"); @@ -208,5 +209,5 @@ const CLOSURESF64: [fn(&[f64]); 3] = [ fn comparison() { // set_seeds(0); // intialise random numbers generator // Rnum encapsulates the type of random data to be generated - benchf64(3..100, 1, 10, &NAMES, &CLOSURESF64); + benchf64(93..110, 1, 10, &NAMES, &CLOSURESF64); }