diff --git a/Cargo.toml b/Cargo.toml index 4145647..72979ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "medians" -version = "3.0.10" +version = "3.0.11" authors = ["Libor Spacek"] edition = "2021" description = "Median, Statistical Measures, Mathematics, Statistics" diff --git a/README.md b/README.md index 7dc4996..c2f79f1 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,17 @@ We argued in [`rstats`](https://github.com/liborty/rstats), that using the Geome See [`tests.rs`](https://github.com/liborty/medians/blob/main/tests/tests.rs) for examples of usage. Their automatically generated output can also be found by clicking the 'test' icon at the top of this document and then examining the latest log. +## Outline Usage + +Best methods/functions to be deployed, depending on the end type of data (i.e. type of the items within the input vector/slice). + +- `u8` -> function `medianu8` +- `u64` -> function `medianu64` +- `f64` -> methods of trait Medianf64 +- `T` custom quantisable to u64 -> method `uqmedian` of trait `Median` +- `T` custom comparable by `c` -> method `qmedian_by` of trait `Median` +- `T` custom comparable but not quantisable -> method `median_by` of trait `Median`. + ## Algorithms Analysis Short primitive types are best dealt with by radix search. We have implemented it for `u8`: @@ -39,16 +50,18 @@ Nonetheless, on large datasets, we do devote some of the overall computational e We introduce another new algorithm, implemented as function `medianu64`: - /// Fast medians of u64 end type by binary partitioning - pub fn medianu64(s: &mut [u64]) -> Result, Me> +```rust +/// Fast medians of u64 end type by binary partitioning +pub fn medianu64(s: &mut [u64]) -> Result, Me> +``` on `u64` data, this runs about twice as fast as the general purpose pivoting of `median_by`. The data is partitioned by individual bit values, totally sidestepping the expense of the pivot estimation. The algorithm generally converges well. 
However, when the data happens to be all bunched up within a small range of values, it will slow down. ### Summary of he main features of our general median algorithm -* Linear complexity. -* Fast (in-place) iterative partitioning into three subranges (lesser,equal,greater), minimising data movements and memory management. -* Simple pivot selection strategy: median of three samples (requires only three comparisons). Really poor pivots occur only rarely during the iterative process. For longer data, we deploy median of three medians. +- Linear complexity. +- Fast (in-place) iterative partitioning into three subranges (lesser,equal,greater), minimising data movements and memory management. +- Simple pivot selection strategy: median of three samples (requires only three comparisons). Really poor pivots occur only rarely during the iterative process. For longer data, we deploy median of three medians. ## Trait Medianf64 @@ -103,9 +116,14 @@ pub trait Median<'a, T> { c: &mut impl FnMut(&T, &T) -> Ordering, q: impl Fn(&T) -> f64, ) -> Result; + /// Median of types quantifiable to u64 by `q`, at the end converted to a single f64. + /// For data that is already `u64`, use function `medianu64` + fn uqmedian( + self, + q: impl Fn(&T) -> u64, + ) -> Result<f64, Me>; /// Median by comparison `c`, returns odd/even result - fn median_by(self, c: &mut impl FnMut(&T, &T) -> Ordering) - -> Result, Me>; + fn median_by(self, c: &mut impl FnMut(&T, &T) -> Ordering) -> Result, Me>; /// Zero mean/median data, produced by subtracting the centre fn zeroed(self, centre: f64, quantify: impl Fn(&T) -> f64) -> Result, Me>; /// Median correlation = cosine of an angle between two zero median Vecs @@ -122,6 +140,8 @@ pub trait Median<'a, T> { ## Release Notes +**Version 3.0.11** - Added method `uqmedian` to trait `Median` for types quantifiable to `u64` by some closure `q`. + **Version 3.0.10** - Added `medianu64`. It is faster on u64 data than the general purpose `median_by`. 
It is using a new algorithm that partitions by bits, thus avoiding the complexities of pivot estimation. **Version 3.0.9** - Improved pivot estimation for large data sets. diff --git a/src/implementations.rs b/src/implementations.rs index 23a6109..761ea46 100644 --- a/src/implementations.rs +++ b/src/implementations.rs @@ -29,7 +29,7 @@ where Medians::Odd(m) => { write!(f, "{YL}odd median: {GR}{}{UN}", *m) } - Medians::Even((m1, m2)) => { + Medians::Even((m1,m2)) => { write!(f, "{YL}even medians: {GR}{} {}{UN}", *m1, *m2) } } @@ -210,6 +210,26 @@ impl<'a, T> Median<'a, T> for &'a [T] { } } + /// Median of `&[T]`, quantifiable to u64's by `q`. Returns a single f64. + /// When T is a primitive type directly convertible to u64, use `as u64` as `q`. + /// When u64:From<T> is implemented, use `|x| x.into()` as `q`. + /// In all other cases, use custom quantification closure `q`. + /// When T is not quantifiable at all, use the ultimate `median_by` method. + fn uqmedian( + self, + q: impl Fn(&T) -> u64, + ) -> Result<f64, Me> { + let n = self.len(); + match n { + 0 => return merror("size", "uqmedian_by: zero length data"), + 1 => return Ok(q(&self[0]) as f64), + 2 => return Ok( (q(&self[0]) as f64 + q(&self[1]) as f64) / 2.0 ), + _ => (), + }; + let mut s:Vec<u64> = self.iter().map(q).collect(); + Ok(medianu64(&mut s)?.into()) + } + /// Median(s) of unquantifiable type by general comparison closure fn median_by(self, c: &mut impl FnMut(&T, &T) -> Ordering) -> Result, Me> { let n = self.len(); diff --git a/src/lib.rs b/src/lib.rs index d115f74..fdfcbc4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,12 @@ pub trait Median<'a, T> { c: &mut impl FnMut(&T, &T) -> Ordering, q: impl Fn(&T) -> f64, ) -> Result; + /// Median of types quantifiable to u64 by `q`, at the end converted to a single f64. 
+ /// For data that is already `u64`, use function `medianu64` + fn uqmedian( + self, + q: impl Fn(&T) -> u64, + ) -> Result<f64, Me>; /// Median by comparison `c`, returns odd/even result fn median_by(self, c: &mut impl FnMut(&T, &T) -> Ordering) -> Result, Me>; /// Zero mean/median data, produced by subtracting the centre diff --git a/tests/tests.rs b/tests/tests.rs index 825acc2..ae181d5 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -185,23 +185,23 @@ fn errors() -> Result<(), Me> { Ok(()) } -const NAMES: [&str; 3] = ["median_by","medf_checked","medianu64"]; +const NAMES: [&str; 3] = ["median_by","medf_checked","uqmedian"]; -const CLOSURESU64: [fn(&mut [u64]); 3] = [ - |v: &mut [_]| { +const CLOSURESU64: [fn(&[u64]); 3] = [ + |v: &[_]| { v.median_by(&mut <u64>::cmp) .expect("median_by closure failed"); }, - |v: &mut [_]| { + |v: &[_]| { let vf:Vec<f64> = v.iter().map(|&x| x as f64).collect(); vf.medf_checked() .expect("medf_checked found NaN"); }, - |v: &mut [_]| { - medianu64(v) - .expect("medianu64 found NaN"); + |v: &[_]| { + v.uqmedian(|&x| x) + .expect("uqmedian error"); }, @@ -230,5 +230,5 @@ const CLOSURESU64: [fn(&mut [u64]); 3] = [ fn comparison() { // set_seeds(0); // intialise random numbers generator // Rnum encapsulates the type of random data to be generated - mutbenchu64(100000..100010, 1, 10, &NAMES, &CLOSURESU64); + benchu64(100000..100010, 1, 10, &NAMES, &CLOSURESU64); }