diff --git a/Cargo.toml b/Cargo.toml index 72979ff..3ebe204 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "medians" -version = "3.0.11" +version = "3.1.0" authors = ["Libor Spacek"] edition = "2021" description = "Median, Statistical Measures, Mathematics, Statistics" diff --git a/README.md b/README.md index 3591f63..ca152d5 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,8 @@ pub trait Median<'a, T> { ## Release Notes +**Version 3.1.0** - Adding faster `medu64`, even variant is still work in progress. + **Version 3.0.11** - Added method `uqmedian` to trait `Median` for types quantifiable to `u64` by some closure `q`. Fixed a recent bug in `oddmedian_by`, whereby the pivot reference was not timely saved. Sorry about that. This was affecting results where the pivot coincident with the median was guessed early on. **Version 3.0.10** - Added `medianu64`. It is faster on u64 data than the general purpose `median_by`. It is using a new algorithm that partitions by bits, thus avoiding the complexities of pivot estimation. diff --git a/src/algos.rs b/src/algos.rs index 049610d..520f0c1 100644 --- a/src/algos.rs +++ b/src/algos.rs @@ -1,20 +1,21 @@ -// use crate::{ConstMedians,merror, Me}; use core::cmp::{Ordering, Ordering::*}; -use indxvec::Mutops; +use indxvec::{Mutops, Vecops}; use std::ops::Range; -const FIRST_BIT: u64 = 0x80_00_00_00_00_00_00_00; -// const FIRST_BYTE: u64 = 0xFF_00_00_00_00_00_00_00; +/// Mask of the first bit of a u64 +pub const FIRST_BIT: u64 = 0x80_00_00_00_00_00_00_00; +/* /// Copies &[u64] to Vec pub fn tobebytes(v: &[u64]) -> Vec { - let n = v.len(); + let n = v.len(); let mut bytes = vec![0u8;8*n]; for i in 0..n { - bytes[8*i..][..8].copy_from_slice(&v[i].to_be_bytes()); + bytes[8*i..][..8].copy_from_slice(&v[i].to_be_bytes()); } bytes } +*/ /// Partitions `s: &mut [u64]` within range `rng`, using bitmask. /// Returns the boundary of the rearranged partitions gtstart, where @@ -38,26 +39,31 @@ pub fn part_binary(s: &mut [u64], rng: &Range, bitmask: u64) -> usize { gtstart } -/// Moves all items that have upper bytes equal to val to the beginning of the range. -/// Overwrites whatever was there! -pub fn collect(s:&mut [u64], rng: &Range, val: u64, byteno:usize) -> usize { - let mut nestart = rng.start; - for &eq in s.iter().take(rng.end).skip(rng.start) { - if (eq >> (8*byteno)) == val { nestart += 1; } else { break; }; - }; // nestart is at the first non-equal item - for i in nestart + 1..rng.end { - if (s[i] >> (8*byteno)) == val { - s[nestart] = s[i]; - nestart += 1; - }; - } - nestart -} - - +/// index of middle valued ref of three, using at most three comparisons { +pub fn midof3( + s: &[T], + indx0: usize, + indx1: usize, + indx2: usize, + c: &mut impl FnMut(&T, &T) -> Ordering, +) -> usize { + let (min, max) = if c(&s[indx0], &s[indx1]) == Less { + (indx0, indx1) + } else { + (indx1, indx0) + }; + let lastref = &s[indx2]; + if c(&s[min], lastref) != Less { + return min; + }; + if c(lastref, &s[max]) != Less { + return max; + }; + indx2 +} -/// index of middle valued item of three, using at most three comparisons { -fn midof3( +/// index of middle valued ref of three, using at most three comparisons { +fn midof3_refs( s: &[&T], indx0: usize, indx1: usize, @@ -90,7 +96,8 @@ pub fn nans(v: &[f64]) -> bool { } /// kth item from rng (ascending or descending, depending on `c`) -pub fn best1_k(s: &[T], k: usize, rng: Range, c: F) -> &T +/// using binary insert k-sort +pub fn best_k(s: &[T], k: usize, rng: Range, c: F) -> &T where F: Fn(&T, &T) -> Ordering, { @@ -113,98 +120,74 @@ where k_max } -/// Minimum value within a range in a slice -/// Finds maximum, when arguments of c are swapped in the function call: `|a,b| c(b,a)` -pub fn min<'a, T>(s: &[&'a T], rng: Range, c: &mut impl FnMut(&T, &T) -> Ordering) -> &'a T { - let mut min = s[rng.start]; +/// Ref to the minimum item within a range in a slice, +/// or to the maximum, when invoked with swapped arguments of comparator c: `|a,b| c(b,a)` +pub fn extremum<'a, T>(s: &'a[T], rng: Range, c: &mut impl FnMut(&T, &T) -> Ordering) -> &'a T { + let mut min = &s[rng.start]; for si in s.iter().take(rng.end).skip(rng.start + 1) { - if c(si,min) == Ordering::Less { + if c(si, min) == Ordering::Less { min = si; }; } min } - -/// Minimum value within a range in a slice -fn minu64(s: &[u64], rng: Range) -> u64 { - let mut min = s[rng.start]; - for &si in s.iter().take(rng.end).skip(rng.start + 1) { - if si < min { - min = si; - }; - } - min -} - -/// Maximum value within a range in a slice -fn maxu64(s: &[u64], rng: Range) -> u64 { - let mut max = s[rng.start]; - for &si in s.iter().take(rng.end).skip(rng.start + 1) { - if si > max { - max = si; - }; - } - max -} - -/// Minimum pair within a range in a slice -fn min2u64(s: &[u64], rng: Range) -> (u64, u64) { - let (mut m1, mut m2) = if s[rng.start + 1] < s[rng.start] { - (s[rng.start + 1], s[rng.start]) +/// Refs to the smallest two values within a range in a slice +/// or to the largest two, when invoked with swapped arguments of comparator c: `|a,b| c(b,a)`. +/// The first returned item always refers to the extremum. +pub fn best_two<'a, T>(s: &'a[T], rng: Range, c: &mut impl FnMut(&T, &T) -> Ordering) -> (&'a T, &'a T) { + let (mut m1, mut m2) = if c(&s[rng.start+1], &s[rng.start]) == Ordering::Less { + (&s[rng.start+1], &s[rng.start]) } else { - (s[rng.start], s[rng.start + 1]) + (&s[rng.start], &s[rng.start+1]) }; - for &si in s.iter().take(rng.end).skip(rng.start + 2) { - if si < m1 { - m2 = m1; - m1 = si; - } else if si < m2 { - m2 = si; + for si in s.iter().take(rng.end).skip(rng.start + 2) { + if c(si,m2) == Ordering::Less { + if c(si,m1) == Ordering::Less { + m2 = m1; + m1 = si; + } else { + m2 = si; + }; }; - } + }; (m1, m2) } -/// Minimum pair within a range in a slice -fn max2u64(s: &[u64], rng: Range) -> (u64, u64) { - let (mut m1, mut m2) = if s[rng.start + 1] > s[rng.start] { - (s[rng.start + 1], s[rng.start]) - } else { - (s[rng.start], s[rng.start + 1]) - }; - for &si in s.iter().take(rng.end).skip(rng.start + 2) { - if si > m1 { - m2 = m1; - m1 = si; - } else if si > m2 { - m2 = si; +/// Ref to the minimum item within a range in a slice of refs +/// or to the maximum, when invoked with swapped arguments of comparator c: `|a,b| c(b,a)` +pub fn extremum_refs<'a, T>(s: &[&'a T], rng: Range, c: &mut impl FnMut(&T, &T) -> Ordering) -> &'a T { + let mut m = s[rng.start]; + for si in s.iter().take(rng.end).skip(rng.start + 1) { + if c(si, m) == Ordering::Less { + m = si; }; } - (m2, m1) + m } - -/// Two minimum values within rng in slice s. -/// Finds maxima, when invoked with arguments of c swapped: `|a,b| c(b,a)`. -/// The first returned item refers to the primary minimum/maximum. -pub fn min2<'a, T>( +/// Refs to two smallest items within a range in a slice of refs +/// or to the largest two, when invoked with swapped arguments of comparator c: `|a,b| c(b,a)`. +/// The first returned item is always the extremum. +pub fn best_two_refs<'a, T>( s: &[&'a T], rng: Range, - c: &mut impl FnMut(&T, &T) -> Ordering, + c: &mut impl FnMut(&T, &T) -> Ordering ) -> (&'a T, &'a T) { - let (mut min1, mut min2) = if c(s[rng.start], s[rng.start+1]) == Ordering::Less { - (s[rng.start], s[rng.start+1]) - } else { + let (mut m1, mut m2) = if c(s[rng.start+1], s[rng.start]) == Ordering::Less { (s[rng.start+1], s[rng.start]) + } else { + (s[rng.start], s[rng.start+1]) }; for si in s.iter().take(rng.end).skip(rng.start + 2) { - if c(si,min1) == Ordering::Less { - min2 = min1; - min1 = si; - } else if c(si,min2) == Ordering::Less { - min2 = si; - } - } - (min1, min2) + if c(si,m2) == Ordering::Less { + if c(si,m1) == Ordering::Less { + m2 = m1; + m1 = si; + } else { + m2 = si; + }; + }; + }; + (m1, m2) } /// measure errors from centre (for testing) @@ -232,7 +215,7 @@ pub fn qbalance(s: &[T], centre: &f64, q: impl Fn(&T) -> f64) -> i64 { pub fn oddmedianu8(s: &[u8]) -> u8 { let need = s.len() / 2; // median target position let mut histogram = [0_usize; 256]; - let mut cummulator = 0_usize; + let mut cummulator = 0_usize; for &u in s.iter() { histogram[u as usize] += 1; } @@ -243,7 +226,7 @@ pub fn oddmedianu8(s: &[u8]) -> u8 { }; cummulator += hist; if need < cummulator { - return i; + return i; }; } 255 @@ -255,7 +238,7 @@ pub fn evenmedianu8(s: &[u8]) -> (u8,u8) { let mut histogram = [0_usize; 256]; let mut cummulator = 0_usize; let mut firstres = true; - let mut res1 = 255_u8; + let mut res1 = 255_u8; for &u in s.iter() { histogram[u as usize] += 1; } @@ -267,69 +250,27 @@ pub fn evenmedianu8(s: &[u8]) -> (u8,u8) { cummulator += hist; if firstres { if cummulator > need { - return (i,i); + return (i, i); }; // cummulator exceeds need, found both items if cummulator == need { // found first item (last in this bucket) res1 = i; - firstres = false; + firstres = false; }; // while cummulator < need, loop also continues } else { // the second item is in the first following non-zero bucket - return (res1,i); + return (res1,i); }; // found the second } - if firstres { (255,255) } else { (res1,255) } -} - -/// Odd median of specific bytes in `&u[64]` -pub fn oddmed(s: &[u64], rng:&Range, byteno:usize) -> u64 { - let need = s.len() / 2; // median target position - let mut histogram = [0_usize; 256]; - let mut cummulator = 0_usize; - for &u in s.iter().skip(rng.start).take(rng.len()) { - histogram[((u >> (8*byteno)) & 0xFF) as usize] += 1; - }; - for (i,h) in histogram.iter().enumerate().take(255) { - cummulator += h; - if cummulator > need { - return i as u64; - }; + if firstres { + (255, 255) + } else { + (res1, 255) } - 255 } -/// Odd median of specific bytes in `&u[64]` -pub fn evenmed(s: &[u64], rng:&Range, byteno:usize) -> (u64,u64) { - let need = s.len() / 2; // median target position - let mut histogram = [0_usize; 256]; - let mut cummulator = 0_usize; - let mut firstres = true; - let mut res1 = 255_u64; - for &u in s.iter().skip(rng.start).take(rng.len()) { - histogram[((u >> (8*byteno)) & 0xFF) as usize] += 1; - }; - for (i,&h) in histogram.iter().enumerate().take(255) { - if h == 0_usize { continue }; - cummulator += h; - if firstres { - if cummulator > need { - return (i as u64, i as u64); - }; // cummulator exceeds need, found both items - if need == cummulator { - // found first item (last in this bucket) - res1 = i as u64; - firstres = false; - }; - } else { - // the second item is in the first following non-zero bucket - return (res1, i as u64); - }; // found the second - }; - if firstres { (255,255) } else { (res1,255) } -} - + /// Median of odd sized u64 data -pub fn oddmedianu64(s: &mut [u64]) -> u64 { +pub fn oddmedianu64(s: &mut [u64]) -> &u64 { let mut rng = 0..s.len(); let need = s.len() / 2; // median target position in fully partitioned let mut bitval = FIRST_BIT; // set the most significant bit @@ -338,9 +279,9 @@ pub fn oddmedianu64(s: &mut [u64]) -> u64 { if bitval == 1 { // termination of bit iterations: same values left if need < gtsub { - return s[gtsub - 1]; + return &s[gtsub - 1]; }; - return s[gtsub]; + return &s[gtsub]; }; // well inside lt partition, iterate on it if need + 2 < gtsub { @@ -354,25 +295,25 @@ pub fn oddmedianu64(s: &mut [u64]) -> u64 { bitval >>= 1; // next bit continue; }; - // penultimate place in lt partition, find second maximum + // penultimate place in lt partition, find the second maximum if need + 2 == gtsub { - return max2u64(s, rng.start..gtsub).0; + return best_two(s, rng.start..gtsub,&mut |a,b| b.cmp(a)).1; }; // last place in the lt partition, find its maximum if need + 1 == gtsub { - return maxu64(s, rng.start..gtsub); + return extremum(s, rng.start..gtsub,&mut |a,b| b.cmp(a)); }; // first place in gt partition, find its minimum if need == gtsub { - return minu64(s, gtsub..rng.end); + return extremum(s, gtsub..rng.end, &mut |a,b| a.cmp(b)); }; - // second place in gt partition, find second minimum - return min2u64(s, gtsub..rng.end).1; + // second place in gt partition, find its second minimum + return best_two(s, gtsub..rng.end, &mut |a,b| a.cmp(b)).1; } } /// Median of even sized u64 data -pub fn evenmedianu64(s: &mut [u64]) -> (u64, u64) { +pub fn evenmedianu64(s: &mut [u64]) -> (&u64, &u64) { let mut rng = 0..s.len(); let need = s.len() / 2 - 1; // first median target position let mut bitval = FIRST_BIT; // set the most significant bit @@ -381,12 +322,12 @@ pub fn evenmedianu64(s: &mut [u64]) -> (u64, u64) { if bitval == 1 { // termination of bit iterations: same values left if need < gtsub - 1 { - return (s[gtsub - 2], s[gtsub - 1]); + return (&s[gtsub - 2], &s[gtsub - 1]); }; if need == gtsub - 1 { - return (s[gtsub - 1], s[gtsub]); + return (&s[gtsub - 1], &s[gtsub]); }; - return (s[gtsub], s[gtsub + 1]); + return (&s[gtsub], &s[gtsub + 1]); }; // well inside lt partition, iterate on it if need + 2 < gtsub { @@ -402,73 +343,114 @@ pub fn evenmedianu64(s: &mut [u64]) -> (u64, u64) { }; // penultimate place in lt partition, solution is the maxima pair: if need + 2 == gtsub { - return max2u64(s, rng.start..gtsub); + let (m1,m2) = best_two(s, rng.start..gtsub,&mut |a,b| b.cmp(a)); + return (m2,m1) }; - // last place in the lt partition: + // last place in the lt partition, return max of lt and min of gt partitions if need + 1 == gtsub { - return (maxu64(s, rng.start..gtsub), minu64(s, gtsub..rng.end)); + return (extremum(s, rng.start..gtsub,&mut |a,b| b.cmp(a)), + extremum(s, gtsub..rng.end,&mut |a,b| a.cmp(b))); }; - // first place in gt partition, the solution is its minima pair + // first place in gt partition, the solution is its minima pair: if need == gtsub { - return min2u64(s, gtsub..rng.end); + return best_two(s, gtsub..rng.end,&mut |a,b| a.cmp(b)); }; } } -/* -/// Median of odd sized u64 data -pub fn oddmedu64(s: &mut [u64]) -> u64 { - let mut rng = 0..s.len(); - let need = s.len()/2; // median target position in fully partitioned - let bytes = tobebytes(s); - let mut byteno = 7; // 7..0 from the most significant (left hand) - loop { - let medianbyte = oddmed(s, &rng, byteno); - if byteno == 7 { // termination of bytes iterations - return (s[rng.start] >> 8) & medianbyte; - }; - rng = rng.start .. collect(s,&rng,medianbyte,byteno); - - if rng.len() == 3 { - return s[pivotsub]; +/// Collects all items that have given byte equal to val +pub fn select(bytes: &[[u8; 8]], byteno: usize, val: u8) -> Vec<[u8; 8]> { + let mut res = Vec::new(); + for &item in bytes { + if item[byteno] == val { + res.push(item); }; + } + res +} - // well inside lt partition, iterate on it - if need + 2 < gtsub { - rng.end = gtsub; - bitval >>= 1; // next bit - continue; - }; - // well inside gt partition, iterate on it - if need > gtsub + 1 { - bitval >>= 1; // next bit - continue; - }; - let start = rng.start; - // penultimate place in lt partition, find second maximum - if need + 2 == gtsub { - return max2u64(s, rng.start..gtsub).0; - }; - // last place in the lt partition, find its maximum - if need + 1 == gtsub { - return maxu64(s, rng.start..gtsub); - }; - // first place in gt partition, find its minimum - if need == gtsub { - return minu64(s, gtsub..rng.end); +/// Median of odd sized u64 data (recursive) +/// byteno is 0..7 from the most significant (left hand) +pub(super) fn oddmedu64(bytes: &[[u8; 8]], byteno: usize, need: usize) -> u64 { + let n = bytes.len(); + assert_ne!(n, 0, "oddmedu64 failed to find median"); + if n == 1 { + return u64::from_be_bytes(bytes[0]); + }; + if n < 8 { // small number of items remaining, just use sort + let idx = bytes.isort_refs(0..bytes.len(), |a:&[u8; 8],b| a.cmp(b)); + return u64::from_be_bytes(*idx[need]); + }; + let mut histogram = [0_usize; 256]; + let mut cummulator = 0_usize; + for &u in bytes { + histogram[u[byteno] as usize] += 1; + }; + let mut medianbyte = 255_u8; + for (i, &h) in histogram.iter().enumerate() { + if h == 0_usize { continue }; + cummulator += h; + if cummulator > need { + medianbyte = i as u8; + cummulator -= h; // number of items < medianbyte + break; }; - // second place in gt partition, find second minimum - return min2u64(s, gtsub..rng.end).1; } + if byteno == 7 { + // termination of recursion + // bytes are all the same now, except the last one + let mut res = bytes[0]; // so can return any item + res[7] = medianbyte; // assign the last median byte + return u64::from_be_bytes(res); + }; + oddmedu64( + &select(bytes, byteno, medianbyte), + byteno + 1, + need - cummulator, + ) // tail recursion +} + +/* +/// Median of even sized u64 data (recursive) +pub(super) fn evenmedu64(bytes: &[[u8;8]], byteno:usize, need:usize) -> (u64, u64) { + let n = bytes.len(); + assert_ne!(n,0,"evenmedu64 failed to find median"); + if n == 2 { + let (m1,m2) = (u64::from_be_bytes(bytes[0]),u64::from_be_bytes(bytes[1])); + if m2 < m1 { return (m2,m1); } else { return (m1,m2); }; + }; + if n < 8 { let sorted_bytes = bytes.isor + let mut histogram = [0_usize; 256]; + let mut cummulator = 0_usize; + for &u in bytes { + histogram[u[byteno] as usize] += 1; + }; + let mut firstres = true; + let mut medianbyte = 255_u8; + for (i,h) in histogram.iter().enumerate() { + if h == 0_usize { continue }; + cummulator += h; + if firstres { + if cummulator > need { + medianbyte = i as u8; + cummulator -= h; // number of items < medianbyte + break; + return (i as u64, i as u64); + }; // cummulator exceeds need, found both items + if need == cummulator { + // found first item (last in this bucket) + res1 = i as u64; + firstres = false; + }; + } else { + // the second item is in the first following non-zero bucket + return (res1, i as u64); + }; // found the second + }; + if firstres { (255,255) } else { (res1,255) } } -/// Median of even sized u64 data -pub fn evenmedu64(s: &mut [u64]) -> (u64, u64) { - let mut rng = 0..s.len(); - let need = s.len() / 2 - 1; // first median target position - let mut bitval = FIRST_BIT; // set the most significant bit - loop { let gtsub = part_binary(s, &rng, bitval); if bitval == 1 { // termination of bit iterations: same values left @@ -507,28 +489,29 @@ pub fn evenmedu64(s: &mut [u64]) -> (u64, u64) { } } */ + /// Median of odd sized generic data with Odering comparisons by custom closure pub(super) fn oddmedian_by<'a, T>( s: &mut [&'a T], c: &mut impl FnMut(&T, &T) -> Ordering, ) -> &'a T { let mut rng = 0..s.len(); - let need = s.len() / 2; // first median target position + let need = s.len() / 2; // first median target position loop { - let mut pivotsub = midof3(s, rng.start, rng.start + need, rng.end - 1, c); + let mut pivotsub = midof3_refs(s, rng.start, rng.start + need, rng.end - 1, c); if rng.len() == 3 { return s[pivotsub]; }; if rng.len() > 100 { - let pivotsub2 = midof3(s, rng.start + 1, rng.start + need + 1, rng.end - 2, c); - let pivotsub3 = midof3(s, rng.start + 2, rng.start + need + 2, rng.end - 3, c); - pivotsub = midof3(s, pivotsub, pivotsub2, pivotsub3, c); + let pivotsub2 = midof3_refs(s, rng.start + 1, rng.start + need + 1, rng.end - 2, c); + let pivotsub3 = midof3_refs(s, rng.start + 2, rng.start + need + 2, rng.end - 3, c); + pivotsub = midof3_refs(s, pivotsub, pivotsub2, pivotsub3, c); } if pivotsub != rng.start { s.swap(rng.start, pivotsub); }; let pivotref = s[rng.start]; - let (eqsub, gtsub) = <&mut [T]>::part(s, &rng, c); + let (eqsub, gtsub) = <&mut [T]>::part(s, &rng, c); // well inside lt partition, iterate on it if need + 2 < eqsub { rng.end = eqsub; @@ -537,12 +520,12 @@ pub(super) fn oddmedian_by<'a, T>( // penultimate place in lt partition, solution: if need + 2 == eqsub { // swapped comparator arguments to get penultimate maximum - return min2(s, rng.start..eqsub, &mut |a, b| c(b, a)).1; + return best_two_refs(s, rng.start..eqsub, &mut |a, b| c(b, a)).1; }; // last place in the lt partition, solution is its maximum: if need + 1 == eqsub { // swapped comparator arguments to get maximum - return min(s, rng.start..eqsub, &mut |a, b| c(b, a)); + return extremum_refs(s, rng.start..eqsub, &mut |a, b| c(b, a)); }; if need < gtsub { // within equals partition, return the pivot @@ -550,11 +533,11 @@ pub(super) fn oddmedian_by<'a, T>( }; // first place in gt partition, the solution is its minimum if need == gtsub { - return min(s, gtsub..rng.end, c); + return extremum_refs(s, gtsub..rng.end, c); }; // second place in gt partition, the solution is the next minimum if need == gtsub + 1 { - return min2(s, gtsub..rng.end, c).1; + return best_two_refs(s, gtsub..rng.end, c).1; }; // well inside gt partition, iterate on it rng.start = gtsub; @@ -569,11 +552,11 @@ pub(super) fn evenmedian_by<'a, T>( let mut rng = 0..s.len(); let need = s.len() / 2 - 1; // median target position in fully partitioned set loop { - let mut pivotsub = midof3(s, rng.start, rng.start + need, rng.end - 1, c); + let mut pivotsub = midof3_refs(s, rng.start, rng.start + need, rng.end - 1, c); if rng.len() > 100 { - let pivotsub2 = midof3(s, rng.start + 1, rng.start + need + 1, rng.end - 2, c); - let pivotsub3 = midof3(s, rng.start + 2, rng.start + need + 2, rng.end - 3, c); - pivotsub = midof3(s, pivotsub, pivotsub2, pivotsub3, c); + let pivotsub2 = midof3_refs(s, rng.start + 1, rng.start + need + 1, rng.end - 2, c); + let pivotsub3 = midof3_refs(s, rng.start + 2, rng.start + need + 2, rng.end - 3, c); + pivotsub = midof3_refs(s, pivotsub, pivotsub2, pivotsub3, c); }; if pivotsub != rng.start { s.swap(rng.start, pivotsub); @@ -586,28 +569,26 @@ pub(super) fn evenmedian_by<'a, T>( rng.end = eqsub; continue; }; - // penultimate place in lt partition, solution: + // penultimate place in lt partition, return its two maxima: if need + 2 == eqsub { - // swapping comparison arguments to get two maxima - let (m1, m2) = min2(s, rng.start..eqsub, &mut |a, b| c(b, a)); - return (m2, m1); + let (m1, m2) = best_two_refs(s, rng.start..eqsub, &mut |a, b| c(b, a)); + return (m2,m1); // in ascending order }; - // last place in the lt partition, solution: + // last place in the lt partition, return its maximum and pivot: if need + 1 == eqsub { - // swapped comparison arguments to get maximum - return (min(s, rng.start..eqsub, &mut |a, b| c(b, a)), pivotref); + return (extremum_refs(s, rng.start..eqsub, &mut |a, b| c(b, a)), pivotref); }; // fully within equals partition if need + 1 < gtsub { return (pivotref, pivotref); }; - // last place in equals partition + // last place in equals partition, return pivot and minimum of gt partition: if need + 1 == gtsub { - return (pivotref, min(s, gtsub..rng.end, c)); + return (pivotref, extremum_refs(s, gtsub..rng.end, c)); }; - // first place in gt partition, the solution are its two minima + // first place in gt partition, return its two minima if need == gtsub { - return min2(s, gtsub..rng.end, c); + return best_two_refs(s, gtsub..rng.end, c); }; // inside gt partition, iterate on it, narrowing the range rng.start = gtsub; diff --git a/src/implementations.rs b/src/implementations.rs index 4b89f28..9d29ee4 100644 --- a/src/implementations.rs +++ b/src/implementations.rs @@ -36,29 +36,13 @@ where } } -impl std::fmt::Display for ConstMedians -where - T: Display, -{ - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - ConstMedians::Odd(m) => { - write!(f, "{YL}odd median: {GR}{}{UN}", *m) - } - ConstMedians::Even((m1, m2)) => { - write!(f, "{YL}even medians: {GR}{} {}{UN}", *m1, *m2) - } - } - } -} - -impl From> for f64 -where T: std::convert::Into +impl From> for f64 +where T: Copy+std::convert::Into { - fn from(item:ConstMedians) -> f64 { + fn from(item:Medians) -> f64 { match item { - ConstMedians::Odd(m) => m.into() as f64, - ConstMedians::Even((m1, m2)) => (m1.into() as f64 + m2.into() as f64)/ 2.0 + Medians::Odd(&m) => m.into() as f64, + Medians::Even((&m1, &m2)) => (m1.into() as f64 + m2.into() as f64)/ 2.0 } } } @@ -207,8 +191,9 @@ impl<'a, T> Median<'a, T> for &'a [T] { } } - /// Median of `&[T]`, quantifiable to u64's by `q`. Returns a single f64. - /// When T is a primitive type directly convertible to u64, use `as u64` as `q`. + /// Median of `&[T]`, quantifiable to u64's by `q`. + /// Returns a single f64, possibly losing some precision for even medians. + /// When T is a primitive type directly convertible to u64, use `as u64` for `q`. /// When u64:From is implemented, use `|x| x.into()` as `q`. /// In all other cases, use custom quantification closure `q`. /// When T is not quantifiable at all, use the ultimate `median_by` method. @@ -224,7 +209,10 @@ impl<'a, T> Median<'a, T> for &'a [T] { _ => (), }; let mut s:Vec = self.iter().map(q).collect(); - Ok(medianu64(&mut s)?.into()) + match medianu64(&mut s)? { + Medians::Odd(r) => Ok(*r as f64), + Medians::Even((r1, r2)) => Ok((*r1 as f64 + *r2 as f64) / 2_f64) + } } /// Median(s) of unquantifiable type by general comparison closure diff --git a/src/lib.rs b/src/lib.rs index ae5ef9d..edc93ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,8 @@ pub mod implementations; use core::cmp::Ordering; use core::fmt::Debug; -use crate::algos::{oddmedianu8,evenmedianu8,oddmedianu64,evenmedianu64}; // ,oddmedu64,evenmedu64 + +use crate::algos::{evenmedianu64, evenmedianu8, midof3, oddmedianu64, oddmedianu8, oddmedu64}; /// Shorthand type for medians errors with message payload specialized to String pub type Me = MedError; @@ -30,12 +31,12 @@ pub enum MedError { /// Convenience function for building MedError /// from error kind name and payload message, which can be either &str or String -pub fn merror(kind: &str, msg: impl Into) -> Result> { +pub fn merror(kind: &str, msg: impl Into) -> Result> { match kind { "size" => Err(MedError::Size(msg.into())), - "nan" => Err(MedError::Nan(msg.into())), + "nan" => Err(MedError::Nan(msg.into())), "other" => Err(MedError::Other(msg.into())), - _ => Err(MedError::Other("Wrong error kind given to merror".into())) + _ => Err(MedError::Other("Wrong error kind given to merror".into())), } } @@ -47,59 +48,66 @@ pub enum Medians<'a, T> { Even((&'a T, &'a T)), } -/// Enum for results of odd/even medians with simple numeric endtypes. -/// Convert to a single f64 by applying `.into()` -pub enum ConstMedians { - /// Odd sized data results in a single median - Odd(T), - /// Even sized data results in a pair of (centered) medians - Even((T,T)) -} - /// Fast medians of u8 end type by fast radix search -pub fn medianu8(s: &[u8]) -> Result, Me> { +pub fn medianu8(s: &[u8]) -> Result<(u8, u8), Me> { let n = s.len(); - match n { - 0 => return merror("size", "median: zero length data")?, - 1 => return Ok(ConstMedians::Odd(s[0])), - 2 => return Ok(ConstMedians::Even((s[0],s[1]))), - _ => (), + if n == 0 { + merror("size", "median: zero length data")?; }; if (n & 1) == 1 { - Ok(ConstMedians::Odd(oddmedianu8(s))) + match n { + 1 => Ok((s[0], s[0])), + 3 => { + let indx = midof3(s, 0, 1, 2, &mut |a, b| a.cmp(b)); + Ok((s[indx], s[indx])) + } + _ => { + let m = oddmedianu8(s); + Ok((m, m)) + } + } + } else if n == 2 { + Ok((s[0], s[1])) } else { - Ok(ConstMedians::Even(evenmedianu8(s))) + let (m1, m2) = evenmedianu8(s); + Ok((m1, m2)) } } /// Fast medians of u64 end type by binary partitioning. /// Changes the order of the input data -pub fn medianu64(s: &mut [u64]) -> Result, Me> { +pub fn medianu64(s: &mut [u64]) -> Result, Me> { let n = s.len(); match n { 0 => return merror("size", "medu: zero length data"), - 1 => return Ok( ConstMedians::Odd(s[0]) ), - 2 => return Ok( ConstMedians::Even((s[0], s[1])) ), + 1 => return Ok(Medians::Odd(&s[0])), + 2 => return Ok(Medians::Even((&s[0], &s[1]))), _ => (), }; - if (n & 1) == 1 { Ok( ConstMedians::Odd(oddmedianu64(s)) ) } - else { Ok(ConstMedians::Even(evenmedianu64(s))) } + if (n & 1) == 1 { + Ok(Medians::Odd(oddmedianu64(s))) + } else { + Ok(Medians::Even(evenmedianu64(s))) + } } -/* -/// Fast medians of u64 end type by iterated radix search -pub fn medu64(s: &mut [u64]) -> Result, Me> { +/// Fast medians of u64 end type by radix search +pub fn medu64(s: &mut [u64]) -> Result<(u64, u64), Me> { if (s.len() & 1) == 1 { - Ok(ConstMedians::Odd(oddmedu64(s))) + let byteslice = s.iter().map(|x| x.to_be_bytes()).collect::>(); + let res = oddmedu64(&byteslice, 0, s.len() / 2); + Ok((res, res)) } else { - Ok(ConstMedians::Even(evenmedu64(s))) + //let (r1,r2) = evenmedu64(&byteslice,0,s.len()/2-1); Ok((r1,r2)) + let (&m1, &m2) = evenmedianu64(s); + Ok((m1, m2)) } } -*/ + /// Fast 1D medians of floating point data, plus related methods pub trait Medianf64 { /// Median of f64s, NaNs removed - fn medf_checked(self) -> Result; + fn medf_checked(self) -> Result; /// Median of f64s, including NaNs fn medf_unchecked(self) -> f64; /// Iterative weighted median @@ -122,10 +130,7 @@ pub trait Median<'a, T> { ) -> Result; /// Median of types quantifiable to u64 by `q`, at the end converted to a single f64. /// For data that is already `u64`, use function `medianu64` - fn uqmedian( - self, - q: impl Fn(&T) -> u64, - ) -> Result; + fn uqmedian(self, q: impl Fn(&T) -> u64) -> Result; /// Median by comparison `c`, returns odd/even result fn median_by(self, c: &mut impl FnMut(&T, &T) -> Ordering) -> Result, Me>; /// Zero mean/median data, produced by subtracting the centre diff --git a/src/oldalgos.rs b/src/oldalgos.rs index 3314ffc..3c607c4 100644 --- a/src/oldalgos.rs +++ b/src/oldalgos.rs @@ -4,6 +4,27 @@ use core::ops::{Deref, Neg}; const FSIGN: u64 = 0x8000_0000_0000_0000; +/// middle valued ref of three, at most three comparisons +pub fn midof3refs<'a, T>( + item1: &'a T, + item2: &'a T, + item3: &'a T, + c: &mut impl FnMut(&T, &T) -> Ordering, +) -> &'a T { + let (min, max) = if c(item2, item1) == Less { + (item2, item1) + } else { + (item1, item2) + }; + if c(min, item3) != Less { + return min; + }; + if c(item3, max) != Less { + return max; + }; + item3 +} + /// Index of the middling value of four refs. Makes only three comparisons fn middling( idx0: usize, @@ -62,26 +83,6 @@ pub fn quant_vec(v: &[T], quantify: impl Fn(&T) -> U) -> Vec { v.iter().map(quantify).collect::>() } -/// middle valued ref out of three, at most three comparisons -pub fn midof3<'a, T>( - item1: &'a T, - item2: &'a T, - item3: &'a T, - c: &mut impl FnMut(&T, &T) -> Ordering, -) -> &'a T { - let (min, max) = if c(item2, item1) == Less { - (item2, item1) - } else { - (item1, item2) - }; - if c(min, item3) != Less { - return min; - }; - if c(item3, max) != Less { - return max; - }; - item3 -} /// pivot estimate as recursive mid of mids of three pub fn midofmids<'a, T>(s: &[&T], rng: Range, c: &mut impl FnMut(&T, &T) -> Ordering) -> &'a T diff --git a/tests/tests.rs b/tests/tests.rs index 77701c2..ea5eb8c 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -12,8 +12,7 @@ use times::{benchf64, benchu64, benchu8, mutbenchf64, mutbenchu64}; #[test] fn partbin() -> Result<(), Me> { let mut data = [257_u64,9,8,7,6,5,4,3,2,1]; - println!("Data: {}",data.gr()); - println!("Bytes: {}",tobebytes(&data).gr()); + println!("Data: {}",data.gr()); let n = data.len(); let gtsub = part_binary(&mut data, &(0..n), 3); println!("Partitioned by bit 3: {},{}",data[..gtsub].gr(),data[gtsub..].gr()); @@ -30,9 +29,9 @@ fn parting() -> Result<(), Me> { ]; println!("Data; {}",data.gr()); let len = data.len(); - let mut refdata = data.ref_vec(0..len); + let mut refdata = data.ref_vec(0..data.len()); let (eqsub, gtsub) = <&mut [f64]>::part(&mut refdata, &(0..len), &mut ::total_cmp); - println!("Pivot {} : items found equal to pivot {}", data[0].yl(), (gtsub - eqsub).yl()); + println!("Pivot {}. {} items found equal to the pivot", data[0].yl(), (gtsub - eqsub).yl()); println!("Partitions:\n{}, {}, {}\n", refdata[0..eqsub].gr(), //to_plainstr(), refdata[eqsub..gtsub].gr(), @@ -49,10 +48,10 @@ fn parting() -> Result<(), Me> { #[test] fn text() { let song = "There was a *jolly* miller once who lived on the river Dee. \ - From morn till night all day he sang for a jolly old fellow was he; \ + From morn till night all day he sang, for a jolly old fellow was he; \ and this forever the burden of his song seemed to be: \ I care for nobody, no not I, and nobody cares for me. \ - Tee hee heee, quoth he."; + Tee hee heee, piddle piddledy dee, quoth he."; let v = song.split(' ').collect::>(); println!("{}", v.gr()); // Display // v.mutisort(0..v.len(),|&a,&b| a.len().cmp(&b.len())); @@ -91,22 +90,19 @@ fn medf64() -> Result<(), Me> { let mut vr = v.ref_vec(0..len); println!( "max: {}", - min(&vr, 0..len, &mut |a: &f64, b: &f64| b.total_cmp(a)).gr() + extremum_refs(&vr, 0..len, &mut |a: &f64, b: &f64| b.total_cmp(a)).gr() ); println!( "max2: {}", - min2(&vr, 0..len, &mut |a: &f64, b: &f64| b.total_cmp(a)).gr() + best_two_refs(&vr, 0..len, &mut |a: &f64, b: &f64| b.total_cmp(a)).gr() ); let (eqsub, gtsub) = <&mut [f64]>::part(&mut vr, &(0..v.len()), &mut ::total_cmp); - println!( - "Result: {}\nCommas separate the subranges:\n\ - {GR}[{}, {}, {}]{UN}\n{} items equal to the pivot {}", - (eqsub, gtsub).yl(), + println!("Partitioning (pivot {}, commas separate the subranges): {}", v[0].yl(),(eqsub,gtsub).gr()); + println!("{GR}[{}, {}, {}]{UN}\nNumber of items equal to the pivot {}", vr[0..eqsub].to_plainstr(), vr[eqsub..gtsub].to_plainstr(), vr[gtsub..len].to_plainstr(), - (gtsub - eqsub).yl(), - v[0].yl() + (gtsub - eqsub).yl() ); let median = v.medf_checked()?; let mad = v.madf(median); @@ -138,8 +134,8 @@ fn errors() -> Result<(), Me> { let Ok(mut v) = ranv_u64(d) else { return merror("other", "Random vec genertion failed"); }; - let med:f64 = medianu64(&mut v)?.into(); - error += qbalance(&v, &med, |&f| f as f64); + let (m1,m2) = medu64(&mut v)?; + error += qbalance(&v, &((m1 as f64+m2 as f64)/2.0), |&f| f as f64); } println!("Even length {GR}{d}{UN}, repeats: {GR}{n}{UN}, errors: {GR}{error}{UN}"); error = 0_i64; @@ -150,8 +146,8 @@ fn errors() -> Result<(), Me> { // v // .as_slice() // .medf_unchecked(); - let med:f64 = medianu64(&mut v)?.into(); - error += qbalance(&v, &med, |&f| f as f64); + let (m1,m2) = medu64(&mut v)?; + error += qbalance(&v, &((m1 as f64+m2 as f64)/2.0), |&f| f as f64); } println!( "Odd length {GR}{}{UN}, repeats: {GR}{n}{UN}, errors: {GR}{error}{UN}", @@ -161,26 +157,30 @@ fn errors() -> Result<(), Me> { Ok(()) } -const NAMES: [&str; 3] = ["median_by","medf_checked","uqmedian"]; +const NAMES: [&str; 4] = ["median_by","medf_checked","uqmedian","medu64"]; -const CLOSURESU64: [fn(&[u64]); 3] = [ - |v: &[_]| { +const CLOSURESU64: [fn(&mut [u64]); 4] = [ + |v: &mut [_]| { v.median_by(&mut ::cmp) .expect("median_by closure failed"); }, - |v: &[_]| { + |v: &mut [_]| { let vf:Vec = v.iter().map(|&x| x as f64).collect(); vf.medf_checked() .expect("medf_checked found NaN"); }, - |v: &[_]| { + |v: &mut [_]| { // already in u64, so using identity quantifier v.uqmedian(|&x| x) .expect("uqmedian error"); }, - + |v: &mut [_]| { + medu64(v) + .expect("uqmedian error"); + } + /* |v: &[_]| { let mut sorted: Vec<&f64> = v.iter().collect(); @@ -206,5 +206,5 @@ const CLOSURESU64: [fn(&[u64]); 3] = [ fn comparison() { // set_seeds(0); // intialise random numbers generator // Rnum encapsulates the type of random data to be generated - benchu64(100000..100010, 1, 10, &NAMES, &CLOSURESU64); + mutbenchu64(100000..100010, 1, 10, &NAMES, &CLOSURESU64); }