diff --git a/README.md b/README.md index d79158f..729cd57 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,12 @@ The main features of our median algorithm are: ```rust /// Fast 1D medians of floating point data, plus related methods pub trait Medianf64 { - /// Median of f64s, checked for NaNs + /// Median of f64s, NaNs removed fn medf_checked(self) -> Result; - /// Median of f64s, not checked for NaNs + /// Median of f64s, including NaNs fn medf_unchecked(self) -> f64; + /// Iterative weighted median + fn medf_weighted(self, ws: Self, eps: f64) -> Result; /// Zero mean/median data produced by subtracting the centre fn medf_zeroed(self, centre: f64) -> Vec; /// Median correlation = cosine of an angle between two zero median vecs @@ -109,6 +111,8 @@ pub trait Median<'a, T> { ## Release Notes +**Version 3.0.7** - Added `medf_weighted`, applying `&[f64]` weights. + **Version 3.0.6** - Moved `part`, `ref_vec` and `deref_vec` into crate `Indxvec`, to allow their wider use. **Version 3.0.5** - Obsolete code pruning. diff --git a/src/algos.rs b/src/algos.rs index 2a52afb..ce6509d 100644 --- a/src/algos.rs +++ b/src/algos.rs @@ -2,6 +2,40 @@ use core::cmp::{Ordering, Ordering::*}; use std::ops::Range; use indxvec::Mutops; +/// Scan a slice of f64s for NANs +pub fn nans(v: &[f64]) -> bool { + for &f in v { + if f.is_nan() { + return true; + }; + } + false +} + +/// kth item from rng (ascending or descending, depending on `c`) +pub fn best1_k(s: &[T], k: usize, rng: Range, c: F) -> &T + where + F: Fn(&T, &T) -> Ordering, + { + let n = rng.len(); + assert!((k > 0) & (k <= n)); + let mut k_sorted: Vec<&T> = s.iter().skip(rng.start).take(k).collect(); + k_sorted.sort_unstable_by(|&a, &b| c(a, b)); + let mut k_max = k_sorted[k - 1]; + for si in s.iter() { + if c(si, k_max) == Less { + let insert_pos = match k_sorted.binary_search_by(|j| c(j, si)) { + Ok(ins) => ins + 1, + Err(ins) => ins, + }; + k_sorted.insert(insert_pos, si); + k_sorted.pop(); + k_max = k_sorted[k - 1]; + }; + } + k_max + } + /// Index of the middling value of four refs. Makes only three comparisons fn middling( idx0: usize, diff --git a/src/lib.rs b/src/lib.rs index 63ff1cc..eb32a7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,6 +65,8 @@ pub trait Medianf64 { fn medf_checked(self) -> Result; /// Median of f64s, including NaNs fn medf_unchecked(self) -> f64; + /// Iterative weighted median + fn medf_weighted(self, ws: Self, eps: f64) -> Result; /// Zero mean/median data produced by subtracting the centre fn medf_zeroed(self, centre: f64) -> Vec; /// Median correlation = cosine of an angle between two zero median vecs @@ -107,11 +109,6 @@ impl Medianf64 for &[f64] { 2 => return Ok((self[0] + self[1]) / 2.0), _ => (), }; - /* - if !no_nans(self) { return merror("Nan", "medf_checked: Nan found in input!");}; - let us = &to_u64s(self); - let mut s = ref_vec(us,0..self.len()); - */ let mut s = self .iter() .map(|x| { @@ -150,6 +147,33 @@ impl Medianf64 for &[f64] { (med1 + med2) / 2.0 } } + /// Iterative weighted median with accuracy eps + fn medf_weighted(self, ws: Self, eps: f64) -> Result { + if self.len() != ws.len() { + return merror("size","medf_weighted - data and weights lengths mismatch"); }; + if nans(self) { + return merror("Nan","medf_weighted - detected Nan in input"); }; + let weights_sum: f64 = ws.iter().sum(); + let mut last_median = 0_f64; + for (g,w) in self.iter().zip(ws) { last_median += w*g; }; + last_median /= weights_sum; // start iterating from the weighted centre + let mut last_recsum = 0f64; + loop { // iteration till accuracy eps is exceeded + let mut median = 0_f64; + let mut recsum = 0_f64; + for (x,w) in self.iter().zip(ws) { + let mag = (x-last_median).abs(); + if mag.is_normal() { // only use this point if its distance from median is > 0.0 + let rec = w/(mag.sqrt()); // weight/distance + median += rec*x; + recsum += rec // add separately the reciprocals for final scaling + } + } + if recsum-last_recsum < eps { return Ok(median/recsum); }; // termination test + last_median = median/recsum; + last_recsum = recsum; + } + } /// Zero mean/median data produced by subtracting the centre, /// typically the mean or the median. fn medf_zeroed(self, centre: f64) -> Vec { diff --git a/src/oldalgos.rs b/src/oldalgos.rs index 25cb086..a346fe8 100644 --- a/src/oldalgos.rs +++ b/src/oldalgos.rs @@ -4,16 +4,6 @@ use core::ops::{Deref, Neg}; const FSIGN: u64 = 0x8000_0000_0000_0000; -/// Scan a slice of f64s for being free of NANs -pub fn no_nans(v: &[f64]) -> bool { - for &f in v { - if f.is_nan() { - return false; - }; - } - true -} - /// Copies a slice of f64s, removing any NANs from it. /// It is advisable to test with `non_nans` first, as there may be none pub fn scrub_nans(v: &[f64]) -> Vec { diff --git a/tests/tests.rs b/tests/tests.rs index da8eadc..7d8ec84 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2,7 +2,7 @@ #![allow(dead_code)] #[cfg(test)] use indxvec::{here, printing::*, Indices, Mutops, Printing, Vecops}; -use medians::algos::{qbalance, min, min2}; +use medians::algos::{qbalance, min, min2,best1_k}; // use medians::algosf64::partord; use medians::{Me, merror, medianu8, Median, Medianf64}; use ran:: *; @@ -73,7 +73,9 @@ fn medf64() -> Result<(), Me> { let v = [ 9., 10., 18., 17., 16., 15., 14., 1., 2., 3., 4., 5., 6., 7., 8., 17., 10., 11., 12., 13., 14., 15., 16., 18., 9. ]; - println!("Data: {}",v.gr()); + let weights = [ 1.,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.,15.,16.,17.,18.,19.,20.,21.,22.,23.,24.,25.]; + println!("Data: {}",v.gr()); + println!("Weights: {}",weights.gr()); let len = v.len(); let mut vr = v.ref_vec(0..len); println!("max: {}",min(&vr,0..len,&mut |a:&f64,b:&f64| b.total_cmp(a)).gr()); @@ -95,7 +97,8 @@ fn medf64() -> Result<(), Me> { ); let median = v.medf_checked()?; let mad = v.madf(median); - println!("\nMedian±mad: {GR}{}±{}{UN}", median, mad); + println!("Median±mad: {GR}{}±{}{UN}", median, mad); + println!("Weighted median: {GR}{}{UN} ", v.medf_weighted(&weights,0.0001)?); Ok(()) } @@ -141,38 +144,45 @@ fn errors() -> Result<(), Me> { Ok(()) } -const NAMES: [&str; 3] = [ - // "medf_unchecked", - "qmedian", - "median_by", - "medianu8" +const NAMES: [&str; 3] = [ + "median_by", + "best_k", + "medf_unchecked", + ]; -const CLOSURESU8: [fn(&[u8]); 3] = [ -// |v: &[_]| { -// v.medf_unchecked(); -// }, -// |v: &[_]| { -// v.medf_checked().expect("median closure failed"); - // }, +const CLOSURESF64: [fn(&[f64]); 3] = [ + |v: &[_]| { + v.median_by(&mut ::total_cmp) + .expect("even median closure failed"); + }, + |v: &[_]| { + let mut sorted:Vec<&f64> = v.iter().collect(); + sorted.sort_unstable_by(|&a,&b| a.total_cmp(b)); + // sorted[sorted.len()/2]; + }, + |v: &[_]| { + v.medf_unchecked(); + }, + /* |v: &[_]| { - v.qmedian_by(&mut ::cmp,|&x| x as f64) + v.qmedian_by(&mut ::total_cmp,|&x| x) .expect("even median closure failed"); - }, - |v: &[_]| { - v.median_by(&mut ::cmp) - .expect("even median closure failed"); }, + */ + + /* |v: &[_]| { medianu8(v) .expect("medianu8 closure failed"); } + */ ]; #[test] fn comparison() { // set_seeds(0); // intialise random numbers generator // Rnum encapsulates the type of random data to be generated - benchu8(3..5000, 100, 10, &NAMES, &CLOSURESU8); + benchf64(3..100, 1, 10, &NAMES, &CLOSURESF64); }