From 3198e04ed9a0dbee265e5edcb735cb483ac710a2 Mon Sep 17 00:00:00 2001
From: sebasv
Date: Mon, 12 Nov 2018 13:50:39 +0100
Subject: [PATCH 1/4] L-BFGS skateboard

---
 src/vector/l_bfgs.rs | 181 +++++++++++++++++++++++++++++++++++++++++++
 src/vector/mod.rs    |   5 +-
 2 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 src/vector/l_bfgs.rs

diff --git a/src/vector/l_bfgs.rs b/src/vector/l_bfgs.rs
new file mode 100644
index 0000000..022dc04
--- /dev/null
+++ b/src/vector/l_bfgs.rs
@@ -0,0 +1,181 @@
+/// Limited-memory BFGS Quasi-Newton optimizer. Uses the two-loop recursion to
+/// calculate the quasi-inverse-hessian, as formulated in
+///
+/// Jorge Nocedal. Updating Quasi-Newton Matrices With Limited Storage.
+/// MATHEMATICS OF COMPUTATION, VOLUME 35, NUMBER 151 JULY 1980, PAGES 773-782
+///
+use ndarray::prelude::*;
+
+#[derive(Builder, Debug)]
+pub struct LBFGS {
+    /// Smaller is more precise.
+    #[builder(default = "1e-8")]
+    pub gtol: f64,
+
+    /// Larger is more precise.
+    #[builder(default = "1500")]
+    pub max_iter: usize,
+}
+
+impl LBFGS {
+    pub fn minimize<F, G>(&self, func: F, grad: G, x0: ArrayView1<f64>) -> Array1<f64>
+    where
+        F: Fn(ArrayView1<f64>) -> f64,
+        G: Fn(ArrayView1<f64>) -> Array1<f64>,
+    {
+        let mut iter = 0;
+        let m = x0.len().min(5);
+
+        let mut hist = RobinVec::new();
+
+        let mut x = x0.to_owned();
+        let mut g = grad(x.view());
+
+        loop {
+            let dir = self.quasi_update(&g, &hist);
+            let a = {
+                let min = ::scalar::GoldenRatioBuilder::default().build().unwrap();
+                let f = |a: f64| func((&x + &(a * &dir)).view());
+                min.minimize_bracket(&f, -2.0, 0.0)
+            };
+            let x_new = &x + &(a * &dir);
+            let g_new = grad(x_new.view());
+
+            let s = &x_new - &x;
+            let y = &g_new - &g;
+            let r = 1f64 / s.dot(&y);
+
+            iter += 1;
+
+            if r.is_nan() || iter > self.max_iter || g_new.mapv(f64::abs).scalar_sum() < self.gtol {
+                break;
+            }
+
+            hist.push((s, y, r), m);
+
+            x = x_new;
+            g = g_new;
+        }
+
+        x
+    }
+
+    fn quasi_update(
+        &self,
+        grad: &Array1<f64>,
+        hist: &RobinVec<(Array1<f64>, Array1<f64>, f64)>,
+    ) -> Array1<f64> {
+        let mut q = grad.to_owned();
+        let mut a = Vec::with_capacity(hist.len());
+
+        for (si, yi, ri) in hist.iter().rev() {
+            let ai = ri * si.dot(&q);
+            q.scaled_add(-ai, &yi);
+            a.push(ai);
+        }
+
+        // q = {
+        //     // H_0 * q
+        //     let (ref s, ref y, _) = hist[hist.len() - 1];
+        //     y * (s.dot(&q) / y.dot(y))
+        // };
+
+        for ((si, yi, ri), ai) in hist.iter().zip(a.iter().rev()) {
+            let bi = ri * yi.dot(&q);
+            q.scaled_add(ai - bi, &si);
+        }
+        q
+    }
+}
+
+#[derive(Debug)]
+struct RobinVec<T> {
+    i0: usize,
+    vec: Vec<T>,
+}
+
+use std::iter::Chain;
+use std::slice;
+
+impl<T> RobinVec<T> {
+    pub fn new() -> RobinVec<T> {
+        RobinVec {
+            i0: 0,
+            vec: Vec::new(),
+        }
+    }
+
+    pub fn iter(&self) -> Chain<slice::Iter<T>, slice::Iter<T>> {
+        self.vec[self.i0..].iter().chain(self.vec[..self.i0].iter())
+    }
+
+    pub fn len(&self) -> usize {
+        self.vec.len()
+    }
+
+    pub fn push(&mut self, el: T, size: usize) {
+        let n = self.vec.len();
+        if size > n {
+            if self.i0 == 0 {
+                self.vec.push(el);
+            } else {
+                self.vec.insert(self.i0, el);
+                self.i0 += 1;
+            }
+        } else if size == n {
+            self.vec[self.i0] = el;
+            self.i0 = (self.i0 + 1) % n;
+        } else {
+            panic!("needs implementation")
+        }
+    }
+}
+
+use std::ops::{Index, IndexMut};
+impl<T> Index<usize> for RobinVec<T> {
+    type Output = T;
+
+    fn index(&self, index: usize) -> &T {
+        &self.vec[(index + self.i0) % self.vec.len()]
+    }
+}
+
+impl<T> IndexMut<usize> for RobinVec<T> {
+    fn index_mut(&mut self, index: usize) -> &mut T {
+        let n = self.vec.len();
+        &mut self.vec[(index + self.i0) % n]
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn minimize() {
+        let center = arr1(&[0.9, 1.3, 0.5]);
+        let min = LBFGSBuilder::default().build().unwrap();
+        let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| -(-xi * xi).exp()).scalar_sum();
+        let g = |x: ArrayView1<f64>| {
+            -2.0 * (&x - &center).mapv(|xi| -(-xi * xi).exp()) * &(&x - &center)
+        };
+        let x0 = Array1::ones(center.len());
+        let xmin = min.minimize(&f, &g, x0.view());
+        println!("{:?}", xmin);
+        assert!(xmin.all_close(&center, 1e-5))
+    }
+
+    #[test]
+    fn robin() {
+        let mut r = RobinVec::new();
+        for i in 1..16 {
+            r.push(i, 4);
+        }
+        println!("{:?}", r);
+        for (i, &ri) in r.iter().enumerate() {
+            println!("{}", ri);
+            assert_eq!(i + 12, ri);
+            assert_eq!(ri, r[i]);
+        }
+    }
+}
diff --git a/src/vector/mod.rs b/src/vector/mod.rs
index 93975fa..c34506c 100644
--- a/src/vector/mod.rs
+++ b/src/vector/mod.rs
@@ -2,4 +2,7 @@
 
 mod nelder_mead;
 
-pub use self::nelder_mead::NelderMeadBuilder;
\ No newline at end of file
+pub use self::nelder_mead::NelderMeadBuilder;
+
+mod l_bfgs;
+pub use self::l_bfgs::LBFGSBuilder;
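The core of this first patch is `quasi_update`, a direct transcription of Nocedal's two-loop recursion: the first loop walks the `(s, y, rho)` history from newest to oldest, the second from oldest to newest. The sketch below shows the same recursion over plain `Vec<f64>`, independent of `ndarray` and of this crate; the names `dot` and `two_loop` are illustrative only, not part of the patch.

```rust
// Standalone sketch of Nocedal's two-loop recursion (illustration, not crate code).
// hist holds (s_i, y_i, rho_i) with s_i = x_{i+1} - x_i, y_i = g_{i+1} - g_i,
// rho_i = 1 / (s_i . y_i); the result approximates H * g for the inverse Hessian H.
fn dot(a: &[f64], b: &[f64]) -> f64 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

fn two_loop(g: &[f64], hist: &[(Vec<f64>, Vec<f64>, f64)]) -> Vec<f64> {
    let mut q = g.to_vec();
    let mut alpha = Vec::with_capacity(hist.len());

    // First loop: newest to oldest curvature pair.
    for (s, y, rho) in hist.iter().rev() {
        let a = rho * dot(s, &q);
        for (qj, yj) in q.iter_mut().zip(y) {
            *qj -= a * yj;
        }
        alpha.push(a);
    }

    // An initial inverse-Hessian guess H_0 would scale q here; the patch keeps
    // that step commented out, which amounts to H_0 = I.

    // Second loop: oldest to newest, consuming the alphas in reverse.
    for ((s, y, rho), a) in hist.iter().zip(alpha.iter().rev()) {
        let b = rho * dot(y, &q);
        for (qj, sj) in q.iter_mut().zip(s) {
            *qj += (a - b) * sj;
        }
    }
    q
}

fn main() {
    // One exact curvature pair from f(x) = x^2 (gradient 2x): x moved
    // from 1.0 to 0.5, so s = -0.5, y = -1.0, rho = 1 / (s . y) = 2.0.
    let hist = vec![(vec![-0.5], vec![-1.0], 2.0)];
    let dir = two_loop(&[1.0], &hist);
    // The inverse Hessian of x^2 is 1/2, so H * g = 0.5.
    assert!((dir[0] - 0.5).abs() < 1e-12);
}
```

The `RobinVec` ring buffer that stores the history here is replaced by `std::collections::VecDeque` in the next patch.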
From 1ccb6514d0ccf08b06a3a180ddf27b49353c373e Mon Sep 17 00:00:00 2001
From: sebasv
Date: Tue, 13 Nov 2018 09:19:31 +0100
Subject: [PATCH 2/4] use VecDeque instead of custom type, added parameters to
 L-BFGS

---
 src/scalar/golden_ratio.rs |  76 ++++++++++++----------
 src/vector/l_bfgs.rs       | 129 ++++++++++++++-----------------------
 2 files changed, 89 insertions(+), 116 deletions(-)

diff --git a/src/scalar/golden_ratio.rs b/src/scalar/golden_ratio.rs
index 3d13673..134f3ad 100644
--- a/src/scalar/golden_ratio.rs
+++ b/src/scalar/golden_ratio.rs
@@ -1,19 +1,19 @@
 //! Golden ratio is to minimization what bisection is to root finding.
-//! GoldenRatio searches within an interval for a local minimum. At 
-//! every iteration, the interval is decreased in size by a constant 
+//! GoldenRatio searches within an interval for a local minimum. At
+//! every iteration, the interval is decreased in size by a constant
 //! factor, until the desired precision is obtained.
-//! 
-//! This algorithm is guaranteed to converge on a local minimum in a 
+//!
+//! This algorithm is guaranteed to converge on a local minimum in a
 //! finite amount of steps under very light smoothness criteria for the
 //! target function.
-//! 
+//!
 //! In an iteration, the target function is calculated at 4 points:
 //! +---------+----+---------+
 //! iter 1    a    b    c    d
 //! The interval for the next iteration is chosen to be [a,c] if f(b) < f(c),
 //! and [b,d] otherwise.
 
 #[derive(Builder, Debug)]
 pub struct GoldenRatio {
     /// Smaller is more precise.
     #[builder(default = "1e-8")]
     pub xtol: f64,
 
     /// Bigger is more precise.
     #[builder(default = "1000")]
     pub max_iter: usize,
 }
 
@@ -42,26 +42,30 @@ pub struct GoldenRatio {
 const RATIO: f64 = 2.618033988749895; // 1.5 + 0.5*5f64.sqrt();
 
 impl GoldenRatio {
-
     /// Search for the minimum of `func` around `x0`.
     /// The search region is expanded in both directions until an expansion
     /// leads to an increase in the function value at the bound of the region.
-    /// 
-    /// Currently `minimize` makes, in some specific cases, at most around 2000 
+    ///
+    /// Currently `minimize` makes, in some specific cases, at most around 2000
     /// additional calls to `func` to find a bracketing interval. For more details
     /// see https://github.com/to266/optimize/issues/11
-    pub fn minimize<F>(&self, func: F, x0: f64) -> f64
-    where F: Fn(f64) -> f64 {
-        let left = x0 + self.explore(&func, x0, true);
-        let right = x0 + self.explore(&func, x0, false);
+    pub fn minimize<F>(&self, mut func: F, x0: f64) -> f64
+    where
+        F: FnMut(f64) -> f64,
+    {
+        let left = x0 + self.explore(&mut func, x0, true);
+        let right = x0 + self.explore(&mut func, x0, false);
         self.minimize_bracket(func, left, right)
     }
 
     /// increase step until func stops decreasing, then return step
-    fn explore<F>(&self, func: F, x0: f64, explore_left: bool) -> f64
-    where F: Fn(f64) -> f64 {
-        let mut step = if explore_left {-1.} else {1.} *
-            f64::powi(2., f64::log2(f64::EPSILON + x0.abs()) as i32) * f64::EPSILON;
+    fn explore<F>(&self, mut func: F, x0: f64, explore_left: bool) -> f64
+    where
+        F: FnMut(f64) -> f64,
+    {
+        let mut step = if explore_left { -1. } else { 1. }
+            * f64::powi(2., f64::log2(f64::EPSILON + x0.abs()) as i32)
+            * f64::EPSILON;
 
         let mut fprev = func(x0);
         let mut fstepped = func(x0 + step);
@@ -70,13 +74,15 @@ impl GoldenRatio {
             step *= 2.0;
             fprev = fstepped;
             fstepped = func(x0 + step);
-        } 
+        }
 
         step
     }
 
     /// Search for the minimum of `func` between `left` and `right`.
-    pub fn minimize_bracket<F>(&self, func: F, left: f64, right: f64) -> f64
-    where F: Fn(f64) -> f64 {
+    pub fn minimize_bracket<F>(&self, mut func: F, left: f64, right: f64) -> f64
+    where
+        F: FnMut(f64) -> f64,
+    {
         let mut min = left.max(f64::MIN);
         let mut max = right.min(f64::MAX);
         let mut iter = 0;
@@ -86,7 +92,7 @@ impl GoldenRatio {
         let mut f_b = func(x_b);
         let mut f_c = func(x_c);
 
-        while (max-min).abs() > self.xtol && iter < self.max_iter {
+        while (max - min).abs() > self.xtol && iter < self.max_iter {
             iter += 1;
             if f_b < f_c {
                 max = x_c;
@@ -108,7 +114,6 @@ impl GoldenRatio {
     }
 }
 
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -118,12 +123,13 @@ mod tests {
         let minimizer = GoldenRatioBuilder::default()
             .xtol(1e-7)
             .max_iter(1000)
-            .build().unwrap();
-        let f = |x: f64| (x-0.2).powi(2);
+            .build()
+            .unwrap();
+        let f = |x: f64| (x - 0.2).powi(2);
 
         let res = minimizer.minimize_bracket(&f, -1.0, 1.0);
         println!("res: {}", res);
-        assert!( (res - 0.2).abs() <= 1e-7);
+        assert!((res - 0.2).abs() <= 1e-7);
     }
 
     #[test]
@@ -131,12 +137,13 @@ mod tests {
        let minimizer = GoldenRatioBuilder::default()
             .xtol(1e-7)
             .max_iter(1000)
-            .build().unwrap();
-        let f = |x: f64| (x-0.2).powi(2);
+            .build()
+            .unwrap();
+        let f = |x: f64| (x - 0.2).powi(2);
 
         let res = minimizer.minimize(&f, 10.0);
         println!("res: {}", res);
-        assert!( (res - 0.2).abs() <= 1e-7);
+        assert!((res - 0.2).abs() <= 1e-7);
     }
 
     #[test]
@@ -144,11 +151,12 @@ mod tests {
         let minimizer = GoldenRatioBuilder::default()
             .xtol(1e-7)
             .max_iter(1000)
-            .build().unwrap();
+            .build()
+            .unwrap();
         let f = |x: f64| x;
 
         let res = minimizer.minimize(&f, 10.0);
         println!("res: {}", res);
-        assert!( res.is_infinite() );
+        assert!(res.is_infinite());
     }
-}
\ No newline at end of file
+}
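The quiet but important change above is the relaxation of the closure bound from `Fn` to `FnMut`: the line search can now be handed a stateful closure, which is exactly what the L-BFGS half of this patch uses to count function evaluations. A standalone sketch of a golden-section loop with that calling convention follows (illustrative only; the crate's `minimize_bracket` differs in detail).

```rust
// Golden-section minimization sketch with an FnMut objective, so the caller
// can keep state (here: an evaluation counter). Not the crate's implementation.
fn golden_min(mut f: impl FnMut(f64) -> f64, mut a: f64, mut d: f64, xtol: f64) -> f64 {
    const RATIO: f64 = 2.618033988749895; // 1.5 + 0.5 * 5f64.sqrt()
    let (mut b, mut c) = (a + (d - a) / RATIO, d - (d - a) / RATIO);
    let (mut fb, mut fc) = (f(b), f(c));
    while (d - a).abs() > xtol {
        if fb < fc {
            // Minimum bracketed in [a, c]; the old b becomes the new c.
            d = c;
            c = b;
            fc = fb;
            b = a + (d - a) / RATIO;
            fb = f(b);
        } else {
            // Minimum bracketed in [b, d]; the old c becomes the new b.
            a = b;
            b = c;
            fb = fc;
            c = d - (d - a) / RATIO;
            fc = f(c);
        }
    }
    0.5 * (a + d)
}

fn main() {
    let mut evals = 0;
    let xmin = golden_min(
        |x| {
            evals += 1;
            (x - 0.2).powi(2)
        },
        -1.0,
        1.0,
        1e-7,
    );
    println!("minimum at {} after {} evaluations", xmin, evals);
    assert!((xmin - 0.2).abs() < 1e-6);
}
```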
diff --git a/src/vector/l_bfgs.rs b/src/vector/l_bfgs.rs
index 022dc04..5790082 100644
--- a/src/vector/l_bfgs.rs
+++ b/src/vector/l_bfgs.rs
@@ -5,16 +5,38 @@
 /// MATHEMATICS OF COMPUTATION, VOLUME 35, NUMBER 151 JULY 1980, PAGES 773-782
 ///
 use ndarray::prelude::*;
+use std::collections::VecDeque;
 
 #[derive(Builder, Debug)]
 pub struct LBFGS {
     /// Smaller is more precise.
-    #[builder(default = "1e-8")]
+    #[builder(default = "1e-12")]
     pub gtol: f64,
 
+    /// The maximum number of iterations. The gradient is evaluated once per iteration.
     /// Larger is more precise.
     #[builder(default = "1500")]
     pub max_iter: usize,
+
+    /// The number of datapoints to use to estimate the inverse hessian.
+    /// Larger is more precise. If `m` is larger than `x0.len()`, then
+    /// `x0.len()` is used.
+    #[builder(default = "5")]
+    pub m: usize,
+
+    /// The maximum step to be taken in the direction determined by the
+    /// Quasi-Newton method.
+    #[builder(default = "2.0")]
+    pub max_step: f64,
+
+    /// The tolerance on x used to terminate the line search.
+    /// Smaller is more precise.
+    #[builder(default = "1e-8")]
+    pub xtol: f64,
+
+    /// The maximum number of function evaluations.
+    #[builder(default = "1500")]
+    pub max_feval: usize,
 }
 
 impl LBFGS {
@@ -24,9 +46,10 @@ impl LBFGS {
         G: Fn(ArrayView1<f64>) -> Array1<f64>,
     {
         let mut iter = 0;
-        let m = x0.len().min(5);
+        let mut feval_count = 0;
+        let m = x0.len().min(self.m).max(1);
 
-        let mut hist = RobinVec::new();
+        let mut hist = VecDeque::new();
 
         let mut x = x0.to_owned();
         let mut g = grad(x.view());
@@ -34,9 +57,15 @@ impl LBFGS {
         loop {
             let dir = self.quasi_update(&g, &hist);
             let a = {
-                let min = ::scalar::GoldenRatioBuilder::default().build().unwrap();
-                let f = |a: f64| func((&x + &(a * &dir)).view());
-                min.minimize_bracket(&f, -2.0, 0.0)
+                let min = ::scalar::GoldenRatioBuilder::default()
+                    .xtol(self.xtol)
+                    .build()
+                    .unwrap();
+                let f = |a: f64| {
+                    feval_count += 1;
+                    func((&x + &(a * &dir)).view())
+                };
+                min.minimize_bracket(f, -self.max_step, 0.0)
             };
             let x_new = &x + &(a * &dir);
             let g_new = grad(x_new.view());
@@ -47,11 +76,18 @@ impl LBFGS {
 
             iter += 1;
 
-            if r.is_nan() || iter > self.max_iter || g_new.mapv(f64::abs).scalar_sum() < self.gtol {
+            if r.is_nan()
+                || iter > self.max_iter
+                || feval_count > self.max_feval
+                || g_new.mapv(f64::abs).scalar_sum() < self.gtol
+            {
                 break;
             }
 
-            hist.push((s, y, r), m);
+            while hist.len() >= m {
+                hist.pop_front();
+            }
+            hist.push_back((s, y, r));
 
             x = x_new;
             g = g_new;
@@ -60,10 +96,12 @@ impl LBFGS {
         x
     }
 
+    /// Calculate the Quasi-Newton direction H*g efficiently, where g
+    /// is the gradient and H is the *inverse* hessian.
     fn quasi_update(
         &self,
         grad: &Array1<f64>,
-        hist: &RobinVec<(Array1<f64>, Array1<f64>, f64)>,
+        hist: &VecDeque<(Array1<f64>, Array1<f64>, f64)>,
     ) -> Array1<f64> {
         let mut q = grad.to_owned();
         let mut a = Vec::with_capacity(hist.len());
@@ -88,65 +126,6 @@ impl LBFGS {
     }
 }
 
-#[derive(Debug)]
-struct RobinVec<T> {
-    i0: usize,
-    vec: Vec<T>,
-}
-
-use std::iter::Chain;
-use std::slice;
-
-impl<T> RobinVec<T> {
-    pub fn new() -> RobinVec<T> {
-        RobinVec {
-            i0: 0,
-            vec: Vec::new(),
-        }
-    }
-
-    pub fn iter(&self) -> Chain<slice::Iter<T>, slice::Iter<T>> {
-        self.vec[self.i0..].iter().chain(self.vec[..self.i0].iter())
-    }
-
-    pub fn len(&self) -> usize {
-        self.vec.len()
-    }
-
-    pub fn push(&mut self, el: T, size: usize) {
-        let n = self.vec.len();
-        if size > n {
-            if self.i0 == 0 {
-                self.vec.push(el);
-            } else {
-                self.vec.insert(self.i0, el);
-                self.i0 += 1;
-            }
-        } else if size == n {
-            self.vec[self.i0] = el;
-            self.i0 = (self.i0 + 1) % n;
-        } else {
-            panic!("needs implementation")
-        }
-    }
-}
-
-use std::ops::{Index, IndexMut};
-impl<T> Index<usize> for RobinVec<T> {
-    type Output = T;
-
-    fn index(&self, index: usize) -> &T {
-        &self.vec[(index + self.i0) % self.vec.len()]
-    }
-}
-
-impl<T> IndexMut<usize> for RobinVec<T> {
-    fn index_mut(&mut self, index: usize) -> &mut T {
-        let n = self.vec.len();
-        &mut self.vec[(index + self.i0) % n]
-    }
-}
-
 #[cfg(test)]
 mod test {
     use super::*;
@@ -164,18 +143,4 @@ mod test {
         println!("{:?}", xmin);
         assert!(xmin.all_close(&center, 1e-5))
     }
-
-    #[test]
-    fn robin() {
-        let mut r = RobinVec::new();
-        for i in 1..16 {
-            r.push(i, 4);
-        }
-        println!("{:?}", r);
-        for (i, &ri) in r.iter().enumerate() {
-            println!("{}", ri);
-            assert_eq!(i + 12, ri);
-            assert_eq!(ri, r[i]);
-        }
-    }
 }
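With `VecDeque` in place of the bespoke `RobinVec`, the bounded history becomes a two-line idiom. Here is a small sketch of the update pattern the `minimize` loop now relies on; it reproduces the invariant that the removed `robin` test used to assert.

```rust
use std::collections::VecDeque;

fn main() {
    let m = 4; // history size, as bounded by LBFGS::m
    let mut hist: VecDeque<i32> = VecDeque::with_capacity(m);

    for i in 1..16 {
        // Same invariant as the minimize loop: evict the oldest entries
        // until there is room, then append the newest.
        while hist.len() >= m {
            hist.pop_front();
        }
        hist.push_back(i);
    }

    // After pushing 1..=15 with capacity 4, the newest four survive in
    // insertion order, which is what the removed `robin` test asserted.
    assert_eq!(hist.iter().copied().collect::<Vec<_>>(), vec![12, 13, 14, 15]);
}
```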
From bee8ff4478748cb01e1f656d9626e22832752c21 Mon Sep 17 00:00:00 2001
From: sebasv
Date: Tue, 13 Nov 2018 09:34:17 +0100
Subject: [PATCH 3/4] finite-difference approximation to gradient

---
 src/vector/l_bfgs.rs | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/vector/l_bfgs.rs b/src/vector/l_bfgs.rs
index 5790082..4883fdb 100644
--- a/src/vector/l_bfgs.rs
+++ b/src/vector/l_bfgs.rs
@@ -40,6 +40,20 @@ pub struct LBFGS {
 }
 
 impl LBFGS {
+    /// Minimize `func` starting in `x0` using a finite difference approximation
+    /// for the gradient. For more control over the approximation of the gradient,
+    /// you can use `minimize` with your own approximation.
+    pub fn minimize_approx_grad<F>(&self, func: F, x0: ArrayView1<f64>) -> Array1<f64>
+    where
+        F: Fn(ArrayView1<f64>) -> f64,
+    {
+        let eps = Array1::ones(x0.len()) * 1e-9;
+        let eps_view = eps.view();
+        let grad = |x: ArrayView1<f64>| ::utils::approx_fprime(x, &func, eps_view);
+        self.minimize(&func, &grad, x0)
+    }
+
+    /// Minimize `func` starting in `x0` using `grad` as the gradient of `func`.
     pub fn minimize<F, G>(&self, func: F, grad: G, x0: ArrayView1<f64>) -> Array1<f64>
     where
         F: Fn(ArrayView1<f64>) -> f64,
@@ -49,7 +63,7 @@ impl LBFGS {
         let mut feval_count = 0;
         let m = x0.len().min(self.m).max(1);
 
-        let mut hist = VecDeque::new();
+        let mut hist = VecDeque::with_capacity(m);
 
         let mut x = x0.to_owned();
         let mut g = grad(x.view());
@@ -143,4 +157,15 @@ mod test {
         println!("{:?}", xmin);
         assert!(xmin.all_close(&center, 1e-5))
     }
+
+    #[test]
+    fn minimize_approx() {
+        let center = arr1(&[0.9, 1.3, 0.5]);
+        let min = LBFGSBuilder::default().build().unwrap();
+        let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| -(-xi * xi).exp()).scalar_sum();
+        let x0 = Array1::ones(center.len());
+        let xmin = min.minimize_approx_grad(&f, x0.view());
+        println!("{:?}", xmin);
+        assert!(xmin.all_close(&center, 1e-5))
+    }
 }
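`::utils::approx_fprime` itself is not part of this series, so its exact behavior here is an assumption; the standard forward-difference construction it presumably implements looks like the following sketch (illustrative names, plain slices instead of `ndarray`).

```rust
// Forward-difference gradient sketch: grad_i ~ (f(x + eps * e_i) - f(x)) / eps.
// Assumption: this mirrors what ::utils::approx_fprime does; that helper is
// not shown in this patch series.
fn approx_gradient<F>(f: &F, x: &[f64], eps: f64) -> Vec<f64>
where
    F: Fn(&[f64]) -> f64,
{
    let fx = f(x);
    let mut xp = x.to_vec();
    (0..x.len())
        .map(|i| {
            let xi = xp[i];
            xp[i] = xi + eps;
            let deriv = (f(&xp) - fx) / eps;
            xp[i] = xi; // restore before perturbing the next coordinate
            deriv
        })
        .collect()
}

fn main() {
    // f(x) = sum of xi^2 has gradient 2 * x.
    let f = |x: &[f64]| x.iter().map(|xi| xi * xi).sum::<f64>();
    let grad = approx_gradient(&f, &[1.0, -2.0, 0.5], 1e-7);
    assert!((grad[0] - 2.0).abs() < 1e-5);
    assert!((grad[1] + 4.0).abs() < 1e-5);
    assert!((grad[2] - 1.0).abs() < 1e-5);
}
```

The forward difference costs one extra function evaluation per coordinate for O(eps) truncation error; a central difference (two evaluations per coordinate) would reduce that to O(eps^2) at roughly double the cost.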
From eb03fd35ca99b75bc0366ed7e905740029d32970 Mon Sep 17 00:00:00 2001
From: sebasv
Date: Wed, 14 Nov 2018 09:54:24 +0100
Subject: [PATCH 4/4] documentation

---
 src/vector/l_bfgs.rs | 70 +++++++++++++++++++++++++++++++++++++++++---
 src/vector/mod.rs    |  4 ++-
 2 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/vector/l_bfgs.rs b/src/vector/l_bfgs.rs
index 4883fdb..71adebf 100644
--- a/src/vector/l_bfgs.rs
+++ b/src/vector/l_bfgs.rs
@@ -1,14 +1,75 @@
-/// Limited-memory BFGS Quasi-Newton optimizer. Uses the two-loop recursion to
+use ndarray::prelude::*;
+use std::collections::VecDeque;
+
+/// Limited-memory BFGS Quasi-Newton optimizer.
+///
+/// This implementation uses the two-loop recursion to
 /// calculate the quasi-inverse-hessian, as formulated in
 ///
 /// Jorge Nocedal. Updating Quasi-Newton Matrices With Limited Storage.
 /// MATHEMATICS OF COMPUTATION, VOLUME 35, NUMBER 151 JULY 1980, PAGES 773-782
 ///
-use ndarray::prelude::*;
-use std::collections::VecDeque;
-
+/// This optimizer is known to be particularly suited for optimizing
+/// high-dimensional convex functions.
+///
+/// # Examples
+///
+/// ### With an exact gradient
+///
+/// ```
+/// # extern crate ndarray;
+/// use ndarray::prelude::*;
+///
+/// # extern crate optimize;
+/// use optimize::vector::LBFGSBuilder;
+///
+/// # fn main() {
+/// // Use the builder pattern to create a minimizer object
+/// // with default values for the parameters
+/// let min = LBFGSBuilder::default().build().unwrap();
+///
+/// // The function that is to be minimized, and an exact gradient function
+/// let center = arr1(&[0.9, 1.3, 0.5]);
+/// let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| xi.powi(4)).scalar_sum();
+/// let g = |x: ArrayView1<f64>| 4.0 * (&x - &center).mapv(|xi| xi.powi(3));
+/// let x0 = Array1::ones(center.len());
+///
+/// // do the actual minimization
+/// let xmin = min.minimize(&f, &g, x0.view());
+///
+/// println!("{:?}", xmin);
+/// assert!(xmin.all_close(&center, 1e-4))
+/// # }
+/// ```
+///
+/// ### Or with a finite-difference approximation
+///
+/// ```
+/// # extern crate ndarray;
+/// # use ndarray::prelude::*;
+/// # extern crate optimize;
+/// # use optimize::vector::LBFGSBuilder;
+///
+/// # fn main() {
+/// // Use the builder pattern to create a minimizer object
+/// // with default values for the parameters
+/// let min = LBFGSBuilder::default().build().unwrap();
+///
+/// // The function that is to be minimized
+/// let center = arr1(&[0.9, 1.3, 0.5]);
+/// let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| xi.powi(4)).scalar_sum();
+/// let x0 = Array1::ones(center.len());
+///
+/// // do the actual minimization
+/// let xmin = min.minimize_approx_grad(&f, x0.view());
+///
+/// println!("{:?}", xmin);
+/// assert!(xmin.all_close(&center, 1e-4))
+/// # }
+/// ```
 #[derive(Builder, Debug)]
 pub struct LBFGS {
+    /// The tolerance on the gradient used to terminate the optimization.
     /// Smaller is more precise.
     #[builder(default = "1e-12")]
     pub gtol: f64,
@@ -112,6 +173,7 @@ impl LBFGS {
 
     /// Calculate the Quasi-Newton direction H*g efficiently, where g
     /// is the gradient and H is the *inverse* hessian.
+    #[inline]
     fn quasi_update(
         &self,
         grad: &Array1<f64>,
diff --git a/src/vector/mod.rs b/src/vector/mod.rs
index c34506c..b750137 100644
--- a/src/vector/mod.rs
+++ b/src/vector/mod.rs
@@ -1,8 +1,10 @@
-//! This module contains algorithms that search for local minima of functions along multiple dimensions.
+//! Algorithms that search for local minima of functions along multiple dimensions.
 
 mod nelder_mead;
+pub use self::nelder_mead::NelderMead;
 pub use self::nelder_mead::NelderMeadBuilder;
 
 mod l_bfgs;
 pub use self::l_bfgs::LBFGSBuilder;
+pub use self::l_bfgs::LBFGS;