From 9cbbb12497bab4ea2c029d91b92f88ab153bf582 Mon Sep 17 00:00:00 2001
From: Hallvard Lavik
Date: Sun, 19 May 2024 21:06:14 +0200
Subject: [PATCH] Bug fixing.

`MSE` and `BinaryCrossEntropy` have some errors and are therefore commented
out.
---
 src/layer.rs     | 11 +++----
 src/network.rs   | 20 +++++--------
 src/objective.rs | 78 ++++++++++++++++++++++++------------------------
 3 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/src/layer.rs b/src/layer.rs
index 681aeda..7c274be 100644
--- a/src/layer.rs
+++ b/src/layer.rs
@@ -50,11 +50,11 @@ impl Layer {
         }
     }
 
-    pub fn forward(&self, x: Vec<f32>) -> (Vec<f32>, Vec<f32>) {
-        let inter: Vec<f32> = self.weights.iter().map(|w| dot(&w, &x)).collect();
+    pub fn forward(&self, x: &Vec<f32>) -> (Vec<f32>, Vec<f32>) {
+        let inter: Vec<f32> = self.weights.iter().map(|w| dot(&w, x)).collect();
         let out: Vec<f32> = match &self.bias {
-            Some(b) => add(&self.activation.forward(inter.clone()), b),
-            None => self.activation.forward(inter.clone()),
+            Some(b) => add(&self.activation.forward(&inter), b),
+            None => self.activation.forward(&inter),
         };
         (inter, out)
     }
@@ -67,7 +67,8 @@
             activation::Function::Softmax(_) => self.activation.backward(inter, Some(gradient)),
             _ => self.activation.backward(inter, None),
         };
-        let delta = mul(gradient, &activation);
+
+        let delta: Vec<f32> = mul(gradient, &activation);
         let weight_gradient: Vec<Vec<f32>> = delta
             .iter().map(|d| input
             .iter().map(|i| i * d)
diff --git a/src/network.rs b/src/network.rs
index 64baa66..d4f345b 100644
--- a/src/network.rs
+++ b/src/network.rs
@@ -92,7 +92,7 @@ pub fn predict(&self, x: &Vec<f32>) -> Vec<f32> {
         let mut out = x.clone();
         for layer in &self.layers {
-            let (_, _out) = layer.forward(out);
+            let (_, _out) = layer.forward(&out);
             out = _out;
         }
         out
     }
@@ -109,13 +109,12 @@
         let mut outs: Vec<Vec<f32>> = vec![out.clone()];
 
         for layer in &self.layers {
-            let (inter, next) = layer.forward(out);
+            let (inter, next) = layer.forward(&out);
             out = next;
 
             inters.push(inter);
             outs.push(out.clone());
         }
-
         (inters, outs, out)
     }
@@ -125,16 +124,16 @@
         (self.objective.loss(y, &out), inters, outs, out)
     }
 
-    fn backward(&mut self, loss: Vec<f32>, inters: Vec<Vec<f32>>, outs: Vec<Vec<f32>>) {
+    fn backward(&mut self, gradient: Vec<f32>, inters: Vec<Vec<f32>>, inputs: Vec<Vec<f32>>) {
 
-        let mut gradient = loss;
-        let inputs = outs.clone();
+        let mut gradient = gradient;
 
         for (i, layer) in self.layers.iter_mut().rev().enumerate() {
             let input = &inputs[inputs.len() - i - 2];
             let inter = &inters[inters.len() - i - 1];
-            let (weight_gradient, bias_gradient, _gradient) = layer.backward(&gradient, inter, input);
+            let (weight_gradient, bias_gradient, _gradient) =
+                layer.backward(&gradient, inter, input);
             gradient = _gradient;
 
             // Weight update.
-            match layer.bias {
-                Some(ref mut bias) => {
-                    self.optimizer.update(bias, bias_gradient.as_ref().unwrap());
-                },
-                None => {},
+            if let Some(ref mut bias) = layer.bias {
+                self.optimizer.update(bias, bias_gradient.as_ref().unwrap());
+            }
         }
     }
diff --git a/src/objective.rs b/src/objective.rs
index 380038c..b80f6ba 100644
--- a/src/objective.rs
+++ b/src/objective.rs
@@ -19,10 +19,10 @@ use std::fmt::Display;
 pub enum Objective {
     AE,
     MAE,
-    MSE,
+    // MSE,
     RMSE,
-    BinaryCrossEntropy,
-    MulticlassCrossEntropy,
+    // BinaryCrossEntropy,
+    CategoricalCrossEntropy,
 }
 
 pub struct Function {
@@ -34,10 +34,10 @@ impl Display for Function {
         match self.objective {
             Objective::AE => write!(f, "AE"),
             Objective::MAE => write!(f, "MAE"),
-            Objective::MSE => write!(f, "MSE"),
+            // Objective::MSE => write!(f, "MSE"),
             Objective::RMSE => write!(f, "RMSE"),
-            Objective::BinaryCrossEntropy => write!(f, "BinaryCrossEntropy"),
-            Objective::MulticlassCrossEntropy => write!(f, "MulticlassCrossEntropy"),
+            // Objective::BinaryCrossEntropy => write!(f, "BinaryCrossEntropy"),
+            Objective::CategoricalCrossEntropy => write!(f, "CategoricalCrossEntropy"),
         }
     }
 }
@@ -47,10 +47,10 @@ impl Function {
         match objective {
             Objective::AE => Function { objective: Objective::AE },
             Objective::MAE => Function { objective: Objective::MAE },
-            Objective::MSE => Function { objective: Objective::MSE },
+            // Objective::MSE => Function { objective: Objective::MSE },
             Objective::RMSE => Function { objective: Objective::RMSE },
-            Objective::BinaryCrossEntropy => Function { objective: Objective::BinaryCrossEntropy },
-            Objective::MulticlassCrossEntropy => Function { objective: Objective::MulticlassCrossEntropy },
+            // Objective::BinaryCrossEntropy => Function { objective: Objective::BinaryCrossEntropy },
+            Objective::CategoricalCrossEntropy => Function { objective: Objective::CategoricalCrossEntropy },
         }
     }
@@ -65,9 +65,9 @@
                     if actual == predicted {
                         0.0
                     } else if actual > predicted {
-                        1.0
-                    } else {
                         -1.0
+                    } else {
+                        1.0
                     }
                 ).collect();
                 (loss, gradient)
@@ -81,22 +81,22 @@
                     if actual == predicted {
                         0.0
                     } else if actual > predicted {
-                        1.0
-                    } else {
                         -1.0
+                    } else {
+                        1.0
                     }
                 ).collect();
                 (loss, gradient)
             },
-            Objective::MSE => {
-                let loss: f32 = y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| (actual - predicted).powi(2))
-                    .sum::<f32>() / y.len() as f32;
-                let gradient: Vec<f32> = y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| 2.0 * (actual - predicted) / y.len() as f32)
-                    .collect();
-                (loss, gradient)
-            },
+            // Objective::MSE => {
+            //     let loss: f32 = y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| (actual - predicted).powi(2))
+            //         .sum::<f32>() / y.len() as f32;
+            //     let gradient: Vec<f32> = y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| 2.0 * (actual - predicted) / y.len() as f32)
+            //         .collect();
+            //     (loss, gradient)
+            // },
             Objective::RMSE => {
                 let loss: f32 = y.iter().zip(out.iter())
                     .map(|(actual, predicted)| (actual - predicted).powi(2))
@@ -106,27 +106,27 @@
                     if actual == predicted {
                         0.0
                     } else {
-                        (actual - predicted) /
+                        -(actual - predicted) /
                             ((actual - predicted).powi(2).sqrt() * y.len() as f32)
                     }
                 ).collect();
                 (loss, gradient)
             },
-            Objective::BinaryCrossEntropy => {
-                let eps: f32 = 1e-7;
-                let loss: f32 = -y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| {
-                        let predicted = predicted.clamp(eps, 1.0 - eps);
-                        actual * predicted.ln() + (1.0 - actual) * (1.0 - predicted).ln()
-                    }).sum::<f32>() / y.len() as f32;
-                let gradient: Vec<f32> = y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| {
-                        let predicted = predicted.clamp(eps, 1.0 - eps);
-                        -(actual / predicted - (1.0 - actual) / (1.0 - predicted))
-                    }).collect();
-                (loss, gradient)
-            },
-            Objective::MulticlassCrossEntropy => {
+            // Objective::BinaryCrossEntropy => {
+            //     let eps: f32 = 1e-7;
+            //     let loss: f32 = -y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| {
+            //             let predicted = predicted.clamp(eps, 1.0 - eps);
+            //             actual * predicted.ln() + (1.0 - actual) * (1.0 - predicted).ln()
+            //         }).sum::<f32>() / y.len() as f32;
+            //     let gradient: Vec<f32> = y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| {
+            //             let predicted = predicted.clamp(eps, 1.0 - eps);
+            //             (predicted - actual) / (predicted * (1.0 - predicted))
+            //         }).collect();
+            //     (loss, gradient)
+            // },
+            Objective::CategoricalCrossEntropy => {
                 let eps: f32 = 1e-7;
                 let loss: f32 = -y.iter().zip(out.iter())
                     .map(|(actual, predicted)|
@@ -134,7 +134,7 @@
                     ).sum::<f32>() / y.len() as f32;
                 let gradient: Vec<f32> = y.iter().zip(out.iter())
                     .map(|(actual , predicted)|
-                        -actual / (predicted + eps)
+                        predicted - actual
                     ).collect();
                 (loss, gradient)
             },
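
For reference, the commented-out `MSE` arm only needs its gradient sign flipped to match the convention the other objectives now use (the gradient is dL/d_predicted, which is why RMSE gained a leading minus and `CategoricalCrossEntropy` became `predicted - actual`). A minimal sketch, written as a hypothetical free function rather than this crate's `Function::loss` API:

    // Hypothetical standalone version of the commented-out `MSE` arm.
    // The broken gradient was `2.0 * (actual - predicted) / n`, which is the
    // negated derivative; dL/d_predicted flips the sign.
    fn mse(y: &[f32], out: &[f32]) -> (f32, Vec<f32>) {
        let n = y.len() as f32;
        let loss: f32 = y.iter().zip(out.iter())
            .map(|(actual, predicted)| (actual - predicted).powi(2))
            .sum::<f32>() / n;
        let gradient: Vec<f32> = y.iter().zip(out.iter())
            .map(|(actual, predicted)| 2.0 * (predicted - actual) / n)
            .collect();
        (loss, gradient)
    }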
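
The commented-out `BinaryCrossEntropy` arm already carries a usable gradient: `(predicted - actual) / (predicted * (1.0 - predicted))` is algebraically identical to the old `-(actual / predicted - (1.0 - actual) / (1.0 - predicted))` once `predicted` is clamped. A standalone sketch under the same assumptions (hypothetical helper name, same `(loss, gradient)` shape):

    // Hypothetical standalone version of the commented-out arm. Note that the
    // loss is averaged over y.len() while this gradient is per-element; if the
    // loss keeps the 1/n factor, the gradient arguably needs it too.
    fn binary_cross_entropy(y: &[f32], out: &[f32]) -> (f32, Vec<f32>) {
        let eps: f32 = 1e-7;
        let loss: f32 = -y.iter().zip(out.iter())
            .map(|(actual, predicted)| {
                let predicted = predicted.clamp(eps, 1.0 - eps);
                actual * predicted.ln() + (1.0 - actual) * (1.0 - predicted).ln()
            })
            .sum::<f32>() / y.len() as f32;
        let gradient: Vec<f32> = y.iter().zip(out.iter())
            .map(|(actual, predicted)| {
                let predicted = predicted.clamp(eps, 1.0 - eps);
                (predicted - actual) / (predicted * (1.0 - predicted))
            })
            .collect();
        (loss, gradient)
    }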
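
Sign and scale bugs like these are cheap to catch with a central-difference check before an objective is re-enabled. A sketch (the `check_gradient` helper and its tolerance are illustrative); it assumes the gradient carries the same `1.0 / y.len()` averaging as the loss, so a per-element gradient such as the binary cross-entropy one above would trip the assertion by a factor of `y.len()`:

    // Compare the analytic gradient with a numeric central difference.
    fn check_gradient(objective: impl Fn(&[f32], &[f32]) -> (f32, Vec<f32>)) {
        let y = [1.0_f32, 0.0, 1.0];
        let out = [0.8_f32, 0.3, 0.6];
        let (_, analytic) = objective(&y, &out);
        let h = 1e-3;
        for i in 0..out.len() {
            let (mut plus, mut minus) = (out, out);
            plus[i] += h;
            minus[i] -= h;
            let numeric = (objective(&y, &plus).0 - objective(&y, &minus).0) / (2.0 * h);
            assert!(
                (numeric - analytic[i]).abs() < 1e-2,
                "gradient sign/scale mismatch at index {i}: analytic {} vs numeric {}",
                analytic[i], numeric
            );
        }
    }

    fn main() {
        check_gradient(mse); // passes with the sign-fixed gradient sketched above
    }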