From 9cbbb12497bab4ea2c029d91b92f88ab153bf582 Mon Sep 17 00:00:00 2001
From: Hallvard Lavik
Date: Sun, 19 May 2024 21:06:14 +0200
Subject: [PATCH] Bug fixing.

`MSE` and `BinaryCrossEntropy` have some errors and are therefore commented
out.
---
 src/layer.rs     | 11 +++----
 src/network.rs   | 20 +++++--------
 src/objective.rs | 78 ++++++++++++++++++++++++------------------------
 3 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/src/layer.rs b/src/layer.rs
index 681aeda..7c274be 100644
--- a/src/layer.rs
+++ b/src/layer.rs
@@ -50,11 +50,11 @@ impl Layer {
         }
     }
 
-    pub fn forward(&self, x: Vec<f32>) -> (Vec<f32>, Vec<f32>) {
-        let inter: Vec<f32> = self.weights.iter().map(|w| dot(&w, &x)).collect();
+    pub fn forward(&self, x: &Vec<f32>) -> (Vec<f32>, Vec<f32>) {
+        let inter: Vec<f32> = self.weights.iter().map(|w| dot(&w, x)).collect();
         let out: Vec<f32> = match &self.bias {
-            Some(b) => add(&self.activation.forward(inter.clone()), b),
-            None => self.activation.forward(inter.clone()),
+            Some(b) => add(&self.activation.forward(&inter), b),
+            None => self.activation.forward(&inter),
         };
         (inter, out)
     }
@@ -67,7 +67,8 @@
             activation::Function::Softmax(_) => self.activation.backward(inter, Some(gradient)),
             _ => self.activation.backward(inter, None),
         };
-        let delta = mul(gradient, &activation);
+
+        let delta: Vec<f32> = mul(gradient, &activation);
         let weight_gradient: Vec<Vec<f32>> = delta
             .iter().map(|d| input
             .iter().map(|i| i * d)
diff --git a/src/network.rs b/src/network.rs
index 64baa66..d4f345b 100644
--- a/src/network.rs
+++ b/src/network.rs
@@ -92,7 +92,7 @@ pub fn predict(&self, x: &Vec<f32>) -> Vec<f32> {
         let mut out = x.clone();
         for layer in &self.layers {
-            let (_, _out) = layer.forward(out);
+            let (_, _out) = layer.forward(&out);
             out = _out;
         }
         out
     }
@@ -109,13 +109,12 @@
         let mut outs: Vec<Vec<f32>> = vec![out.clone()];
 
         for layer in &self.layers {
-            let (inter, next) = layer.forward(out);
+            let (inter, next) = layer.forward(&out);
             out = next;
 
             inters.push(inter);
             outs.push(out.clone());
         }
-
         (inters, outs, out)
     }
@@ -125,16 +124,16 @@
         (self.objective.loss(y, &out), inters, outs, out)
     }
 
-    fn backward(&mut self, loss: Vec<f32>, inters: Vec<Vec<f32>>, outs: Vec<Vec<f32>>) {
+    fn backward(&mut self, gradient: Vec<f32>, inters: Vec<Vec<f32>>, inputs: Vec<Vec<f32>>) {
 
-        let mut gradient = loss;
-        let inputs = outs.clone();
+        let mut gradient = gradient;
 
         for (i, layer) in self.layers.iter_mut().rev().enumerate() {
             let input = &inputs[inputs.len() - i - 2];
             let inter = &inters[inters.len() - i - 1];
-            let (weight_gradient, bias_gradient, _gradient) = layer.backward(&gradient, inter, input);
+            let (weight_gradient, bias_gradient, _gradient) =
+                layer.backward(&gradient, inter, input);
             gradient = _gradient;
 
             // Weight update.
-            match layer.bias {
-                Some(ref mut bias) => {
-                    self.optimizer.update(bias, bias_gradient.as_ref().unwrap());
-                },
-                None => {},
+            if let Some(ref mut bias) = layer.bias {
+                self.optimizer.update(bias, bias_gradient.as_ref().unwrap());
+            }
         }
     }
diff --git a/src/objective.rs b/src/objective.rs
index 380038c..b80f6ba 100644
--- a/src/objective.rs
+++ b/src/objective.rs
@@ -19,10 +19,10 @@ use std::fmt::Display;
 pub enum Objective {
     AE,
     MAE,
-    MSE,
+    // MSE,
     RMSE,
-    BinaryCrossEntropy,
-    MulticlassCrossEntropy,
+    // BinaryCrossEntropy,
+    CategoricalCrossEntropy,
 }
 
 pub struct Function {
@@ -34,10 +34,10 @@ impl Display for Function {
         match self.objective {
             Objective::AE => write!(f, "AE"),
             Objective::MAE => write!(f, "MAE"),
-            Objective::MSE => write!(f, "MSE"),
+            // Objective::MSE => write!(f, "MSE"),
             Objective::RMSE => write!(f, "RMSE"),
-            Objective::BinaryCrossEntropy => write!(f, "BinaryCrossEntropy"),
-            Objective::MulticlassCrossEntropy => write!(f, "MulticlassCrossEntropy"),
+            // Objective::BinaryCrossEntropy => write!(f, "BinaryCrossEntropy"),
+            Objective::CategoricalCrossEntropy => write!(f, "CategoricalCrossEntropy"),
         }
     }
 }
@@ -47,10 +47,10 @@ impl Function {
         match objective {
             Objective::AE => Function { objective: Objective::AE },
             Objective::MAE => Function { objective: Objective::MAE },
-            Objective::MSE => Function { objective: Objective::MSE },
+            // Objective::MSE => Function { objective: Objective::MSE },
             Objective::RMSE => Function { objective: Objective::RMSE },
-            Objective::BinaryCrossEntropy => Function { objective: Objective::BinaryCrossEntropy },
-            Objective::MulticlassCrossEntropy => Function { objective: Objective::MulticlassCrossEntropy },
+            // Objective::BinaryCrossEntropy => Function { objective: Objective::BinaryCrossEntropy },
+            Objective::CategoricalCrossEntropy => Function { objective: Objective::CategoricalCrossEntropy },
         }
     }
@@ -65,9 +65,9 @@
                     if actual == predicted {
                         0.0
                     } else if actual > predicted {
-                        1.0
-                    } else {
                         -1.0
+                    } else {
+                        1.0
                     }
                 ).collect();
                 (loss, gradient)
@@ -81,22 +81,22 @@
                     if actual == predicted {
                         0.0
                     } else if actual > predicted {
-                        1.0
-                    } else {
                         -1.0
+                    } else {
+                        1.0
                     }
                 ).collect();
                 (loss, gradient)
             },
-            Objective::MSE => {
-                let loss: f32 = y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| (actual - predicted).powi(2))
-                    .sum::<f32>() / y.len() as f32;
-                let gradient: Vec<f32> = y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| 2.0 * (actual - predicted) / y.len() as f32)
-                    .collect();
-                (loss, gradient)
-            },
+            // Objective::MSE => {
+            //     let loss: f32 = y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| (actual - predicted).powi(2))
+            //         .sum::<f32>() / y.len() as f32;
+            //     let gradient: Vec<f32> = y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| 2.0 * (actual - predicted) / y.len() as f32)
+            //         .collect();
+            //     (loss, gradient)
+            // },
             Objective::RMSE => {
                 let loss: f32 = y.iter().zip(out.iter())
                     .map(|(actual, predicted)| (actual - predicted).powi(2))
@@ -106,27 +106,27 @@
                     if actual == predicted {
                         0.0
                     } else {
-                        (actual - predicted) /
+                        -(actual - predicted) /
                             ((actual - predicted).powi(2).sqrt() * y.len() as f32)
                     }
                 ).collect();
                 (loss, gradient)
             },
-            Objective::BinaryCrossEntropy => {
-                let eps: f32 = 1e-7;
-                let loss: f32 = -y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| {
-                        let predicted = predicted.clamp(eps, 1.0 - eps);
-                        actual * predicted.ln() + (1.0 - actual) * (1.0 - predicted).ln()
-                    }).sum::<f32>() / y.len() as f32;
-                let gradient: Vec<f32> = y.iter().zip(out.iter())
-                    .map(|(actual, predicted)| {
-                        let predicted = predicted.clamp(eps, 1.0 - eps);
-                        -(actual / predicted - (1.0 - actual) / (1.0 - predicted))
-                    }).collect();
-                (loss, gradient)
-            },
-            Objective::MulticlassCrossEntropy => {
+            // Objective::BinaryCrossEntropy => {
+            //     let eps: f32 = 1e-7;
+            //     let loss: f32 = -y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| {
+            //             let predicted = predicted.clamp(eps, 1.0 - eps);
+            //             actual * predicted.ln() + (1.0 - actual) * (1.0 - predicted).ln()
+            //         }).sum::<f32>() / y.len() as f32;
+            //     let gradient: Vec<f32> = y.iter().zip(out.iter())
+            //         .map(|(actual, predicted)| {
+            //             let predicted = predicted.clamp(eps, 1.0 - eps);
+            //             (predicted - actual) / (predicted * (1.0 - predicted))
+            //         }).collect();
+            //     (loss, gradient)
+            // },
+            Objective::CategoricalCrossEntropy => {
                 let eps: f32 = 1e-7;
                 let loss: f32 = -y.iter().zip(out.iter())
                     .map(|(actual, predicted)|
@@ -134,7 +134,7 @@
                     ).sum::<f32>() / y.len() as f32;
                 let gradient: Vec<f32> = y.iter().zip(out.iter())
                     .map(|(actual , predicted)|
-                        -actual / (predicted + eps)
+                        predicted - actual
                     ).collect();
                 (loss, gradient)
             },
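
For reference, the commented-out `MSE` arm only needs its gradient sign flipped to match the convention the other objectives now use (the gradient is dL/d_predicted, which is why RMSE gained a leading minus and `CategoricalCrossEntropy` became `predicted - actual`). A minimal sketch, written as a hypothetical free function rather than this crate's `Function::loss` API:

    // Hypothetical standalone version of the commented-out `MSE` arm.
    // The broken gradient was `2.0 * (actual - predicted) / n`, which is the
    // negated derivative; dL/d_predicted flips the sign.
    fn mse(y: &[f32], out: &[f32]) -> (f32, Vec<f32>) {
        let n = y.len() as f32;
        let loss: f32 = y.iter().zip(out.iter())
            .map(|(actual, predicted)| (actual - predicted).powi(2))
            .sum::<f32>() / n;
        let gradient: Vec<f32> = y.iter().zip(out.iter())
            .map(|(actual, predicted)| 2.0 * (predicted - actual) / n)
            .collect();
        (loss, gradient)
    }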
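
The commented-out `BinaryCrossEntropy` arm already carries a usable gradient: `(predicted - actual) / (predicted * (1.0 - predicted))` is algebraically identical to the old `-(actual / predicted - (1.0 - actual) / (1.0 - predicted))` once `predicted` is clamped. A standalone sketch under the same assumptions (hypothetical helper name, same `(loss, gradient)` shape):

    // Hypothetical standalone version of the commented-out arm. Note that the
    // loss is averaged over y.len() while this gradient is per-element; if the
    // loss keeps the 1/n factor, the gradient arguably needs it too.
    fn binary_cross_entropy(y: &[f32], out: &[f32]) -> (f32, Vec<f32>) {
        let eps: f32 = 1e-7;
        let loss: f32 = -y.iter().zip(out.iter())
            .map(|(actual, predicted)| {
                let predicted = predicted.clamp(eps, 1.0 - eps);
                actual * predicted.ln() + (1.0 - actual) * (1.0 - predicted).ln()
            })
            .sum::<f32>() / y.len() as f32;
        let gradient: Vec<f32> = y.iter().zip(out.iter())
            .map(|(actual, predicted)| {
                let predicted = predicted.clamp(eps, 1.0 - eps);
                (predicted - actual) / (predicted * (1.0 - predicted))
            })
            .collect();
        (loss, gradient)
    }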
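
Sign and scale bugs like these are cheap to catch with a central-difference check before an objective is re-enabled. A sketch (the `check_gradient` helper and its tolerance are illustrative); it assumes the gradient carries the same `1.0 / y.len()` averaging as the loss, so a per-element gradient such as the binary cross-entropy one above would trip the assertion by a factor of `y.len()`:

    // Compare the analytic gradient with a numeric central difference.
    fn check_gradient(objective: impl Fn(&[f32], &[f32]) -> (f32, Vec<f32>)) {
        let y = [1.0_f32, 0.0, 1.0];
        let out = [0.8_f32, 0.3, 0.6];
        let (_, analytic) = objective(&y, &out);
        let h = 1e-3;
        for i in 0..out.len() {
            let (mut plus, mut minus) = (out, out);
            plus[i] += h;
            minus[i] -= h;
            let numeric = (objective(&y, &plus).0 - objective(&y, &minus).0) / (2.0 * h);
            assert!(
                (numeric - analytic[i]).abs() < 1e-2,
                "gradient sign/scale mismatch at index {i}: analytic {} vs numeric {}",
                analytic[i], numeric
            );
        }
    }

    fn main() {
        check_gradient(mse); // passes with the sign-fixed gradient sketched above
    }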