Commit

Fix batched weight update bug. Completed: #12 #13 #15.

hallvardnmbu committed Aug 28, 2024
1 parent 09cf0d6 commit 342eb58

Showing 5 changed files with 153 additions and 165 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "neurons"
version = "1.1.0"
version = "2.0.0"
edition = "2021"
description = "Neural networks from scratch, in Rust."
authors = ["Hallvard Høyland Lavik"]
28 changes: 20 additions & 8 deletions README.md
@@ -71,6 +71,18 @@ Create modular neural networks in Rust with ease!
<details>
<summary>Releases</summary>

+## 2.0.0 (Fix batched weight updates)
+
+* Weight updates are now batched correctly.
+  * See `network::Network::learn` for details.
+
+### Benchmarking examples/example_benchmark.rs (mnist version)
+
+```raw
+batched (128): 17.268632412s (4.82x speedup)
+unbatched (1): 83.347593292s
+```
+
## 1.1.0 (Improved optimizer step)

* Optimizer step more intuitive and easy to read.
@@ -85,7 +97,7 @@ Create modular neural networks in Rust with ease!
* Batched training (`network::Network::learn`)
* Parallelization of batches (`rayon`)

-### Benchmarking example/example_benchmark.rs
+### Benchmarking examples/example_benchmark.rs (iris version)

```raw
v0.3.0: 0.318811179s (6.95x speedup)
@@ -164,6 +176,7 @@ Create modular neural networks in Rust with ease!
- [x] Feedforward (dubbed `Network`)
- [x] Convolutional
- [ ] Recurrent
+- [ ] Skip connections
- [ ] Feedback connections
- [x] Dense to Dense
- [ ] Dense to Convolutional
@@ -173,7 +186,7 @@ Create modular neural networks in Rust with ease!
## Regularization
- [x] Dropout
- [ ] Batch normalization
-- [ ] Early stopping
+- [x] Early stopping

## Parallelization
- [x] Parallelization of batches
@@ -186,10 +199,10 @@ Create modular neural networks in Rust with ease!
- [x] Thorough testing of objective functions
- [x] Thorough testing of optimization techniques
- [ ] Thorough testing of feedback scaling (wrt. gradients)
-- [ ] Integration tests
+- [x] Integration tests
- [x] Network forward pass
- [x] Network backward pass
-- [ ] Network training (i.e., weight updates)
+- [x] Network training (i.e., weight updates)

## Examples
- [x] XOR
@@ -220,7 +233,6 @@ Create modular neural networks in Rust with ease!
- [ ] General data loading functionality
- [x] Custom icon/image for documentation
- [x] Custom stylesheet for documentation
-- [ ] Type conversion (e.g. f32, f64)
- [ ] Network type specification (e.g. f32, f64)
- [ ] Saving and loading
- [ ] Single layer weights
@@ -242,9 +254,9 @@ Create modular neural networks in Rust with ease!
* [Adam](https://pytorch.org/docs/stable/generated/torch.optim.Adam.html)
* [AdamW](https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html)
* [RMSprop](https://pytorch.org/docs/stable/generated/torch.optim.RMSprop.html)
-* [backpropagation convolution 1](https://deeplearning.cs.cmu.edu/F21/document/recitation/Recitation5/CNN_Backprop_Recitation_5_F21.pdf)
-* [backpropagation convolution 2](https://www.jefkine.com/general/2016/09/05/backpropagation-in-convolutional-neural-networks/)
-* [backpropagation convolution 3](https://sites.cc.gatech.edu/classes/AY2021/cs7643_spring/assets/L11_CNNs.pdf)
+* [convolution 1](https://deeplearning.cs.cmu.edu/F21/document/recitation/Recitation5/CNN_Backprop_Recitation_5_F21.pdf)
+* [convolution 2](https://www.jefkine.com/general/2016/09/05/backpropagation-in-convolutional-neural-networks/)
+* [convolution 3](https://sites.cc.gatech.edu/classes/AY2021/cs7643_spring/assets/L11_CNNs.pdf)

### Tools used

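Aside on the 2.0.0 note above: "batched" means per-sample gradients are accumulated over a mini-batch and the averaged gradient is applied in a single weight update, rather than the weights being stepped after every sample; the quoted timings are consistent with this (83.35 s / 17.27 s ≈ 4.8x). Below is a minimal, self-contained sketch of that accumulate-then-average step. It is illustrative only: `Layer`, `grad`, and `apply` are hypothetical names, not the `neurons` API; see `network::Network::learn` for the crate's actual implementation.

```rust
// A single linear neuron trained with squared error, for illustration.
struct Layer {
    weights: Vec<f32>,
    bias: f32,
}

impl Layer {
    // Forward pass: w . x + b.
    fn forward(&self, x: &[f32]) -> f32 {
        self.weights.iter().zip(x).map(|(w, xi)| w * xi).sum::<f32>() + self.bias
    }

    // Gradient of 0.5 * (forward(x) - y)^2 w.r.t. weights and bias.
    fn grad(&self, x: &[f32], y: f32) -> (Vec<f32>, f32) {
        let delta = self.forward(x) - y;
        (x.iter().map(|xi| delta * xi).collect(), delta)
    }

    // One gradient-descent step.
    fn apply(&mut self, dw: &[f32], db: f32, lr: f32) {
        for (w, d) in self.weights.iter_mut().zip(dw) {
            *w -= lr * d;
        }
        self.bias -= lr * db;
    }
}

fn main() {
    let xs: [[f32; 2]; 4] = [[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]];
    let ys: [f32; 4] = [1.0, 1.0, 0.0, 0.0];
    let mut layer = Layer { weights: vec![0.5, -0.5], bias: 0.0 };

    // Batched: accumulate gradients over the whole batch, then apply the
    // average once. Calling `apply` inside the loop instead would step the
    // weights after every sample, which is what "unbatched (1)" measures.
    let mut dw_sum = vec![0.0f32; layer.weights.len()];
    let mut db_sum = 0.0f32;
    for (x, &y) in xs.iter().zip(ys.iter()) {
        let (dw, db) = layer.grad(x, y);
        for (s, d) in dw_sum.iter_mut().zip(dw.iter()) {
            *s += d;
        }
        db_sum += db;
    }
    let n = xs.len() as f32;
    let dw_avg: Vec<f32> = dw_sum.iter().map(|d| d / n).collect();
    layer.apply(&dw_avg, db_sum / n, 0.1);

    println!("weights after one batched step: {:?}", layer.weights);
}
```

Averaging rather than summing keeps the effective step size independent of the batch size; a mismatch there is a common way for batched and unbatched training to diverge.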
165 changes: 74 additions & 91 deletions examples/example_benchmark.rs
@@ -1,118 +1,101 @@
// Copyright (C) 2024 Hallvard Høyland Lavik

-use neurons::{activation, network, objective, optimizer, random, tensor};
+use neurons::{activation, network, objective, optimizer, tensor};

+use std::fs::File;
+use std::io::{BufReader, Read, Result};
use std::time;
-extern crate csv;

-fn data(path: &str) -> (Vec<tensor::Tensor>, Vec<tensor::Tensor>) {
-    let mut reader = csv::Reader::from_path(path).unwrap();
-
-    let mut x: Vec<Vec<f32>> = Vec::new();
-    let mut y: Vec<Vec<f32>> = Vec::new();
-
-    reader.records().for_each(|record| {
-        let record = record.unwrap();
-        x.push(vec![
-            record.get(1).unwrap().parse::<f32>().unwrap(),
-            record.get(2).unwrap().parse::<f32>().unwrap(),
-            record.get(3).unwrap().parse::<f32>().unwrap(),
-            record.get(4).unwrap().parse::<f32>().unwrap(),
-        ]);
-        y.push(match record.get(5).unwrap() {
-            "Iris-setosa" => vec![1.0, 0.0, 0.0],
-            "Iris-versicolor" => vec![0.0, 1.0, 0.0],
-            "Iris-virginica" => vec![0.0, 0.0, 1.0],
-            // "Iris-setosa" => vec![0.0],
-            // "Iris-versicolor" => vec![1.0],
-            // "Iris-virginica" => vec![2.0],
-            _ => panic!("Unknown class"),
-        });
-    });
-
-    let mut generator = random::Generator::create(12345);
-    let mut indices: Vec<usize> = (0..x.len()).collect();
-    generator.shuffle(&mut indices);
-
-    let x: Vec<tensor::Tensor> = indices
-        .iter()
-        .map(|&i| tensor::Tensor::vector(x[i].clone()))
-        .collect();
-    let y: Vec<tensor::Tensor> = indices
-        .iter()
-        .map(|&i| tensor::Tensor::vector(y[i].clone()))
-        .collect();
-
-    (x, y)
+fn read(reader: &mut dyn Read) -> Result<u32> {
+    let mut buffer = [0; 4];
+    reader.read_exact(&mut buffer)?;
+    Ok(u32::from_be_bytes(buffer))
}

+fn load_images(path: &str) -> Result<Vec<tensor::Tensor>> {
+    let mut reader = BufReader::new(File::open(path)?);
+    let mut images: Vec<tensor::Tensor> = Vec::new();
+
+    let _magic_number = read(&mut reader)?;
+    let num_images = read(&mut reader)?;
+    let num_rows = read(&mut reader)?;
+    let num_cols = read(&mut reader)?;
+
+    for _ in 0..num_images {
+        let mut image: Vec<Vec<f32>> = Vec::new();
+        for _ in 0..num_rows {
+            let mut row: Vec<f32> = Vec::new();
+            for _ in 0..num_cols {
+                let mut pixel = [0];
+                reader.read_exact(&mut pixel)?;
+                row.push(pixel[0] as f32 / 255.0);
+            }
+            image.push(row);
+        }
+        images.push(tensor::Tensor::tensor(vec![image]).resize(tensor::Shape::Tensor(1, 14, 14)));
+    }
+
+    Ok(images)
+}
+
+fn load_labels(file_path: &str, numbers: f32) -> Result<Vec<tensor::Tensor>> {
+    let mut reader = BufReader::new(File::open(file_path)?);
+    let _magic_number = read(&mut reader)?;
+    let num_labels = read(&mut reader)?;
+
+    let mut _labels = vec![0; num_labels as usize];
+    reader.read_exact(&mut _labels)?;
+
+    Ok(_labels
+        .iter()
+        .map(|&x| tensor::Tensor::one_hot(x as f32, numbers))
+        .collect())
+}
+
fn main() {
-    // Load the iris dataset
-    let (x, y) = data("./datasets/iris.csv");
-
-    let split = (x.len() as f32 * 0.8) as usize;
-    let x = x.split_at(split);
-    let y = y.split_at(split);
-
-    let x_train: Vec<&tensor::Tensor> = x.0.iter().collect();
-    let y_train: Vec<&tensor::Tensor> = y.0.iter().collect();
-    let x_test: Vec<&tensor::Tensor> = x.1.iter().collect();
-    let y_test: Vec<&tensor::Tensor> = y.1.iter().collect();
-
-    let (x_train, y_train, x_test, y_test) = (
-        x_train.to_vec(),
-        y_train.to_vec(),
-        x_test.to_vec(),
-        y_test.to_vec(),
-    );
-    println!(
-        "Train data {}x{}: {} => {}",
-        x_train.len(),
-        x_train[0].shape,
-        x_train[0].data,
-        y_train[0].data
-    );
-    println!(
-        "Test data {}x{}: {} => {}",
-        x_test.len(),
-        x_test[0].shape,
-        x_test[0].data,
-        y_test[0].data
-    );
+    let x_train = load_images("./examples/datasets/mnist/train-images-idx3-ubyte").unwrap();
+    let y_train = load_labels("./examples/datasets/mnist/train-labels-idx1-ubyte", 10f32).unwrap();

+    let x_train: Vec<&tensor::Tensor> = x_train.iter().collect();
+    let y_train: Vec<&tensor::Tensor> = y_train.iter().collect();

    let mut times: Vec<time::Duration> = Vec::new();

-    for _ in 0..10 {
+    for iteration in 0..10 {
        let start = time::Instant::now();

        // Create the network
-        let mut network = network::Network::new(tensor::Shape::Vector(4));
-
-        network.dense(50, activation::Activation::ReLU, false, Some(0.1));
-        network.dense(50, activation::Activation::ReLU, false, Some(0.1));
-        network.dense(3, activation::Activation::Softmax, false, Some(0.1));
+        let mut network = network::Network::new(tensor::Shape::Tensor(1, 14, 14));

+        network.convolution(
+            8,
+            (3, 3),
+            (1, 1),
+            (0, 0),
+            activation::Activation::ReLU,
+            Some(0.05),
+        );
+        network.maxpool((2, 2), (2, 2));
+        network.dense(10, activation::Activation::Softmax, true, None);

-        network.set_optimizer(optimizer::Optimizer::RMSprop(optimizer::RMSprop {
+        network.set_optimizer(optimizer::Optimizer::Adam(optimizer::Adam {
            learning_rate: 0.001,
-            alpha: 0.0,
+            decay: None,
+            beta1: 0.9,
+            beta2: 0.999,
+            epsilon: 1e-8,

-            decay: Some(0.01),
-            momentum: Some(0.01),
-            centered: Some(true),

            // To be filled by the network:
            velocity: vec![],
-            gradient: vec![],
-            buffer: vec![],
+            momentum: vec![],
        }));
        network.set_objective(
            objective::Objective::CrossEntropy, // Objective function
-            Some((-1f32, 1f32)), // Gradient clipping
+            None, // Gradient clipping
        );

        // Train the network
-        let (_train_loss, _val_loss) = network.learn(&x_train, &y_train, None, 25, 500, Some(50));
+        let (train_loss, _) = network.learn(&x_train, &y_train, None, 128, 10, None);

+        println!("Iteration: {}, Loss: {:?}", iteration, train_loss);

        let duration = start.elapsed();
        times.push(duration);
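For context on the MNIST loader added in this file: IDX, the container format for the MNIST files, stores its header as big-endian u32 fields (a magic number, then counts and dimensions), which is what the new `read` helper decodes via `u32::from_be_bytes`. A standalone sanity check of that byte-order assumption; the magic constants come from the IDX format specification, not from this crate:

```rust
fn main() {
    // IDX image files start with magic number 2051 (0x00000803),
    // label files with 2049 (0x00000801), both stored big-endian.
    let image_magic = [0x00u8, 0x00, 0x08, 0x03];
    assert_eq!(u32::from_be_bytes(image_magic), 2051);

    let label_magic = [0x00u8, 0x00, 0x08, 0x01];
    assert_eq!(u32::from_be_bytes(label_magic), 2049);

    // Reading the same bytes little-endian yields nonsense, which is why
    // the loader's use of from_be_bytes is load-bearing.
    assert_ne!(u32::from_le_bytes(image_magic), 2051);

    println!("IDX headers are big-endian: checks pass");
}
```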
20 changes: 5 additions & 15 deletions examples/example_mnist.rs
@@ -31,7 +31,7 @@ fn load_images(path: &str) -> Result<Vec<tensor::Tensor>> {
            }
            image.push(row);
        }
-        images.push(tensor::Tensor::tensor(vec![image]).resize(tensor::Shape::Tensor(1, 14, 14)));
+        images.push(tensor::Tensor::tensor(vec![image]).resize(tensor::Shape::Tensor(1, 10, 10)));
    }

    Ok(images)
@@ -67,7 +67,7 @@ fn main() {
    let x_test: Vec<&tensor::Tensor> = x_test.iter().collect();
    let y_test: Vec<&tensor::Tensor> = y_test.iter().collect();

-    let mut network = network::Network::new(tensor::Shape::Tensor(1, 14, 14));
+    let mut network = network::Network::new(tensor::Shape::Tensor(1, 10, 10));

    network.convolution(
        8,
@@ -78,15 +78,6 @@ fn main() {
        Some(0.05),
    );
    network.maxpool((2, 2), (2, 2));
-    // network.convolution(
-    //     8,
-    //     (3, 3),
-    //     (1, 1),
-    //     (0, 0),
-    //     activation::Activation::ReLU,
-    //     None,
-    // );
-    network.dense(512, activation::Activation::ReLU, true, Some(0.25));
    network.dense(10, activation::Activation::Softmax, true, None);

    network.set_optimizer(optimizer::Optimizer::Adam(optimizer::Adam {
@@ -106,13 +97,12 @@ fn main() {
println!("{}", network);

// Train the network
let (train_loss, val_loss) =
network.learn(&x_train, &y_train, Some((0.1, 5)), 128, 50, Some(1));
let (train_loss, val_loss) = network.learn(&x_train, &y_train, Some((0.1, 5)), 128, 5, Some(1));
plot::loss(&train_loss, &val_loss, "Loss per epoch", "loss.png");

// Validate the network
let (val_loss, val_acc) = network.validate(&x_test, &y_test, 0.1);
println!("1. Validation acc: {}, loss: {}", val_acc, val_loss);
let (test_loss, test_acc) = network.validate(&x_test, &y_test, 0.1);
println!("1. Test acc: {}, loss: {}", test_acc, test_loss);

// Use the network
let prediction = network.predict(x_test.get(0).unwrap());
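A quick shape check on the switch from 14x14 to 10x10 inputs, using the standard output-size formula for unpadded convolution and pooling (a sketch based on that formula, not on the crate's internals): the 3x3, stride-1 convolution maps 10x10 to 8x8, the 2x2, stride-2 max-pool reduces that to 4x4, and the final dense layer therefore sees 8 x 4 x 4 = 128 features.

```rust
// Output side length of a valid (unpadded) convolution or pooling window:
// out = (input - kernel + 2 * padding) / stride + 1
fn out_dim(input: usize, kernel: usize, stride: usize, padding: usize) -> usize {
    (input - kernel + 2 * padding) / stride + 1
}

fn main() {
    let conv = out_dim(10, 3, 1, 0); // 3x3 kernel, stride 1, no padding: 10 -> 8
    let pool = out_dim(conv, 2, 2, 0); // 2x2 max-pool, stride 2: 8 -> 4
    let filters = 8; // the convolution layer in the example uses 8 filters
    println!("dense input features: {}", filters * pool * pool); // 128
    assert_eq!(filters * pool * pool, 128);
}
```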
