Commit

Fix batched weight update bug. Completed: #12 #13 #15.

hallvardnmbu committed Aug 28, 2024
1 parent 09cf0d6 commit 342eb58

Showing 5 changed files with 153 additions and 165 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "neurons"
version = "1.1.0"
version = "2.0.0"
edition = "2021"
description = "Neural networks from scratch, in Rust."
authors = ["Hallvard Høyland Lavik"]
28 changes: 20 additions & 8 deletions README.md
@@ -71,6 +71,18 @@ Create modular neural networks in Rust with ease!
<details>
<summary>Releases</summary>

+## 2.0.0 (Fix batched weight updates)
+
+* Weight updates are now batched correctly.
+  * See `network::Network::learn` for details.
+
+### Benchmarking examples/example_benchmark.rs (mnist version)
+
+```raw
+batched (128): 17.268632412s (4.82x speedup)
+unbatched (1): 83.347593292s
+```
+
## 1.1.0 (Improved optimizer step)

* Optimizer step more intuitive and easy to read.
@@ -85,7 +97,7 @@ Create modular neural networks in Rust with ease!
* Batched training (`network::Network::learn`)
* Parallelization of batches (`rayon`)

-### Benchmarking example/example_benchmark.rs
+### Benchmarking examples/example_benchmark.rs (iris version)

```raw
v0.3.0: 0.318811179s (6.95x speedup)
@@ -164,6 +176,7 @@ Create modular neural networks in Rust with ease!
- [x] Feedforward (dubbed `Network`)
- [x] Convolutional
- [ ] Recurrent
+- [ ] Skip connections
- [ ] Feedback connections
- [x] Dense to Dense
- [ ] Dense to Convolutional
@@ -173,7 +186,7 @@ Create modular neural networks in Rust with ease!
## Regularization
- [x] Dropout
- [ ] Batch normalization
-- [ ] Early stopping
+- [x] Early stopping

## Parallelization
- [x] Parallelization of batches
@@ -186,10 +199,10 @@ Create modular neural networks in Rust with ease!
- [x] Thorough testing of objective functions
- [x] Thorough testing of optimization techniques
- [ ] Thorough testing of feedback scaling (wrt. gradients)
-- [ ] Integration tests
+- [x] Integration tests
- [x] Network forward pass
- [x] Network backward pass
-- [ ] Network training (i.e., weight updates)
+- [x] Network training (i.e., weight updates)

## Examples
- [x] XOR
@@ -220,7 +233,6 @@ Create modular neural networks in Rust with ease!
- [ ] General data loading functionality
- [x] Custom icon/image for documentation
- [x] Custom stylesheet for documentation
-- [ ] Type conversion (e.g. f32, f64)
- [ ] Network type specification (e.g. f32, f64)
- [ ] Saving and loading
- [ ] Single layer weights
@@ -242,9 +254,9 @@ Create modular neural networks in Rust with ease!
* [Adam](https://pytorch.org/docs/stable/generated/torch.optim.Adam.html)
* [AdamW](https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html)
* [RMSprop](https://pytorch.org/docs/stable/generated/torch.optim.RMSprop.html)
-* [backpropagation convolution 1](https://deeplearning.cs.cmu.edu/F21/document/recitation/Recitation5/CNN_Backprop_Recitation_5_F21.pdf)
-* [backpropagation convolution 2](https://www.jefkine.com/general/2016/09/05/backpropagation-in-convolutional-neural-networks/)
-* [backpropagation convolution 3](https://sites.cc.gatech.edu/classes/AY2021/cs7643_spring/assets/L11_CNNs.pdf)
+* [convolution 1](https://deeplearning.cs.cmu.edu/F21/document/recitation/Recitation5/CNN_Backprop_Recitation_5_F21.pdf)
+* [convolution 2](https://www.jefkine.com/general/2016/09/05/backpropagation-in-convolutional-neural-networks/)
+* [convolution 3](https://sites.cc.gatech.edu/classes/AY2021/cs7643_spring/assets/L11_CNNs.pdf)

### Tools used

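Aside on the 2.0.0 note above: "batched" means per-sample gradients are accumulated over a mini-batch and the averaged gradient is applied in a single weight update, rather than the weights being stepped after every sample; the quoted timings are consistent with this (83.35 s / 17.27 s ≈ 4.8x). Below is a minimal, self-contained sketch of that accumulate-then-average step. It is illustrative only: `Layer`, `grad`, and `apply` are hypothetical names, not the `neurons` API; see `network::Network::learn` for the crate's actual implementation.

```rust
// A single linear neuron trained with squared error, for illustration.
struct Layer {
    weights: Vec<f32>,
    bias: f32,
}

impl Layer {
    // Forward pass: w . x + b.
    fn forward(&self, x: &[f32]) -> f32 {
        self.weights.iter().zip(x).map(|(w, xi)| w * xi).sum::<f32>() + self.bias
    }

    // Gradient of 0.5 * (forward(x) - y)^2 w.r.t. weights and bias.
    fn grad(&self, x: &[f32], y: f32) -> (Vec<f32>, f32) {
        let delta = self.forward(x) - y;
        (x.iter().map(|xi| delta * xi).collect(), delta)
    }

    // One gradient-descent step.
    fn apply(&mut self, dw: &[f32], db: f32, lr: f32) {
        for (w, d) in self.weights.iter_mut().zip(dw) {
            *w -= lr * d;
        }
        self.bias -= lr * db;
    }
}

fn main() {
    let xs: [[f32; 2]; 4] = [[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]];
    let ys: [f32; 4] = [1.0, 1.0, 0.0, 0.0];
    let mut layer = Layer { weights: vec![0.5, -0.5], bias: 0.0 };

    // Batched: accumulate gradients over the whole batch, then apply the
    // average once. Calling `apply` inside the loop instead would step the
    // weights after every sample, which is what "unbatched (1)" measures.
    let mut dw_sum = vec![0.0f32; layer.weights.len()];
    let mut db_sum = 0.0f32;
    for (x, &y) in xs.iter().zip(ys.iter()) {
        let (dw, db) = layer.grad(x, y);
        for (s, d) in dw_sum.iter_mut().zip(dw.iter()) {
            *s += d;
        }
        db_sum += db;
    }
    let n = xs.len() as f32;
    let dw_avg: Vec<f32> = dw_sum.iter().map(|d| d / n).collect();
    layer.apply(&dw_avg, db_sum / n, 0.1);

    println!("weights after one batched step: {:?}", layer.weights);
}
```

Averaging rather than summing keeps the effective step size independent of the batch size; a mismatch there is a common way for batched and unbatched training to diverge.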
165 changes: 74 additions & 91 deletions examples/example_benchmark.rs
@@ -1,118 +1,101 @@
// Copyright (C) 2024 Hallvard Høyland Lavik

-use neurons::{activation, network, objective, optimizer, random, tensor};
+use neurons::{activation, network, objective, optimizer, tensor};

+use std::fs::File;
+use std::io::{BufReader, Read, Result};
use std::time;
-extern crate csv;

-fn data(path: &str) -> (Vec<tensor::Tensor>, Vec<tensor::Tensor>) {
-    let mut reader = csv::Reader::from_path(path).unwrap();
-
-    let mut x: Vec<Vec<f32>> = Vec::new();
-    let mut y: Vec<Vec<f32>> = Vec::new();
-
-    reader.records().for_each(|record| {
-        let record = record.unwrap();
-        x.push(vec![
-            record.get(1).unwrap().parse::<f32>().unwrap(),
-            record.get(2).unwrap().parse::<f32>().unwrap(),
-            record.get(3).unwrap().parse::<f32>().unwrap(),
-            record.get(4).unwrap().parse::<f32>().unwrap(),
-        ]);
-        y.push(match record.get(5).unwrap() {
-            "Iris-setosa" => vec![1.0, 0.0, 0.0],
-            "Iris-versicolor" => vec![0.0, 1.0, 0.0],
-            "Iris-virginica" => vec![0.0, 0.0, 1.0],
-            // "Iris-setosa" => vec![0.0],
-            // "Iris-versicolor" => vec![1.0],
-            // "Iris-virginica" => vec![2.0],
-            _ => panic!("Unknown class"),
-        });
-    });
-
-    let mut generator = random::Generator::create(12345);
-    let mut indices: Vec<usize> = (0..x.len()).collect();
-    generator.shuffle(&mut indices);
-
-    let x: Vec<tensor::Tensor> = indices
-        .iter()
-        .map(|&i| tensor::Tensor::vector(x[i].clone()))
-        .collect();
-    let y: Vec<tensor::Tensor> = indices
-        .iter()
-        .map(|&i| tensor::Tensor::vector(y[i].clone()))
-        .collect();
-
-    (x, y)
+fn read(reader: &mut dyn Read) -> Result<u32> {
+    let mut buffer = [0; 4];
+    reader.read_exact(&mut buffer)?;
+    Ok(u32::from_be_bytes(buffer))
}

+fn load_images(path: &str) -> Result<Vec<tensor::Tensor>> {
+    let mut reader = BufReader::new(File::open(path)?);
+    let mut images: Vec<tensor::Tensor> = Vec::new();
+
+    let _magic_number = read(&mut reader)?;
+    let num_images = read(&mut reader)?;
+    let num_rows = read(&mut reader)?;
+    let num_cols = read(&mut reader)?;
+
+    for _ in 0..num_images {
+        let mut image: Vec<Vec<f32>> = Vec::new();
+        for _ in 0..num_rows {
+            let mut row: Vec<f32> = Vec::new();
+            for _ in 0..num_cols {
+                let mut pixel = [0];
+                reader.read_exact(&mut pixel)?;
+                row.push(pixel[0] as f32 / 255.0);
+            }
+            image.push(row);
+        }
+        images.push(tensor::Tensor::tensor(vec![image]).resize(tensor::Shape::Tensor(1, 14, 14)));
+    }
+
+    Ok(images)
+}
+
+fn load_labels(file_path: &str, numbers: f32) -> Result<Vec<tensor::Tensor>> {
+    let mut reader = BufReader::new(File::open(file_path)?);
+    let _magic_number = read(&mut reader)?;
+    let num_labels = read(&mut reader)?;
+
+    let mut _labels = vec![0; num_labels as usize];
+    reader.read_exact(&mut _labels)?;
+
+    Ok(_labels
+        .iter()
+        .map(|&x| tensor::Tensor::one_hot(x as f32, numbers))
+        .collect())
+}
+
fn main() {
-    // Load the iris dataset
-    let (x, y) = data("./datasets/iris.csv");
-
-    let split = (x.len() as f32 * 0.8) as usize;
-    let x = x.split_at(split);
-    let y = y.split_at(split);
-
-    let x_train: Vec<&tensor::Tensor> = x.0.iter().collect();
-    let y_train: Vec<&tensor::Tensor> = y.0.iter().collect();
-    let x_test: Vec<&tensor::Tensor> = x.1.iter().collect();
-    let y_test: Vec<&tensor::Tensor> = y.1.iter().collect();
-
-    let (x_train, y_train, x_test, y_test) = (
-        x_train.to_vec(),
-        y_train.to_vec(),
-        x_test.to_vec(),
-        y_test.to_vec(),
-    );
-    println!(
-        "Train data {}x{}: {} => {}",
-        x_train.len(),
-        x_train[0].shape,
-        x_train[0].data,
-        y_train[0].data
-    );
-    println!(
-        "Test data {}x{}: {} => {}",
-        x_test.len(),
-        x_test[0].shape,
-        x_test[0].data,
-        y_test[0].data
-    );
+    let x_train = load_images("./examples/datasets/mnist/train-images-idx3-ubyte").unwrap();
+    let y_train = load_labels("./examples/datasets/mnist/train-labels-idx1-ubyte", 10f32).unwrap();

+    let x_train: Vec<&tensor::Tensor> = x_train.iter().collect();
+    let y_train: Vec<&tensor::Tensor> = y_train.iter().collect();

    let mut times: Vec<time::Duration> = Vec::new();

-    for _ in 0..10 {
+    for iteration in 0..10 {
        let start = time::Instant::now();

        // Create the network
-        let mut network = network::Network::new(tensor::Shape::Vector(4));
-
-        network.dense(50, activation::Activation::ReLU, false, Some(0.1));
-        network.dense(50, activation::Activation::ReLU, false, Some(0.1));
-        network.dense(3, activation::Activation::Softmax, false, Some(0.1));
+        let mut network = network::Network::new(tensor::Shape::Tensor(1, 14, 14));

+        network.convolution(
+            8,
+            (3, 3),
+            (1, 1),
+            (0, 0),
+            activation::Activation::ReLU,
+            Some(0.05),
+        );
+        network.maxpool((2, 2), (2, 2));
+        network.dense(10, activation::Activation::Softmax, true, None);

-        network.set_optimizer(optimizer::Optimizer::RMSprop(optimizer::RMSprop {
+        network.set_optimizer(optimizer::Optimizer::Adam(optimizer::Adam {
            learning_rate: 0.001,
-            alpha: 0.0,
+            decay: None,
+            beta1: 0.9,
+            beta2: 0.999,
+            epsilon: 1e-8,

-            decay: Some(0.01),
-            momentum: Some(0.01),
-            centered: Some(true),

            // To be filled by the network:
            velocity: vec![],
-            gradient: vec![],
-            buffer: vec![],
+            momentum: vec![],
        }));
        network.set_objective(
            objective::Objective::CrossEntropy, // Objective function
-            Some((-1f32, 1f32)), // Gradient clipping
+            None, // Gradient clipping
        );

        // Train the network
-        let (_train_loss, _val_loss) = network.learn(&x_train, &y_train, None, 25, 500, Some(50));
+        let (train_loss, _) = network.learn(&x_train, &y_train, None, 128, 10, None);

+        println!("Iteration: {}, Loss: {:?}", iteration, train_loss);

        let duration = start.elapsed();
        times.push(duration);
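For context on the MNIST loader added in this file: IDX, the container format for the MNIST files, stores its header as big-endian u32 fields (a magic number, then counts and dimensions), which is what the new `read` helper decodes via `u32::from_be_bytes`. A standalone sanity check of that byte-order assumption; the magic constants come from the IDX format specification, not from this crate:

```rust
fn main() {
    // IDX image files start with magic number 2051 (0x00000803),
    // label files with 2049 (0x00000801), both stored big-endian.
    let image_magic = [0x00u8, 0x00, 0x08, 0x03];
    assert_eq!(u32::from_be_bytes(image_magic), 2051);

    let label_magic = [0x00u8, 0x00, 0x08, 0x01];
    assert_eq!(u32::from_be_bytes(label_magic), 2049);

    // Reading the same bytes little-endian yields nonsense, which is why
    // the loader's use of from_be_bytes is load-bearing.
    assert_ne!(u32::from_le_bytes(image_magic), 2051);

    println!("IDX headers are big-endian: checks pass");
}
```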
20 changes: 5 additions & 15 deletions examples/example_mnist.rs
@@ -31,7 +31,7 @@ fn load_images(path: &str) -> Result<Vec<tensor::Tensor>> {
            }
            image.push(row);
        }
-        images.push(tensor::Tensor::tensor(vec![image]).resize(tensor::Shape::Tensor(1, 14, 14)));
+        images.push(tensor::Tensor::tensor(vec![image]).resize(tensor::Shape::Tensor(1, 10, 10)));
    }

    Ok(images)
@@ -67,7 +67,7 @@ fn main() {
    let x_test: Vec<&tensor::Tensor> = x_test.iter().collect();
    let y_test: Vec<&tensor::Tensor> = y_test.iter().collect();

-    let mut network = network::Network::new(tensor::Shape::Tensor(1, 14, 14));
+    let mut network = network::Network::new(tensor::Shape::Tensor(1, 10, 10));

    network.convolution(
        8,
@@ -78,15 +78,6 @@ fn main() {
        Some(0.05),
    );
    network.maxpool((2, 2), (2, 2));
-    // network.convolution(
-    //     8,
-    //     (3, 3),
-    //     (1, 1),
-    //     (0, 0),
-    //     activation::Activation::ReLU,
-    //     None,
-    // );
-    network.dense(512, activation::Activation::ReLU, true, Some(0.25));
    network.dense(10, activation::Activation::Softmax, true, None);

    network.set_optimizer(optimizer::Optimizer::Adam(optimizer::Adam {
@@ -106,13 +97,12 @@ fn main() {
println!("{}", network);

// Train the network
let (train_loss, val_loss) =
network.learn(&x_train, &y_train, Some((0.1, 5)), 128, 50, Some(1));
let (train_loss, val_loss) = network.learn(&x_train, &y_train, Some((0.1, 5)), 128, 5, Some(1));
plot::loss(&train_loss, &val_loss, "Loss per epoch", "loss.png");

// Validate the network
let (val_loss, val_acc) = network.validate(&x_test, &y_test, 0.1);
println!("1. Validation acc: {}, loss: {}", val_acc, val_loss);
let (test_loss, test_acc) = network.validate(&x_test, &y_test, 0.1);
println!("1. Test acc: {}, loss: {}", test_acc, test_loss);

// Use the network
let prediction = network.predict(x_test.get(0).unwrap());
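A quick shape check on the switch from 14x14 to 10x10 inputs, using the standard output-size formula for unpadded convolution and pooling (a sketch based on that formula, not on the crate's internals): the 3x3, stride-1 convolution maps 10x10 to 8x8, the 2x2, stride-2 max-pool reduces that to 4x4, and the final dense layer therefore sees 8 x 4 x 4 = 128 features.

```rust
// Output side length of a valid (unpadded) convolution or pooling window:
// out = (input - kernel + 2 * padding) / stride + 1
fn out_dim(input: usize, kernel: usize, stride: usize, padding: usize) -> usize {
    (input - kernel + 2 * padding) / stride + 1
}

fn main() {
    let conv = out_dim(10, 3, 1, 0); // 3x3 kernel, stride 1, no padding: 10 -> 8
    let pool = out_dim(conv, 2, 2, 0); // 2x2 max-pool, stride 2: 8 -> 4
    let filters = 8; // the convolution layer in the example uses 8 filters
    println!("dense input features: {}", filters * pool * pool); // 128
    assert_eq!(filters * pool * pool, 128);
}
```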
