diff --git a/example/is_odd.cr b/example/is_odd.cr
index 7476ab9..1178852 100644
--- a/example/is_odd.cr
+++ b/example/is_odd.cr
@@ -5,13 +5,19 @@ include Gradnite
 # BIT_SIZE is our input layer size
 BIT_SIZE = 8
 
+puts "running with #{BIT_SIZE} bits"
+
 # Layer size is approximately 2/3 of the input size
 LAYER_SIZE = (BIT_SIZE * 2/3).ceil.to_i
 
 mlp = MLP.new(BIT_SIZE, [LAYER_SIZE, LAYER_SIZE, 1])
 
-max = 255
-nums = (1..max).to_a
+# Generate all the numbers that can be represented by BIT_SIZE bits
+max = (2 ** BIT_SIZE) - 1
+
+puts "building training set of numbers with a ceiling of #{max}"
+
+nums = (0..max).to_a
 
 def num_to_binary_array(n)
   BIT_SIZE.times.map { |bit|
@@ -30,7 +36,7 @@ ypred = [] of Node
 
 loss = Node.new(0.0)
 
-epochs = 100
+epochs = 50
 
 epochs.times do |k|
   # forward pass
@@ -49,6 +55,12 @@ epochs.times do |k|
   }
   loss.backward
 
+  if loss.value < 0.0001
+    puts "loss: #{loss.value}"
+    puts "converged at epoch #{k}"
+    break
+  end
+
   # Gradient descent. Nudge all the parameters in the opposite direction of the gradient.
   # The gradient is showing us the direction that increases the loss, so we want to go the opposite way.
   # Linear decay of learning rate
@@ -68,8 +80,8 @@ def is_odd?(n, mlp)
   return result > 0.0
 end
 
-puts is_odd?(201, mlp)
-puts is_odd?(202, mlp)
-puts is_odd?(203, mlp)
+puts "201 true - #{is_odd?(201, mlp)}"
+puts "202 false - #{is_odd?(202, mlp)}"
+puts "203 true - #{is_odd?(203, mlp)}"
 
 puts "done"
diff --git a/src/gradnite/gradnite.cr b/src/gradnite/gradnite.cr
index 30c538c..0ebeeef 100644
--- a/src/gradnite/gradnite.cr
+++ b/src/gradnite/gradnite.cr
@@ -130,8 +130,8 @@ module Gradnite
     property bias : Node
 
     def initialize(input_count : Int64)
-      @weights = Array.new(input_count) { Node.new(rand) }
-      @bias = Node.new(rand)
+      @weights = Array.new(input_count) { Node.new(0.01 * rand) }
+      @bias = Node.new(0.0)
     end
 
     def run(x : Array(Float64) | Array(Node))