-
Notifications
You must be signed in to change notification settings - Fork 0
/
ocr.py
120 lines (98 loc) · 4.76 KB
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import csv
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
from collections import namedtuple
import math
import os
import json
"""
This class does some initial training of a neural network for predicting drawn
digits based on a data set in data_matrix and data_labels. It can then be used to
train the network further by calling train() with any array of data or to predict
what a drawn digit is by calling predict().
The weights that define the neural network can be saved to a file, NN_FILE_PATH,
to be reloaded upon initialization.
"""
class OCRNeuralNetwork:
    """Feed-forward network with one hidden layer for recognising drawn digits.

    The network maps a flattened WIDTH_IN_PIXELS x WIDTH_IN_PIXELS image to
    NUM_DIGITS sigmoid outputs. All parameters are kept as 2-D float ndarrays:

    * ``theta1``            -- (num_hidden_nodes, num_inputs) input->hidden weights
    * ``theta2``            -- (NUM_DIGITS, num_hidden_nodes) hidden->output weights
    * ``input_layer_bias``  -- (num_hidden_nodes, 1) bias added to the hidden layer
    * ``hidden_layer_bias`` -- (NUM_DIGITS, 1) bias added to the output layer

    On construction the parameters are either loaded from NN_FILE_PATH or
    randomly initialised and trained on the supplied samples. ``train()`` can
    be called again later with more samples and ``predict()`` classifies a
    single image.
    """

    LEARNING_RATE = 0.1
    WIDTH_IN_PIXELS = 20
    NN_FILE_PATH = 'nn.json'
    NUM_DIGITS = 10

    def __init__(self, num_hidden_nodes, data_matrix, data_labels, training_indices, use_file=True):
        """Load a saved network, or create and train a new one.

        Args:
            num_hidden_nodes: number of nodes in the hidden layer (only used
                when a new network is created).
            data_matrix: indexable collection of flattened pixel samples.
            data_labels: indexable collection of labels; ``int(label)`` is
                used as the index of the expected output node.
            training_indices: indices into data_matrix / data_labels used for
                the initial training pass.
            use_file: when true, load from NN_FILE_PATH if that file exists
                and save to it after the initial training; when false, the
                file is neither read nor written.
        """
        self._use_file = use_file
        self.data_matrix = data_matrix
        self.data_labels = data_labels

        if use_file and os.path.isfile(self.NN_FILE_PATH):
            self._load()
            return

        # Step 1: Initialize weights to small numbers
        num_inputs = self.WIDTH_IN_PIXELS * self.WIDTH_IN_PIXELS
        self.theta1 = self._rand_initialize_weights(num_inputs, num_hidden_nodes)
        self.theta2 = self._rand_initialize_weights(num_hidden_nodes, self.NUM_DIGITS)
        self.input_layer_bias = self._rand_initialize_weights(1, num_hidden_nodes)
        self.hidden_layer_bias = self._rand_initialize_weights(1, self.NUM_DIGITS)

        # Train using sample data
        TrainData = namedtuple('TrainData', ['y0', 'label'])
        self.train([TrainData(self.data_matrix[i], int(self.data_labels[i]))
                    for i in training_indices])
        self.save()

    def _rand_initialize_weights(self, size_in, size_out):
        """Return a (size_out, size_in) array of values drawn from [-0.06, 0.06)."""
        return np.random.rand(size_out, size_in) * 0.12 - 0.06

    def sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + e**-z) for scalars or arrays."""
        z = np.asarray(z, dtype=float)
        # exp(-log(1 + e**-z)) is the same value but cannot overflow for
        # large |z|, unlike evaluating e**-z directly.
        return np.exp(-np.logaddexp(0.0, -z))

    def sigmoid_prime(self, z):
        """Element-wise derivative of the logistic function at z."""
        activation = self.sigmoid(z)
        return activation * (1.0 - activation)

    # The sigmoid activation function. Operates on scalars.
    def _sigmoid_scalar(self, z):
        """Logistic function of a single number."""
        # Branch on the sign so that exp() is only ever called with a
        # non-positive argument and therefore never overflows.
        if z >= 0:
            return 1 / (1 + math.exp(-z))
        exp_z = math.exp(z)
        return exp_z / (1 + exp_z)

    def _sigmoid_prime_scalar(self, z):
        """Derivative of the logistic function at a single number."""
        activation = self._sigmoid_scalar(z)
        return activation * (1 - activation)

    def _draw(self, sample):
        """Display a flattened sample as an image using matplotlib."""
        width = self.WIDTH_IN_PIXELS
        pixel_rows = [sample[j:j + width] for j in range(0, len(sample), width)]
        # zip() is lazy in Python 3; imshow needs a concrete 2-D sequence.
        # Transposing the rows matches the orientation used originally.
        plt.imshow(list(zip(*pixel_rows)), cmap=cm.Greys_r, interpolation="nearest")
        plt.show()

    def _forward(self, pixels):
        """Run one sample through the network.

        Returns:
            Tuple ``(y0, sum1, y1, y2)``: the input as a column vector, the
            hidden-layer pre-activation, the hidden-layer activation and the
            output-layer activation.
        """
        y0 = np.asarray(pixels, dtype=float).reshape(-1, 1)
        sum1 = np.dot(self.theta1, y0) + self.input_layer_bias  # Add the bias
        y1 = self.sigmoid(sum1)
        y2 = self.sigmoid(np.dot(self.theta2, y1) + self.hidden_layer_bias)  # Add the bias
        return y0, sum1, y1, y2

    def train(self, training_data_array):
        """Update the network with one back-propagation step per sample.

        Args:
            training_data_array: iterable of objects with a ``y0`` attribute
                (flattened pixels) and a ``label`` attribute (index of the
                expected output node).
        """
        for data in training_data_array:
            # Step 2: Forward propagation
            y0, sum1, y1, y2 = self._forward(data.y0)

            # Step 3: Back propagation
            actual_vals = np.zeros((len(y2), 1))
            actual_vals[int(data.label), 0] = 1.0
            output_errors = actual_vals - y2
            # Hidden errors must use theta2 as it was during the forward pass,
            # so they are computed before any weight is changed.
            hidden_errors = np.dot(np.transpose(self.theta2), output_errors) * self.sigmoid_prime(sum1)

            # Step 4: Update weights
            # Plain assignment rather than `+=`: if a parameter is ever a
            # Python list, `+=` would extend the list with the rows of the
            # delta instead of adding element-wise.
            self.theta1 = self.theta1 + self.LEARNING_RATE * np.dot(hidden_errors, y0.T)
            self.theta2 = self.theta2 + self.LEARNING_RATE * np.dot(output_errors, y1.T)
            self.hidden_layer_bias = self.hidden_layer_bias + self.LEARNING_RATE * output_errors
            self.input_layer_bias = self.input_layer_bias + self.LEARNING_RATE * hidden_errors

    def predict(self, test):
        """Return the index of the most activated output node for one sample."""
        y2 = self._forward(test)[3]
        # argmax returns the first maximum, like list.index(max(...)).
        return int(np.argmax(y2))

    def save(self):
        """Write the network parameters to NN_FILE_PATH as JSON.

        Does nothing when the instance was created with ``use_file=False``.
        ``theta1`` / ``theta2`` are stored as lists of rows and ``b1`` / ``b2``
        as flat lists holding every bias value.
        """
        if not self._use_file:
            return

        json_neural_network = {
            "theta1": np.asarray(self.theta1, dtype=float).tolist(),
            "theta2": np.asarray(self.theta2, dtype=float).tolist(),
            "b1": np.asarray(self.input_layer_bias, dtype=float).ravel().tolist(),
            "b2": np.asarray(self.hidden_layer_bias, dtype=float).ravel().tolist()
        }
        with open(self.NN_FILE_PATH, 'w') as nn_file:
            json.dump(json_neural_network, nn_file)

    def _load(self):
        """Read the network parameters from NN_FILE_PATH.

        Does nothing when the instance was created with ``use_file=False``.

        Raises:
            ValueError: if the stored arrays do not describe a consistent
                two-layer network (for example a file written in an
                incompatible layout).
        """
        if not self._use_file:
            return

        with open(self.NN_FILE_PATH) as nn_file:
            nn = json.load(nn_file)

        theta1 = np.array(nn['theta1'], dtype=float)
        theta2 = np.array(nn['theta2'], dtype=float)
        # reshape(-1, 1) accepts both a flat list and a list of 1-element rows.
        input_bias = np.array(nn['b1'], dtype=float).reshape(-1, 1)
        hidden_bias = np.array(nn['b2'], dtype=float).reshape(-1, 1)

        consistent = (
            theta1.ndim == 2
            and theta2.ndim == 2
            and input_bias.shape[0] == theta1.shape[0]
            and theta2.shape == (hidden_bias.shape[0], theta1.shape[0])
        )
        if not consistent:
            raise ValueError(
                "Inconsistent network shapes in %r; delete the file to retrain."
                % (self.NN_FILE_PATH,)
            )

        self.theta1 = theta1
        self.theta2 = theta2
        self.input_layer_bias = input_bias
        self.hidden_layer_bias = hidden_bias