import os
import pickle
from time import time

import matplotlib.pyplot as plt
import numpy as np

from mnist_loader import load_data


# activation function for hidden units
def relu(value):
    return np.maximum(0, value)


# derivative of activation function for hidden units
def d_relu(value):
    # change to (value > 0.0) + 0.0 if any problems
    return value > 0.0


# activation function for output units
def softmax(value):
    exponent = np.exp(value - np.amax(value))
    normalized_exponent = exponent / np.sum(exponent)
    return normalized_exponent
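

# illustrative sanity check (added, not part of the original pipeline): subtracting
# the maximum keeps np.exp() from overflowing on large inputs while leaving the
# softmax result unchanged, since softmax is shift-invariant
def _softmax_stability_demo():
    large = np.array([1000.0, 1001.0])
    # a naive np.exp(large) / np.sum(np.exp(large)) would overflow and return nan here
    assert np.allclose(softmax(large), softmax(large - 1000.0))
    assert np.isclose(np.sum(softmax(large)), 1.0)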


# implementation of a shallow neural network for classification of handwritten numbers
# trained on the MNIST data set
class NeuralNetwork:
    # hyper-parameters
    # can be altered to observe different results (low learning rate for ReLU)
    learning_rate = 0.01
    dropout = 0.5

    # initialize network with specified number of input units, hidden units and output units
    # weights are set to random numbers, using a normal distribution for hidden weights and a
    # uniform distribution for output weights; these are scaled with np.sqrt() to normalize
    # the variance of the generated weights
    # in weight matrices, row X holds all connections to previous unit X,
    # e.g. row 3 in output_weights holds all weights connected to hidden unit 3
    def __init__(self, input_amount, hidden_amount, output_amount):
        self.input_amount = input_amount
        self.hidden_weights = np.random.randn(input_amount, hidden_amount) * np.sqrt(2.0 / input_amount)
        self.hidden = np.ones(hidden_amount)
        self.output_weights = np.random.rand(hidden_amount, output_amount) / np.sqrt(hidden_amount)
        self.output = np.ones(output_amount)
        # error measurements
        self.average_cross_entropy = []
        self.training_error = []
        self.test_error = []
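
    # (added note) randn scaled by sqrt(2 / input_amount) above is the He initialization
    # commonly paired with ReLU: the hidden weights get variance 2 / fan_in, which keeps
    # the variance of the hidden pre-activations roughly constant regardless of input size;
    # the uniform output weights are shrunk by sqrt(hidden_amount) for the same reason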

    # used for training the network with all data for a given number of epochs
    # training is on-line
    def train(self, data, epochs):
        print('Training commenced')
        # load test data for determining test error after each epoch
        test_data = load_data('testing')
        # used to determine and save the best weights based on test set error
        best_test_error = 100.0
        # prevent shuffle from affecting data outside the function
        data = np.copy(data)
        # used to avoid repeatedly calling len()
        data_amount = len(data)
        total_rounds = 0
        for epoch in range(epochs):
            # used to calculate average cross entropy error
            cross_entropy_sum = 0
            # every epoch, train on-line with all available data
            np.random.shuffle(data)
            for i in range(data_amount):
                # classify current image with training mode enabled (enables dropout regularization)
                classification = self.classify(data[i][0], mode='training')
                # create target output using image label
                target = np.zeros(len(self.output))
                target[data[i][1]] = 1
                # print every 10000 rounds to monitor progress of training
                if i % 10000 == 0:
                    print('Starting round ' + str(total_rounds) + ', ' + str(data_amount * epochs - total_rounds) +
                          ' left (' + format(total_rounds * 100.0 / (data_amount * epochs), '.2f') + '% finished)')
                    total_rounds += 10000
                # calculate error and back-propagate
                output_error = classification - target
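                # (added note) with softmax outputs and cross-entropy loss, the gradient of the
                # loss with respect to the output pre-activations reduces to exactly
                # classification - target, which is why no softmax derivative appears here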
                hidden_error = d_relu(self.hidden) * np.dot(self.output_weights, output_error)
                # perform weight update
                self.output_weights -= self.learning_rate * output_error * np.reshape(self.hidden, (len(self.hidden), 1))
                self.hidden_weights -= self.learning_rate * hidden_error * np.reshape(data[i][0], (len(data[i][0]), 1))
                # add current cross entropy error
                # simplified from -sum(target * log(classification)) as target is non-zero for only one entry
                cross_entropy_sum += -np.log(classification[data[i][1]])
            # halve learning rate every 5 epochs to learn more fine-tuned weights in later epochs
            if epoch % 5 == 0 and epoch != 0:
                self.learning_rate -= self.learning_rate / 2.0
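            # (added note) with the default learning_rate = 0.01 and 30 epochs, the rate is
            # halved at epochs 5, 10, 15, 20 and 25, ending near 0.01 / 2**5, about 3.1e-4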
            # log error after each epoch, used to show progression
            self.average_cross_entropy.append(cross_entropy_sum / data_amount)
            self.training_error.append(self.test(data))
            self.test_error.append(self.test(test_data))
            # check if the current weights produce a lower test set error than the best so far
            # if true, save copies of the current weights as the best weights so far
            if self.test_error[-1] < best_test_error:
                best_test_error = self.test_error[-1]
                best_hidden_weights = np.copy(self.hidden_weights)
                best_output_weights = np.copy(self.output_weights)
        # training is completed, now set weights to the best weights found
        # deactivated in final version to choose best network based on average cross entropy
        # (if activated, printing of errors must be altered)
        # self.hidden_weights = best_hidden_weights
        # self.output_weights = best_output_weights
        # count number of saved networks, used for naming figures
        figure_nr = len(os.listdir('networks'))
        # plot training curve
        plt.plot(self.average_cross_entropy, 'g-', label='Average cross entropy error')
        plt.legend(loc='upper right')
        plt.title('Training curve')
        plt.xlabel('Epochs (60000 iterations per)')
        plt.ylabel('Error')
        plt.savefig('figures/nn' + str(figure_nr) + '_ace_error.png')
        plt.clf()
        # plot training set and test set error
        plt.plot(self.training_error, 'b-', label='Training set error')
        plt.plot(self.test_error, 'r-', label='Test set error')
        plt.legend(loc='upper right')
        plt.title('Classification error')
        # plt.xlim(0, epochs)
        # plt.ylim(0.0, 0.5)
        plt.xlabel('Epochs (60000 iterations per)')
        plt.ylabel('Error (% misclassified)')
        plt.savefig('figures/nn' + str(figure_nr) + '_classification.png')
        plt.clf()
        print('Training complete')

    # used for classifying an image, returns a vector with probabilities for each number
    # mode is used for enabling regularization with dropout when training
    def classify(self, image, mode='testing'):
        # compute output from hidden units
        self.hidden = relu(np.dot(np.transpose(self.hidden_weights), image))
        # if in training mode, enable inverted dropout
        # currently deactivated as it worsens results on both training set and test set (probable correlation)
        # if mode == 'training':
        #     drop_mask = (np.random.rand(len(self.hidden)) < self.dropout) / self.dropout
        #     self.hidden *= drop_mask
        # compute and return output from output units
        return softmax(np.dot(np.transpose(self.output_weights), self.hidden))
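
    # (added note on the disabled dropout above) the mask divides by self.dropout so that
    # surviving units are scaled up during training ("inverted" dropout); expected hidden
    # activations then match test time, when no mask is applied. with dropout = 0.5,
    # roughly half the units are zeroed and the survivors are doubled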

    # counts number of incorrect classifications for a given data set
    def test(self, data):
        incorrectly_classified = 0
        for i in range(len(data)):
            classification = self.classify(data[i][0])
            # if the most probable class in the classification differs from the label,
            # the image was incorrectly classified
            if classification.argmax() != data[i][1]:
                incorrectly_classified += 1
        # return % incorrectly classified
        return incorrectly_classified * 100.0 / len(data)

    # save network for later use / easy demonstration of correctness
    def save_network(self):
        # count number of saved networks, used for naming the network
        nn_amount = len(os.listdir('networks'))
        # put relevant info in a dictionary
        network_info = {'input_amount': self.input_amount,
                        'hidden_amount': len(self.hidden),
                        'output_amount': len(self.output),
                        'output_weights': self.output_weights,
                        'hidden_weights': self.hidden_weights,
                        'average_cross_entropy': self.average_cross_entropy,
                        'training_error': self.training_error,
                        'test_error': self.test_error}
        # save network to the given folder with an unused name
        filename = 'networks/nn' + str(nn_amount) + '.pkl'
        with open(filename, 'wb') as file:
            pickle.dump(network_info, file, 2)
        print('\nNetwork saved as nn' + str(nn_amount) + '.pkl')

    # load network for use / demonstration
    @staticmethod
    def load_network(network_number):
        # load network with given network number
        filename = 'networks/nn' + str(network_number) + '.pkl'
        with open(filename, 'rb') as file:
            network_info = pickle.load(file)
        # create new network with saved arguments
        network = NeuralNetwork(network_info['input_amount'],
                                network_info['hidden_amount'],
                                network_info['output_amount'])
        # use saved information to recreate the saved network
        network.hidden_weights = network_info['hidden_weights']
        network.output_weights = network_info['output_weights']
        network.average_cross_entropy = network_info['average_cross_entropy']
        network.training_error = network_info['training_error']
        network.test_error = network_info['test_error']
        # print relevant information about the loaded network
        print('\nNetwork nn' + str(network_number) + ' loaded')
        print('Network has ' + str(network_info['input_amount']) + ' input units, ' +
              str(network_info['hidden_amount']) + ' hidden units, and ' +
              str(network_info['output_amount']) + ' output units')
        print('Network was trained for ' + str(len(network.average_cross_entropy)) + ' epochs')
        print('Average cross entropy error for previous training of nn' + str(network_number) + ' is ' +
              str(network.average_cross_entropy[-1]))
        return network


# used to create and run the neural network
def main():
    # used to measure computation time
    start = time()
    # choose whether to train a new network or load an old one
    # -1 for training, desired network number for loading (e.g. 5 loads nn5.pkl)
    network_number = -1
    if network_number >= 0:
        # load network with specified number
        network = NeuralNetwork.load_network(network_number)
    else:
        # create network with desired number of input units, hidden units and output units
        network = NeuralNetwork(784, 100, 10)
        # select number of desired epochs and commence training
        epochs = 30
        train_data = load_data('training')
        network.train(train_data, epochs)
        # only save decent networks
        if network.test_error[-1] <= 2.50:
            network.save_network()
    # print information about classification error, using the latest results found in training
    # create a tuple containing the sizes of the training and test data sets (used in printing)
    data_sizes = (60000, 10000)
    print('\nFor training data, network correctly classifies ' + format(100.0 - network.training_error[-1], '.2f') +
          '% of images, for a total of ' + format((100.0 - network.training_error[-1]) * data_sizes[0] / 100, '.0f') +
          ' images')
    print('\nFor test data, network correctly classifies ' + format(100.0 - network.test_error[-1], '.2f') +
          '% of images, for a total of ' + format((100.0 - network.test_error[-1]) * data_sizes[1] / 100, '.0f') +
          ' images')
    # print('\nBest test set error: ' + str(min(network.test_error)))
    # print total computation time
    print('\nComputation time was ' + format(time() - start, '.2f') + ' seconds')


if __name__ == '__main__':
    main()
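

# minimal usage sketch (added; assumes a previously saved nn0.pkl exists and that
# load_data('testing') returns (image, label) pairs as used above):
#   network = NeuralNetwork.load_network(0)
#   image, label = load_data('testing')[0]
#   print(network.classify(image).argmax(), label)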