-
Notifications
You must be signed in to change notification settings - Fork 4
/
MNIST_DNN.m
103 lines (79 loc) · 2.88 KB
/
MNIST_DNN.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
% This file demonstrates the use of the NNBox on the MNIST figure database
% Using the model from Hinton, G. E., Osindero, S., & Teh, Y. W. (2006). A
% fast learning algorithm for deep belief nets. Neural computation, 18(7),
% 1527-1554.
% Put the NNBox sub-folders on the MATLAB search path. The root is given
% relative to the current working directory, so this script is expected to
% be launched from its own folder.
nnbox_dir = '../';
for sub = {'networks', 'costfun', 'utils'}
    % Added one at a time, in this order, so the resulting path precedence
    % is the same as with three consecutive addpath calls.
    addpath(fullfile(nnbox_dir, sub{1}));
end
%% Load Database --------------------------------------------------------------
% getMNIST() is a project helper; it returns the raw training/testing images
% and their digit labels.
[trainX, trainY, testX, testY] = getMNIST();

% Flatten every 28x28 image into one column of 784 values and divide by 255
% (presumably the pixels are 8-bit, which maps them to [0, 1] -- confirm
% against getMNIST). The number of samples is inferred from the data ([])
% instead of being hard-coded, so subsets of the database work as well.
trainX = double(reshape(trainX, 28*28, [])) / 255;
testX  = double(reshape(testX,  28*28, [])) / 255;

% One-hot encode the labels: row k+1 of column j is true when sample j has
% label k (k = 0..9). bsxfun expands the 10x1 class vector against the 1xN
% label row without building full 10xN temporaries; (:)' accepts the labels
% as either a row or a column vector.
trainY = bsxfun(@eq, (0:9)', double(trainY(:)'));
testY  = bsxfun(@eq, (0:9)', double(testY(:)'));
%% Setup network --------------------------------------------------------------
% Begin with an empty multilayer container; layers are appended one by one.
net = MultiLayerNet();

% Option set passed as the third argument of the RBM constructors
pretrainOpts = struct();
pretrainOpts.nEpochs      = 50;
pretrainOpts.momentum     = 0.7;
pretrainOpts.lRate        = 1e-3;
pretrainOpts.batchSz      = 200;
pretrainOpts.dropout      = 0.3;
pretrainOpts.displayEvery = 5;

% Option set passed as the fourth argument of the RBM constructors
% (identical for both layers, so it is built only once)
trainOpts = struct();
trainOpts.lRate   = 5e-4;
trainOpts.batchSz = 200;

% Layer 1: 784 -> 500
rbm1 = RBM(28*28, 500, pretrainOpts, trainOpts);
net.add(rbm1);

% Layer 2: 500 -> 500, same options except for a shorter epoch count
pretrainOpts.nEpochs = 15;
rbm2 = RBM(500, 500, pretrainOpts, trainOpts);
net.add(rbm2);

%% Pretrain network -----------------------------------------------------------
fprintf('Pretraining first two layers\n');
net.pretrain(trainX); % note: MultilayerNet will pretrain layerwise
%% Train ----------------------------------------------------------------------
% Two more layers go on top of the stack: a 500 -> 2000 RBM followed by a
% 2000 -> 10 perceptron. Both are added after net.pretrain has already been
% called, and both receive the layer options still held in trainOpts.
rbm3 = RBM(500, 2000, pretrainOpts, trainOpts);
net.add(rbm3);
per = Perceptron(2000, 10, trainOpts);
net.add(per);

% Supervised fine-tuning of the whole stack with a cross-entropy cost
fprintf('Fine-tuning\n');
% trainOpts is rebuilt from scratch here: the layer-level fields set earlier
% (lRate, batchSz = 200) must not leak into the fine-tuning options.
trainOpts = struct();
trainOpts.nIter        = 50;
trainOpts.batchSz      = 500;
trainOpts.displayEvery = 3;
train(net, CrossEntropyCost(), trainX, trainY, trainOpts);
%% Results --------------------------------------------------------------------
% Evaluation of the trained network. The network output is treated as one
% column of 10 class scores per sample (it is compared element-wise with the
% 10xN one-hot label matrices below). Sample counts are taken from the data
% rather than hard-coded, so any number of samples can be evaluated.
disp('Confusion matrix:')
% Predicted class = row index of the largest score in each column; the
% reduction dimension is explicit so a single-sample input behaves the same.
[~, tmp] = max(net.compute(testX), [], 1);
tmp = tmp - 1; % first class is 0
% One-hot encode the predictions; bsxfun expands (0:9)' against the 1xN row
Y = bsxfun(@eq, (0:9)', tmp);
% confusion(i, j) = number of samples of true class i-1 predicted as j-1
confusion = double(testY) * double(Y');
disp(confusion);
disp('Classification error (testing):');
% A sample is misclassified when any entry of its one-hot column differs
disp(mean(sum(Y ~= testY, 1) > 0));
disp('Classification error (training):');
[~, tmp] = max(net.compute(trainX), [], 1);
tmp = tmp - 1; % first class is 0
Y = bsxfun(@eq, (0:9)', tmp);
disp(mean(sum(Y ~= trainY, 1) > 0));
% Visualise first-layer weights: each column of W is reshaped to a 28x28
% image, and the 20 columns with the largest squared L2 norm are shown on a
% 5x4 grid of subplots in the current figure.
disp('Showing first layer weights as filters (20 largest L2 norm)');
weights = net.nets{1}.W;
[~, order] = sort(sum(weights .^2), 'descend');
colormap('gray');
for k = 1:20
    subplot(5, 4, k);
    imagesc(reshape(weights(:, order(k)), [28, 28]));
    axis('image');  % square pixels, tight limits
    axis('off');    % hide ticks and box
end