-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomputeGV_1.m
69 lines (54 loc) · 1.82 KB
/
computeGV_1.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
function gv = computeGV_1(theta, v, training_data, training_target, layer_size, lambda)
%COMPUTEGV_1 Gauss-Newton matrix-vector product Gv for a sigmoid network.
% Computes the product of the curvature matrix with vector v using the
% R-operator technique: a forward R-pass followed by a backward pass.
% Every training case is arranged as a COLUMN in training_data.
%
% Inputs:
%   theta           - all network parameters flattened into one vector
%   v               - vector to multiply by the curvature matrix; same
%                     flattened layout as theta
%   training_data   - input matrix, one case per column
%   training_target - class labels, used below as column indices into an
%                     identity matrix to form one-hot targets
%   layer_size      - vector of layer widths; layer_size(end) is the
%                     number of output classes
%   lambda          - L2 regularization strength (scaled by 1/N below)
%
% Output:
%   gv - the curvature-vector product, flattened like theta
%% initialize the variable
% variable used in the forward propagation
% Unpack the flat parameter vectors into per-layer weight/bias stacks.
[W, b] = dnnParamToStack(theta, layer_size);
[V_w, V_b] = dnnParamToStack(v, layer_size);
case_num = length(training_target);
m = length(V_w);
R_x = cell(m,1);
% R_x{i}: R-operator applied to layer i pre-activations
R_y = cell(m,1);
% R_y{i}: R-operator applied to layer i activations
y = cell(m,1);
% y{i}: layer i activations (sigmoid outputs)
dydx = cell(m,1);
% dydx{i}: sigmoid derivative y.*(1-y) for layer i
G_w = cell(m,1);
G_b = cell(m,1);
%% R operator forward propagation
% Layer 1: R{inputs} = 0, so R{x1} = V_w{1}*data + V_b{1}.
R_x{1} = V_w{1} * training_data;
R_x{1} = bsxfun(@plus,R_x{1},V_b{1}(:));
% Ordinary forward pass for layer 1 (needed for the derivatives).
unit = W{1} * training_data;
unit = bsxfun(@plus, unit, b{1}(:));
y{1} = sigmoid(unit);
dydx{1} = y{1}.*(1-y{1});
% Chain rule: R{y} = sigma'(x) .* R{x}.
R_y{1} = R_x{1} .* dydx{1};
for i = 2:m
% Product rule on x_i = W_i*y_{i-1} + b_i gives
% R{x_i} = W_i*R{y_{i-1}} + V_w{i}*y_{i-1} + V_b{i}.
R_x{i} = W{i} * R_y{i-1} + V_w{i} * y{i-1};
R_x{i} = bsxfun(@plus,R_x{i},V_b{i}(:));
%normal propagation
unit = W{i} * y{i-1};
unit = bsxfun(@plus,unit,b{i}(:));
y{i} = sigmoid(unit);
dydx{i} = y{i}.*(1-y{i});
R_y{i} = R_x{i}.*dydx{i};
end;
%% back propagation
% the output layer is different: the loss here is cross-entropy, so the
% R-derivatives at the output use its first and second derivatives.
% One-hot targets: column t of the identity selects class t.
expension_matrix = eye(layer_size(end));
expended_target = expension_matrix(:,training_target);
% R{dE/dy} = (d2E/dy2) .* R{y}, with
% d2E/dy2 = t./y.^2 + (1-t)./(1-y).^2 for cross-entropy.
R_dEdy = (expended_target./(y{m}.^2)+(1-expended_target)./((1-y{m}).^2)).*R_y{m};
% dE/dy = -t./y + (1-t)./(1-y) (cross-entropy first derivative).
dEdy = -expended_target./y{m} + (1-expended_target)./(1-y{m});
% Product rule on dE/dx = dE/dy .* sigma'(x):
% R{dE/dx} = R{dE/dy}.*sigma'(x) + dE/dy .* R{sigma'(x)},
% where R{sigma'(x)} = (1-2y).*sigma'(x).*R{x}.
R_dEdx = R_dEdy.*y{m}.*(1-y{m}) + R_x{m}.*(1-2*y{m}).*dydx{m}.*dEdy;
delta = R_dEdx;
%normal back propagation of the R-deltas; gradients are averaged over
%the case_num training cases.
for i = m:-1:2
G_w{i} = (delta * y{i-1}')./case_num;
G_b{i} = sum(delta,2)./case_num;
delta = W{i}'*delta .* dydx{i-1};
end;
% First layer uses the raw inputs instead of a previous activation.
G_w{1} = (delta * training_data')./case_num;
G_b{1} = sum(delta,2)./case_num;
%% convert to vector
gv = dnnParamToVec(G_w,G_b,layer_size);
% Add the contribution of the L2 penalty: (lambda/N) * v.
gv = gv + (lambda./length(training_target)) * v;
end