q2_neural.py
import numpy as np
import random
from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive
def affine_forward(x, w, b):
"""
Computes the forward pass for an affine (fully-connected) layer.
The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
examples, where each example x[i] has shape (d_1, ..., d_k). We will
reshape each input into a vector of dimension D = d_1 * ... * d_k, and
then transform it to an output vector of dimension M.
Inputs:
- x: A numpy array containing input data, of shape (N, d_1, ..., d_k)
- w: A numpy array of weights, of shape (D, M)
- b: A numpy array of biases, of shape (M,)
Returns a tuple of:
- out: output, of shape (N, M)
- cache: (x, w, b)
"""
    N = x.shape[0]
    D = np.prod(x.shape[1:])
    M = w.shape[1]
    out = np.dot(x.reshape(N, D), w) + b.reshape(1, M)
    return out, (x, w, b)
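
# A quick usage sketch (hypothetical shapes, not part of the assignment):
#   x = np.random.randn(2, 3, 4)           # N = 2 examples, D = 3 * 4 = 12
#   w = np.random.randn(12, 5)             # D = 12, M = 5
#   b = np.zeros(5)
#   out, cache = affine_forward(x, w, b)   # out.shape == (2, 5)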
def affine_backward(dout, cache):
"""
Computes the backward pass for an affine layer.
Inputs:
- dout: Upstream derivative, of shape (N, M)
- cache: Tuple of:
      - x: Input data, of shape (N, d_1, ..., d_k)
      - w: Weights, of shape (D, M)
      - b: Biases, of shape (M,)
    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d_1, ..., d_k)
- dw: Gradient with respect to w, of shape (D, M)
- db: Gradient with respect to b, of shape (M,)
"""
    x, w, b = cache
    N = x.shape[0]
    D = np.prod(x.shape[1:])
    dx = np.dot(dout, w.T).reshape(x.shape)
    dw = np.dot(x.reshape(N, D).T, dout)
    db = np.sum(dout, axis=0)
return dx, dw, db
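
# A gradient-shape sketch for the backward pass (hypothetical sizes):
#   x = np.random.randn(4, 6)
#   w = np.random.randn(6, 3)
#   b = np.random.randn(3)
#   out, cache = affine_forward(x, w, b)
#   dx, dw, db = affine_backward(np.random.randn(4, 3), cache)
#   # dx.shape == x.shape, dw.shape == w.shape, db.shape == b.shape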
def sigmoid_forward(x):
"""
Computes the forward pass for a sigmoid activation.
Inputs:
    - x: Input data, numpy array of arbitrary shape
Returns a tuple (out, cache)
- out: output of the same shape as x
- cache: identical to out; required for backpropagation
"""
    out = sigmoid(x)
    return out, out
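
# Note: the sigmoid is applied elementwise, so the output keeps the input's
# shape; e.g. sigmoid_forward(np.zeros((2, 3)))[0] is a (2, 3) array of 0.5.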
def sigmoid_backward(dout, cache):
"""
    Computes the backward pass for a sigmoid layer.
Inputs:
- dout: Upstream derivative, same shape as the input
to the sigmoid layer (x)
- cache: sigmoid(x)
Returns a tuple of:
- dx: back propagated gradient with respect to x
"""
    s = cache  # s = sigmoid(x), the forward-pass output
    return sigmoid_grad(s) * dout
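
# As used in this file, sigmoid_grad (from q2_sigmoid) is given the forward
# output s = sigmoid(x), i.e. it computes s * (1 - s); that is why the cache
# stores the activation rather than the raw input. A quick check sketch:
#   out, cache = sigmoid_forward(np.array([[0.0, 2.0]]))
#   dx = sigmoid_backward(np.ones_like(out), cache)   # equals out * (1 - out)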
def forward_backward_prop(data, labels, params, dimensions):
"""
Forward and backward propagation for a two-layer sigmoidal network
Compute the forward propagation and for the cross entropy cost,
and backward propagation for the gradients for all parameters.
"""
### Unpack network parameters (do not modify)
ofs = 0
Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
N = data.shape[0]
W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
ofs += Dx * H
b1 = np.reshape(params[ofs:ofs + H], (1, H))
ofs += H
W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
ofs += H * Dy
b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
### YOUR CODE HERE: forward propagation
    hidden = np.dot(data, W1) + b1
    layer1_a = sigmoid(hidden)
    layer2 = np.dot(layer1_a, W2) + b2
    # Softmax over the output layer, then cross-entropy against one-hot labels.
    # The cost is summed (not averaged) over the batch; the gradients below use
    # the same convention, so gradcheck is still consistent.
    probs = softmax(layer2)
    cost = -np.sum(np.log(probs[np.arange(N), np.argmax(labels, axis=1)]))
### END YOUR CODE
    ### YOUR CODE HERE: backward propagation
    # There is no regularization term in this cost.
    # Chain: cost -> softmax -> W2 * layer1_a + b2 -> sigmoid -> W1 * data + b1 -> ...
    # For softmax followed by cross-entropy with one-hot labels, the gradient
    # with respect to the output-layer scores is simply probs - labels.
    dx = probs - labels
    gradW2 = np.dot(layer1_a.T, dx)
    gradb2 = np.sum(dx, axis=0)
    dlayer2 = np.dot(dx, W2.T)
    dlayer1 = sigmoid_grad(layer1_a) * dlayer2
    gradW1 = np.dot(data.T, dlayer1)
    gradb1 = np.sum(dlayer1, axis=0)
    # Alternative implementation using the affine/sigmoid forward and backward
    # helpers defined above (kept for reference; unlike the code above, it
    # averages the cost and gradients over the batch):
    # scores, cache_1 = affine_forward(data, W1, b1)
    # scores, cache_s1 = sigmoid_forward(scores)
    # scores, cache_2 = affine_forward(scores, W2, b2)
    # probs = softmax(scores)
    # cost = -np.sum(np.log(probs[np.arange(N), np.argmax(labels, axis=1)] + 1e-12)) / N
    # softmax_dx = probs.copy()
    # softmax_dx[np.arange(N), np.argmax(labels, axis=1)] -= 1
    # softmax_dx /= N
    # # softmax_dx is the gradient of the loss w.r.t. the output-layer scores
    # grads = {}
    # dlayer2, grads['W2'], grads['b2'] = affine_backward(softmax_dx, cache_2)
    # dlayer1s = sigmoid_backward(dlayer2, cache_s1)
    # dlayer1, grads['W1'], grads['b1'] = affine_backward(dlayer1s, cache_1)
### END YOUR CODE
### Stack gradients (do not modify)
grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
gradW2.flatten(), gradb2.flatten()))
return cost, grad
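
# A usage sketch (hypothetical sizes): with dimensions = [Dx, H, Dy], the flat
# params vector is laid out as [W1 (Dx*H), b1 (H), W2 (H*Dy), b2 (Dy)], e.g.
#   dims = [4, 3, 2]
#   params = np.random.randn(4 * 3 + 3 + 3 * 2 + 2)
#   data = np.random.randn(7, 4)
#   labels = np.eye(2)[np.random.randint(0, 2, 7)]    # one-hot rows
#   cost, grad = forward_backward_prop(data, labels, params, dims)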
def sanity_check():
"""
Set up fake data and parameters for the neural network, and test using
gradcheck.
"""
print("Running sanity check...")
N = 300
dimensions = [10, 5, 10]
data = np.random.randn(N, dimensions[0]) # each row will be a datum
labels = np.zeros((N, dimensions[2]))
for i in range(N):
labels[i,random.randint(0,dimensions[2]-1)] = 1
params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
dimensions[1] + 1) * dimensions[2], )
    # cost, _ = forward_backward_prop(data, labels, params, dimensions)
    # With random parameters we expect roughly 1-in-10 accuracy; since the
    # cost is summed over the batch, np.exp(-cost / N) should be near 0.1.
    # print(np.exp(-cost / N))
gradcheck_naive(lambda params: forward_backward_prop(data, labels, params,
dimensions), params)
def your_sanity_checks():
"""
    Use this space to add any additional sanity checks by running:
python q2_neural.py
This function will not be called by the autograder, nor will
your additional tests be graded.
"""
print("Running your sanity checks...")
### YOUR CODE HERE
#raise NotImplementedError
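    # One possible extra check (a sketch, not required by the assignment):
    # gradient-check a single affine layer with a sum-of-squares cost, reusing
    # the (cost, grad) interface that gradcheck_naive expects.
    # x_chk = np.random.randn(5, 4)
    # b_chk = np.zeros(3)
    # def affine_cost(flat_w):
    #     w = flat_w.reshape(4, 3)
    #     out, cache = affine_forward(x_chk, w, b_chk)
    #     _, dw, _ = affine_backward(2 * out, cache)
    #     return np.sum(out ** 2), dw.flatten()
    # gradcheck_naive(affine_cost, np.random.randn(12))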
### END YOUR CODE
if __name__ == "__main__":
sanity_check()
your_sanity_checks()