-
Notifications
You must be signed in to change notification settings - Fork 0
/
implementations.py
265 lines (197 loc) · 7.96 KB
/
implementations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
from typing import Tuple
import numpy as np
def compute_mse_gradient(y: np.ndarray, x: np.ndarray, w: np.ndarray) -> np.ndarray:
    """
    Computes the gradient of the MSE with respect to the weights.

    For the loss (1 / (2n)) * ||y - x @ w||^2 the gradient is
    -(1 / n) * x.T @ (y - x @ w).

    :param y: Target shape (n, )
    :param x: Array of observations shape (n, m)
    :param w: Weights shape (m, )
    :returns gradient of the MSE, shape (m, )
    """
    e = y - x @ w
    # Multiply first, scale afterwards: `scalar * x.T @ e` parses as
    # `(scalar * x.T) @ e`, which rescales the whole (m, n) matrix into a
    # temporary before the product instead of scaling the m-vector result.
    return -(x.T @ e) / x.shape[0]
def compute_mse_loss(y: np.ndarray, x: np.ndarray, w: np.ndarray) -> float:
    """
    Evaluate the halved mean squared error of a linear model.

    :param y: Target shape (n, )
    :param x: Array of observations shape (n, m)
    :param w: Weights
    :returns scalar loss, (1 / (2n)) times the sum of squared residuals
    """
    residual = y - x @ w
    scale = 1 / (2 * y.shape[0])
    # `*` and `@` share precedence and associate left to right, so the
    # residual is scaled first and the inner product is taken afterwards.
    return scale * residual.T @ residual
def sigmoid(tx: np.ndarray) -> np.ndarray:
    """
    Numerically stable logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    The textbook form exp(x) / (1 + exp(x)) overflows to inf / inf = nan for
    large positive inputs, so exp is only ever evaluated on non-positive values
    here.

    :param tx: array (or scalar) of real values
    :return sigmoid(x), same shape as the input, with values in [0, 1]
    """
    x = np.asarray(tx)
    # exp(-|x|) always lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same function.
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a NumPy scalar.
    return out[()] if out.ndim == 0 else out
def predict_probabilities(tx: np.ndarray, w: np.ndarray) -> np.ndarray:
    """
    Function that makes class predictions in range [0, 1]

    Applies the sigmoid to the linear scores tx @ w.

    :param tx: feature values of shape (n, m)
    :param w: feature weights of shape (m, )
    where n - number of data points, m - number of features
    :return: probabilities of data points being of class 1, shape (n, )
    """
    # Annotated with the ndarray type; np.array is a factory function, not a type.
    return sigmoid(tx @ w)
def logistic_loss(y: np.ndarray, tx: np.ndarray, w: np.ndarray) -> float:
    """
    Computes the negative log likelihood loss

    Per data point the loss is log(1 + exp(z)) - y * z with z = tx @ w; the
    result is the mean over all data points. The formula corresponds to
    labels encoded as 0 / 1.

    :param y: target labels of shape (n, )
    :param tx: training data of shape (n, m)
    :param w: feature weights of shape (m, )
    where n - number of data points, m - number of features
    :return: mean negative log likelihood (scalar)
    """
    pred = tx @ w
    # logaddexp(0, z) == log(exp(0) + exp(z)) == log(1 + exp(z)), evaluated
    # without forming exp(z), which would overflow to inf for large z.
    return np.mean(np.logaddexp(0, pred) - y * pred)
def calculate_gradient_logistic_loss(y: np.ndarray, tx: np.ndarray, w: np.ndarray) -> np.ndarray:
    """
    Gradient of the mean logistic loss with respect to the weights

    :param y: target labels of shape (n, )
    :param tx: training data of shape (n, m)
    :param w: feature weights of shape (m, )
    where n - number of data points, m - number of features
    :return: gradient averaged over the data points, shape (m, )
    """
    residual = predict_probabilities(tx, w) - y
    # Weight every row of tx by its own residual, then average over the rows.
    per_sample = tx * residual[:, np.newaxis]
    return np.mean(per_sample, axis=0)
# -------------- Models --------------
def least_squares(y: np.ndarray, tx: np.ndarray) -> Tuple[np.ndarray, float]:
    """
    Closed-form least squares fit obtained by solving the normal equations
    (tx.T @ tx) w = tx.T @ y

    :param y: target labels of shape (n, )
    :param tx: feature values of shape (n, m)
    :return: tuple of the fitted weights of shape (m, ) and the MSE loss at those weights
    """
    gram = tx.T @ tx
    moment = tx.T @ y
    w = np.linalg.solve(gram, moment)
    return w, compute_mse_loss(y, tx, w)
def ridge_regression(y: np.ndarray, tx: np.ndarray, lambda_: float) -> Tuple[np.ndarray, float]:
    """
    Ridge regression solved analytically: the weights satisfy
    (tx.T @ tx + 2 * n * lambda_ * I) w = tx.T @ y

    :param y: target labels of shape (n, )
    :param tx: feature values of shape (n, m)
    :param lambda_: regularization parameter
    :return: tuple of the fitted weights of shape (m, ) and the (unpenalized) MSE loss
    """
    # The penalty is scaled by 2 * n before being added to the Gram matrix.
    penalty = 2 * tx.shape[0] * lambda_ * np.identity(tx.shape[1])
    lhs = tx.T @ tx + penalty
    rhs = tx.T @ y
    w = np.linalg.solve(lhs, rhs)
    return w, compute_mse_loss(y, tx, w)
def least_squares_GD(y: np.ndarray, tx: np.ndarray, initial_w: np.ndarray,
                     max_iters: int, gamma: float) -> Tuple[np.ndarray, list]:
    """
    Gradient Descent algorithm for linear regression
    with Mean Squared Error loss function

    :param y: target labels of shape (n, )
    :param tx: training data of shape (n, m)
    :param initial_w: initial weight estimation of shape (m, )
    :param max_iters: maximum iterations allowed
    :param gamma: learning rate
    :return: tuple with the first element being the weights of the model of shape (m, )
             and the second element being the list of losses, one per iteration,
             each measured after that iteration's update (empty if max_iters is 0)
    """
    losses = []
    w = initial_w
    for _ in range(max_iters):
        grad = compute_mse_gradient(y, tx, w)
        # Plain scalar scaling of the step; rebinding w leaves initial_w untouched.
        w = w - gamma * grad
        losses.append(compute_mse_loss(y, tx, w))
    return w, losses
def least_squares_SGD(y: np.ndarray, tx: np.ndarray, initial_w: np.ndarray,
                      max_iters: int, gamma: float) -> Tuple[np.ndarray, float]:
    """
    Stochastic Gradient Descent for linear regression with the
    Mean Squared Error loss: each step uses one randomly drawn data point

    :param y: target labels of shape (n, )
    :param tx: training data of shape (n, m)
    :param initial_w: initial weight estimation of shape (m, )
    :param max_iters: maximum iterations allowed
    :param gamma: learning rate
    :return: tuple with the first element being the weights of the model of shape (m, )
             and the second element being the loss on the full data at the final weights
    """
    w = initial_w
    n_samples = y.shape[0]
    for _ in range(max_iters):
        # size=1 keeps a leading axis, so the slices below stay (1, ) and (1, m).
        pick = np.random.randint(0, n_samples, size=1)
        w = w - gamma * compute_mse_gradient(y[pick], tx[pick], w)
    return w, compute_mse_loss(y, tx, w)
def least_squares_batch_GD(y: np.ndarray, tx: np.ndarray, initial_w: np.ndarray,
                           max_iters: int, gamma: float, batch_size: int) -> Tuple[np.ndarray, list]:
    """
    Mini-batch Gradient Descent for linear regression with the
    Mean Squared Error loss: each step uses a random batch drawn without replacement

    :param y: target labels of shape (n, )
    :param tx: training data of shape (n, m)
    :param initial_w: initial weight estimation of shape (m, )
    :param max_iters: maximum iterations allowed
    :param gamma: learning rate
    :param batch_size: batch size for gradient descent to take on each iteration
    :return: tuple with the first element being the weights of the model of shape (m, )
             and the second element being a list of losses during iterations
    """
    w = initial_w
    n_samples = y.shape[0]
    history = []
    for _ in range(max_iters):
        # An integer population draws from 0..n_samples-1, like sampling from a range of indices.
        chosen = np.random.choice(n_samples, size=batch_size, replace=False)
        step = gamma * compute_mse_gradient(y[chosen], tx[chosen], w)
        w = w - step
        # The recorded loss is measured on the full data, not on the batch.
        history.append(compute_mse_loss(y, tx, w))
    return w, history
def logistic_regression(y: np.ndarray, tx: np.ndarray, initial_w: np.ndarray,
                        max_iters: int, gamma: float) -> Tuple[np.ndarray, float]:
    """
    Trains a logistic regression with gradient descent on the full data

    :param y: target labels of shape (n, )
    :param tx: training data of shape (n, m)
    :param initial_w: initial weight estimation of shape (m, )
    :param max_iters: maximum iterations allowed
    :param gamma: learning rate
    where n - number of data points, m - number of features
    :return: tuple with the first element being the weights of the model of shape (m, )
             and the second element being the logistic loss at the final weights
    """
    w = initial_w
    for _ in range(max_iters):
        step = gamma * calculate_gradient_logistic_loss(y, tx, w)
        w = w - step
    return w, logistic_loss(y, tx, w)
def reg_logistic_regression(y: np.ndarray, tx: np.ndarray, initial_w: np.ndarray,
                            max_iters: int, gamma: float, lambda_: float) -> Tuple[np.ndarray, float]:
    """
    Trains an L2-regularized logistic regression with gradient descent
    on the objective logistic_loss + (lambda_ / 2) * ||w||^2

    :param y: target labels of shape (n, )
    :param tx: training data of shape (n, m)
    :param initial_w: initial weight estimation of shape (m, )
    :param max_iters: maximum iterations allowed
    :param gamma: learning rate
    :param lambda_: regularization parameter
    where n - number of data points, m - number of features
    :return: tuple with the first element being the weights of the model of shape (m, )
             and the second element being the penalized loss at the final weights
    """
    w = initial_w
    for _ in range(max_iters):
        # Data-fit gradient plus the derivative of the (lambda_ / 2) * ||w||^2 penalty.
        direction = calculate_gradient_logistic_loss(y, tx, w) + lambda_ * w
        w = w - direction * gamma
    penalty = (lambda_ / 2) * np.sum(w ** 2)
    return w, logistic_loss(y, tx, w) + penalty