import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from expand_basis import expand_basis
from sklearn.preprocessing import MinMaxScaler


def read_data():
    """ Reads the data for training and test

    It extracts training data inputs and labels from `traindata.txt` and test data inputs
    from `testinputs.txt`. The training rows are shuffled before being returned.

    Returns:
        np.ndarray: Input training data of shape (N, D).
        np.ndarray: Training data labels of shape (N, 1).
        np.ndarray: Input test data of shape (N', D).
    """
    df_train = pd.read_csv("data/traindata.txt", sep=" ", names=range(9), engine="python")
    df_train = df_train.sample(len(df_train))  # shuffle the training rows
    X_train = df_train.iloc[:, :-1].values
    y_train = df_train.iloc[:, -1].values.reshape(-1, 1)
    X_test = pd.read_csv("data/testinputs.txt", sep=" ", names=range(8), engine="python").values
    return X_train, y_train, X_test
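
# Illustrative usage sketch (hypothetical example, assuming `data/traindata.txt`
# holds 8 whitespace-separated feature columns plus one label column and
# `data/testinputs.txt` holds the same 8 feature columns):
#     X_train, y_train, X_test = read_data()
#     # X_train: (N, 8), y_train: (N, 1), X_test: (N', 8)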


def model_fit(Z, y):
    """ Fits Linear Regression model on the data

    Args:
        Z (np.ndarray): Feature engineered inputs of shape (N, D').
        y (np.ndarray): Corresponding data labels of shape (N, 1).

    Returns:
        np.ndarray: Weights for the fitted linear regression model, of shape (D', 1).
    """
    # Closed-form ordinary least squares solution: w = (Z^T Z)^{-1} Z^T y.
    w = np.linalg.inv(Z.T @ Z) @ (Z.T @ y)
    # A more numerically stable alternative (lstsq returns (solution, residuals, rank, sv)):
    # w = np.linalg.lstsq(Z.T @ Z, Z.T @ y, rcond=None)[0]
    return w
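
# Minimal worked sketch of the closed-form solution above (toy data is
# illustrative only, not taken from the project's dataset):
#     Z = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])  # bias column + one feature
#     y = np.array([[1.0], [3.0], [5.0]])                  # exactly y = 1 + 2*x
#     model_fit(Z, y)  # -> approximately [[1.0], [2.0]]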


def all_train_fit(Xtrain, ytrain, basis):
    """ Trains Linear Regression model of the basis on the whole training data

    Args:
        Xtrain: Input training data of shape (N, D).
        ytrain: Corresponding data labels of shape (N, 1).
        basis: Basis with the least cross validation MSE loss with values
            (poly_degree, include_sin, include_log).

    Returns:
        np.ndarray: Weight for fitted linear regression model of shape (D', 1).
    """
    Ztrain = expand_basis(Xtrain, *basis)
    w = model_fit(Ztrain, ytrain)
    return w


def model_predict(Xtest, w_ls, basis):
    """ Runs model prediction on test data with the fitted linear regression weights

    Args:
        Xtest: Input test data of shape (N, D).
        w_ls: Weights for the fitted linear regression model, of shape (D', 1).
        basis: Basis with the least cross validation MSE loss with values
            (poly_degree, include_sin, include_log).

    Returns:
        np.ndarray: Labels predicted by the linear regression model on the test data.
    """
    Ztest = expand_basis(Xtest, *basis)
    ytest_preds = Ztest @ w_ls
    return ytest_preds
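
# Illustrative end-to-end sketch; the basis tuple (3, True, False) below is a
# hypothetical choice, not the tuned cross-validation result:
#     X_train, y_train, X_test = read_data()
#     basis = (3, True, False)  # (poly_degree, include_sin, include_log)
#     w_ls = all_train_fit(X_train, y_train, basis)
#     ytest_preds = model_predict(X_test, w_ls, basis)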


def mse(y_true, y_pred):
    """ (float) Computes MSE loss between true and predicted values """
    return ((y_true.ravel() - y_pred.ravel()) ** 2).mean()
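
# Example: mse(np.array([1.0, 2.0]), np.array([1.0, 4.0])) -> 2.0
# (squared errors 0.0 and 4.0, mean 2.0)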


def save_predictions(ytest_preds):
    """ Saves results predicted by the model in csv format """
    np.savetxt("reports/prediction_results.csv", ytest_preds)
    print("Test data predictions written to `reports/prediction_results.csv` file.")


def visualize_cross_validation_mses(cv_results):
    """ Plots cross-validation MSE against polynomial degree, on linear and log scales """
    df_results = pd.DataFrame(cv_results,
                              columns=["polynomial basis degree", "include sin basis",
                                       "include log basis", "MSE"])
    df_results["log10 MSE"] = np.log10(df_results["MSE"])
    df_results["sin and log basis"] = df_results.apply(
        lambda x: f"log: {x['include log basis']}, sin: {x['include sin basis']}", axis=1)
    fig, (ax1, ax2) = plt.subplots(
        figsize=(12, 4),
        ncols=2
    )
    sns.lineplot(
        df_results[df_results["polynomial basis degree"] < 8],
        x="polynomial basis degree",
        y="MSE",
        hue="sin and log basis",
        marker="o",
        errorbar=('ci', 95),
        err_style='band',
        ax=ax1
    )
    ax1.set_title("MSE vs. polynomial degree")
    sns.lineplot(
        df_results[df_results["polynomial basis degree"] < 8],
        x="polynomial basis degree",
        y="MSE",
        hue="sin and log basis",
        marker="o",
        err_style='band',
        errorbar=('ci', 95),
        ax=ax2
    )
    ax2.set(yscale="log")
    ax2.set_title("MSE vs. polynomial degree (log scale)")
    # plt.show()
    plt.savefig("reports/training_results.jpg")
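
# Illustrative usage sketch (assumes cv_results is an iterable of
# (poly_degree, include_sin, include_log, mean MSE) records produced by the
# project's cross-validation loop; the rows below are hypothetical values):
#     cv_results = [(1, False, False, 12.3), (2, True, False, 8.7), (3, True, True, 6.1)]
#     visualize_cross_validation_mses(cv_results)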