-
Notifications
You must be signed in to change notification settings - Fork 0
/
NaiveCrossValidation.py
85 lines (64 loc) · 2.68 KB
/
NaiveCrossValidation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import Perceptron
import DatasetTools as dt
import Tests as tests
import numpy as np
"""
bank_marketing: best_dim = 3, 89.0%; best_sigma = 0.001, 88.9%
gender_voice: best_dim = 1, 96.1%; best_sigma = 0.1, 96.4%
mushroom: best_dim = 5, 96.4%; best_sigma = 0.1, 99.4%
"""
# saving the parameters
# TODO: Implement a better way to pass the parameters automatically
# first row is a placeholder for the linear kernel (no parameters),
# the second row is for dim, the third for sigma
# the cols represent the datasets in the following order:
# bank_marketing, gender_voice, mushroom
# One row per kernel, one column per dataset
# (bank_marketing, gender_voice, mushroom).
best_parameters = np.array([
    [0, 0, 0],          # linear kernel: no parameter, zeros as placeholders
    [3, 1, 5],          # best dim found for each dataset
    [0.001, 0.1, 0.1],  # best sigma found for each dataset
])
def _best_by_accuracy(scored):
    """Return the first ``(accuracy, parameter)`` pair with the top accuracy.

    Only accuracies strictly greater than zero can win; when there is none
    (or ``scored`` is empty) the placeholder pair ``(0, 0)`` is returned.
    """
    best = 0, 0
    for accuracy, parameter in scored:
        # Strict comparison keeps the earliest candidate when scores tie.
        if accuracy > best[0]:
            best = accuracy, parameter
    return best


def _score_candidates(dataset_name, train_set, test_set,
                      kernel, keyword, candidates):
    """Build one Perceptron per candidate and record its best accuracy.

    ``keyword`` is the name of the Perceptron constructor argument that
    receives each candidate value (``"_dim_"`` or ``"_sigma_"``).
    Returns a list of ``(accuracy, candidate)`` pairs in candidate order.
    """
    # The same count is given to the constructor and to tests.best_epoch.
    epochs = 10
    scored = []
    for candidate in candidates:
        # Every column but the last, then the last column on its own
        # (presumably features and labels -- confirm against Perceptron).
        clf = Perceptron.Perceptron(dataset_name,
                                    train_set[:, :-1],
                                    train_set[:, -1],
                                    _kernel_=kernel, epochs=epochs,
                                    **{keyword: candidate})
        # best_epoch returns a pair; only its first item is used here.
        accuracy, _ = tests.best_epoch(clf, test_set, epochs)
        scored.append((accuracy, candidate))
    return scored


def cross_validate_parameters(dataset_name, train_set, test_set,
                              dims=None, sigmas=None):
    """Search for the best ``dim`` and the best ``sigma`` on one dataset.

    Every candidate ``dim`` is tried with kernel id 2 and every candidate
    ``sigma`` with kernel id 3 (the polynomial and Gaussian classifiers,
    going by the names used for them in the original code).  Each
    classifier is scored with ``tests.best_epoch`` on ``test_set``.

    Args:
        dataset_name: Name forwarded to the Perceptron constructor.
        train_set: 2-D array sliced as ``[:, :-1]`` and ``[:, -1]``.
        test_set: Data handed to ``tests.best_epoch`` for scoring.
        dims: Candidate values for ``_dim_``; defaults to 1 through 6.
        sigmas: Candidate values for ``_sigma_``; defaults to
            ``[0.001, 0.01, 0.1, 1, 2, 5]``.

    Returns:
        A pair ``(best_accu_and_dim, best_accu_and_sigma)`` where each item
        is an ``(accuracy, parameter)`` tuple, or ``(0, 0)`` when no
        candidate scored above zero.
    """
    if dims is None:
        dims = range(1, 7)
    if sigmas is None:
        sigmas = [0.001, 0.01, 0.1, 1, 2, 5]

    best_accu_and_dim = _best_by_accuracy(
        _score_candidates(dataset_name, train_set, test_set,
                          2, "_dim_", dims))
    best_accu_and_sigma = _best_by_accuracy(
        _score_candidates(dataset_name, train_set, test_set,
                          3, "_sigma_", sigmas))

    print("Best accuracy with associated dim: ", best_accu_and_dim, "\n",
          "Best accuracy with associated sigma :", best_accu_and_sigma)
    return best_accu_and_dim, best_accu_and_sigma
def main():
    """Load the "mushroom" dataset and run the parameter search on it."""
    name = "mushroom"
    # 70 and standardize=True are forwarded to dt.load_dataset unchanged
    # (70 is presumably the training-split percentage -- confirm in
    # DatasetTools).
    training, testing = dt.load_dataset(name, 70, standardize=True)
    cross_validate_parameters(name, training, testing)


# Only run the search when this file is executed as a script.
if __name__ == "__main__":
    main()