-
Notifications
You must be signed in to change notification settings - Fork 1
/
knn.py
156 lines (129 loc) · 5.27 KB
/
knn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import numpy as np
from utils.similarity import euclidean
class KNN(object):
""" k-Nearest Neighbors algorithm. The principle behind KNN is to
find a predefined number of training samples nearest to the new data point
and classified it based on this training samples.
Attributes:
k: int
Number of neighbours.
data: numpy.array (n_samples, n_features)
Training vectors, where n_samples is the number of samples and
n_features is the number.
of features.
Y: numpy.array (n_samples, 1)
Target array of the training vectors, where n_samples is the
number of samples.
"""
def __init__(self):
self.k = 1
self.data = None
self.Y = None
def __compute_weight(self, neighbours):
""" Weighted points by the inverse of their distance.
Args:
neighbours: list (k_neighbours, (1, 2))
List of the tuples of the neighbours, where k_neighbours is the number of the
nearest neighbours
Returns:
weights: numpy.array
Weights of each neighbour.
"""
weights = []
for dist, i in neighbours:
weights.append(np.power(dist, -2))
return np.array(weights)
def __uniform(self, X):
""" Classification data points. Weight points by the inverse of their
distance. Closer neighbors of a query point will have a greater
influence than neighbors which are further away.
Args:
X: numpy.array (n_samples, n_features)
Test vectors, where n_samples is the number of samples and
n_features is the number of features.
Returns:
prediction: numpy.array
Predicted class for each data point.
"""
prediction = []
for x in X:
neighbours = self.__KNN(x)
idx = [n[1] for n in neighbours]
counts = np.bincount(self.Y[idx])
prediction.append(np.argmax(counts))
return prediction
def __distance_weighted(self, X):
""" Classification data points. All points in each neighborhood are
weighted equally.
Args:
X: numpy.array (n_samples, n_features)
Test vectors, where n_samples is the number of samples and
n_features is the number of features.
Returns:
prediction: numpy.array
Predicted class for each data point.
"""
prediction = []
for x in X:
neighbours = self.__KNN(x)
weights = self.__compute_weight(neighbours)
distances = np.array([n[0] for n in neighbours])
weights *= distances
votes = {}
for n, w in zip(neighbours, weights):
votes[self.Y[n[1]]] = votes.get(self.Y[n[1]], 0) + n[0] * w
prediction.append(max(votes, key=votes.get))
return prediction
def __KNN(self, x):
""" Finds the K-neighbors of a datapoint.
Args:
x: numpy.array (1, n_features)
Vector of the features of the sample, where n_samples is the
number of samples and n_features is the number of features.
Returns:
distances: numpy.array
Distance from datapoint to K nearest neighbors.
"""
distances = []
for i, d in enumerate(self.data):
distances.append((euclidean(x, d), i))
distances.sort(key=lambda x: x[0])
return distances[:self.k]
def fit(self, X, Y, k):
""" Fit the dataset for future classification.
Args:
X: numpy.array (n_samples, n_features)
Test vectors, where n_samples is the number of samples and
n_features is the number of features.
Y: numpy.array (n_samples)
Target array.
k: int
Number of neighbours.
"""
self.data = X
self.Y = Y
self.k = k
def predict(self, X, mode="uniform"):
""" Perform classification.
Args:
X: numpy.array (n_samples, n_features)
Test vectors, where n_samples is the number of samples and
n_features is the number of features.
mode: str
Mode of the weight function used in prediction. Possible values:
"uniform" : uniform weights. All points in each neighborhood
are weighted equally.
"weighted" : weight points by the inverse of their distance.
Closer neighbors of a query point will have a greater influence
than neighbors which are further away.
Returns:
prediction: numpy.array
Predicted class for each data point.
"""
if mode == "uniform":
prediction = self.__uniform(X)
elif mode == "weighted":
prediction = self.__distance_weighted(X)
else:
raise Exception('No such distance mode!')
return prediction