-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClassifier.py
36 lines (32 loc) · 1.57 KB
/
Classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import numpy as np
from operator import itemgetter
class KNearestNeighbours:
def __init__(self, data, target, test_point, k):
self.data = data
self.target = target
self.test_point = test_point
self.k = k
self.distances = list()
self.categories = list()
self.indices = list()
self.counts = list()
self.category_assigned = None
@staticmethod
def dist(p1, p2):
"""Method returns the euclidean distance between two points"""
return np.linalg.norm(np.array(p1) - np.array(p2))
def fit(self):
"""Method that performs the KNN classification"""
# Create a list of (distance, index) tuples from the test point to each point in the data
self.distances.extend([(self.dist(self.test_point, point), i) for point, i in zip(self.data, [i for i in range(len(self.data))])])
# Sort the distances in ascending order
sorted_li = sorted(self.distances, key=itemgetter(0))
# Fetch the indices of the k nearest point from the data
self.indices.extend([index for (val, index) in sorted_li[:self.k]])
# Fetch the categories from the train data target
for i in self.indices:
self.categories.append(self.target[i])
# Fetch the count for each category from the K nearest neighbours
self.counts.extend([(i, self.categories.count(i)) for i in set(self.categories)])
# Find the highest repeated category among the K nearest neighbours
self.category_assigned = sorted(self.counts, key=itemgetter(1), reverse=True)[0][0]