-
Notifications
You must be signed in to change notification settings - Fork 0
/
knnstarter.py
51 lines (37 loc) · 1.66 KB
/
knnstarter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import sklearn
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
from sklearn import linear_model, preprocessing
data = pd.read_csv("car.data")
print(data.head())
le = preprocessing.LabelEncoder()
# convert the non-numeric categories into numeric categories – returns a numpy array
buying = le.fit_transform((data["buying"]))
maint = le.fit_transform((data["maint"]))
door = le.fit_transform((data["door"]))
persons = le.fit_transform((data["persons"]))
lug_boot = le.fit_transform((data["lug_boot"]))
safety = le.fit_transform((data["safety"]))
cls = le.fit_transform((data["class"]))
predict = "class"
X = list(zip(buying, maint, door, persons, lug_boot, safety)) # zip creates tuples
y = list(cls)
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.1)
# choose k = 5
model = KNeighborsClassifier(n_neighbors=9)
model.fit(x_train, y_train)
accuracy = model.score(x_test, y_test)
print("Accuracy rate: ", accuracy)
# Note: for this model, K = 9 yields greater accuracy than K = 11 --> greater K doesn't
# always mean greater accuracy!
predicted = model.predict(x_test)
names = ["unacc", "acc", "good", "vgood"]
for x in range(len(predicted)):
print("Predicted: ", predicted[x], "Data: ", x_test[x], "Actual: ", names[y_test[x]])
'''Now let's return the K-neighbors of a point: their indices in the dataset and
and their distances to each query point. The method takes in a 2D array (hence the outer brackets),
K, and a bool that decides whether or not to return the distances.'''
n = model.kneighbors([x_test[x]], 9, True)
print("n: ", n)