-
Notifications
You must be signed in to change notification settings - Fork 3
/
Pipilines.py
65 lines (57 loc) · 1.61 KB
/
Pipilines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import random
'''
Baseline: choosing a label at random scores about 0.33 accuracy, because
there are 3 types of flowers and a random guess is right one time in three.
'''
from scipy.spatial import distance
#euclidean distance
def eau(a, b):
    """Return the Euclidean (straight-line) distance between points a and b.

    Thin wrapper around ``scipy.spatial.distance.euclidean``.
    """
    separation = distance.euclidean(a, b)
    return separation
'''
Our own nearest-neighbour classifier reaches an accuracy of about 0.96.
'''
class ScrappyKNN:
    """Bare-bones 1-nearest-neighbour classifier.

    ``fit`` memorises the training data; ``predict`` gives every test row the
    label of the single closest training row.

    Args:
        metric: Optional callable ``metric(a, b)`` returning the distance
            between two rows. When omitted, the module-level ``eau``
            (Euclidean distance) helper is used.
    """

    def __init__(self, metric=None):
        # None means "use eau"; it is resolved lazily in ``closest`` so the
        # default keeps following whatever ``eau`` the module defines.
        self.metric = metric

    def fit(self, X_train, y_train):
        """Store the training rows and their labels.

        Args:
            X_train: Indexable collection of training rows.
            y_train: Indexable collection of labels, one per training row.

        Raises:
            ValueError: If the number of rows and labels differ.
        """
        if len(X_train) != len(y_train):
            raise ValueError(
                "X_train and y_train must have the same length "
                f"(got {len(X_train)} and {len(y_train)})"
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a flat list holding one predicted label per row of X_test."""
        # A random baseline would be random.choice(self.y_train); the
        # nearest-neighbour lookup is used instead.
        return [self.closest(row) for row in X_test]

    def closest(self, row):
        """Return the label of the training row nearest to ``row``.

        When several training rows are equally close, the earliest one wins.

        Raises:
            ValueError: If the training set is empty.
        """
        n_train = len(self.X_train)
        if n_train == 0:
            raise ValueError("cannot predict: the training set is empty")

        # getattr keeps instances created without __init__ working.
        metric = getattr(self, "metric", None)
        dist_fn = eau if metric is None else metric

        # min() returns the first index with the smallest distance, matching
        # a strict "<" scan from index 0.
        best_index = min(
            range(n_train),
            key=lambda i: dist_fn(row, self.X_train[i]),
        )
        return self.y_train[best_index]
# Driver: load the iris dataset, split it, train the classifier and report
# how accurately it labels the held-out half.
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()

# X holds the measurements (iris.data), y the class labels (iris.target).
X, y = iris.data, iris.target

# Keep half of the samples aside for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Classifier under test: our own nearest-neighbour implementation.
# (A scikit-learn tree.DecisionTreeClassifier() can be swapped in here.)
my_classifier = ScrappyKNN()

# Train on one half, then predict labels for the other half.
my_classifier.fit(X_train, y_train)
predictions = my_classifier.predict(X_test)
print(predictions)

# Fraction of held-out samples that were labelled correctly.
print(accuracy_score(y_test, predictions))