-
Notifications
You must be signed in to change notification settings - Fork 1
/
agds-iris.py
120 lines (97 loc) · 3.39 KB
/
agds-iris.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import csv
from pprint import pprint
from copy import deepcopy
# Numeric code assigned to each species label as it appears in the data file.
# Codes start at 1 and follow the order of the labels listed below.
iris_map = {
    species: code
    for code, species in enumerate(
        ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica'),
        start=1,
    )
}
class Iris:
    """One iris sample: its species label plus four measurements.

    The measurements are stored exactly as passed in (no conversion is
    performed here). ``params`` exposes the same data as a dictionary in
    which the species label is replaced by its numeric code from
    ``iris_map``.
    """

    def __init__(self, class_name, petal_length, sepal_length, petal_width, sepal_width):
        """Store the sample's fields and build the ``params`` lookup.

        Note the parameter order: lengths come before widths
        (petal_length, sepal_length, petal_width, sepal_width).

        Raises:
            KeyError: if ``class_name`` is not a key of ``iris_map``.
        """
        self.class_name = class_name
        self.petal_length, self.petal_width = petal_length, petal_width
        self.sepal_length, self.sepal_width = sepal_length, sepal_width
        # Every sample starts with the same weight.
        self.weight = 0.2

        # Start with the numeric species code, then add the measurements
        # under their attribute names.
        self.params = {'class': iris_map[class_name]}
        self.params.update(
            petal_length=petal_length,
            petal_width=petal_width,
            sepal_length=sepal_length,
            sepal_width=sepal_width,
        )

    def __repr__(self):
        """Return a short description: object id, species and the measurements."""
        fields = (
            id(self),
            self.class_name,
            self.petal_length,
            self.petal_width,
            self.sepal_length,
            self.sepal_width,
        )
        return "(id: {}, class: {}, {}-{}-{}-{})".format(*fields)
def load_data(filename):
    """Read a CSV file of iris samples and index them by attribute value.

    The file must have a header row containing the columns ``class``,
    ``petal_length``, ``sepal_length``, ``petal_width`` and ``sepal_width``.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A ``(params, irises)`` tuple where

        * ``irises`` is the list of ``Iris`` objects in file order, and
        * ``params`` maps ``column name -> value -> list of Iris objects``
          having that value in that column. Values starting with ``'Iris'``
          (the species labels) are keyed by their numeric code from
          ``iris_map``; every other value is keyed by the raw string read
          from the file.

    Raises:
        KeyError: if a required column is missing or a species label is not
            present in ``iris_map``.
    """
    params = {}
    irises = []
    with open(filename, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            iris = Iris(
                row['class'],
                row['petal_length'],
                row['sepal_length'],
                row['petal_width'],
                row['sepal_width'],
            )
            irises.append(iris)
            for key, val in row.items():
                # Species labels are stored under their numeric code. The
                # membership test must use that same code: testing the raw
                # label against code-keyed buckets is always false and would
                # reset the bucket on every row, keeping only the last iris
                # of each species.
                bucket_key = iris_map[val] if val.startswith('Iris') else val
                params.setdefault(key, {}).setdefault(bucket_key, []).append(iris)
    return params, irises
def calculate_weights(params, iris):
    """Compute, per attribute value, how close it is to the given iris.

    Args:
        params: mapping ``attribute -> value -> list of irises`` whose value
            keys are numbers or numeric strings.
        iris: the reference sample. Its ``weight`` attribute is set to 1.0.

    Returns:
        A deep copy of ``params`` in which entries are replaced by floats:
        1.0 for the buckets that contain ``iris``, and
        ``1 - |value - reference| / (max - min)`` for every value that
        differs from the reference value of that attribute. An entry keeps
        its copied list when its value equals the reference value but
        ``iris`` is not in that bucket.

    Side effects:
        Mutates ``iris.weight`` and prints the maximum and minimum value of
        every attribute.
    """
    weights = deepcopy(params)
    iris.weight = 1.0

    # First pass: find the numeric value the reference iris has for each
    # attribute. The species code is known up front from its label.
    reference = {'class': iris_map[iris.class_name]}
    for name, buckets in params.items():
        for raw, members in buckets.items():
            if iris not in members:
                continue
            try:
                numeric = float(raw)
            except ValueError:
                # Non-numeric key: treat it as a species label.
                numeric = iris_map[raw]
            reference[name] = numeric
            weights[name][raw] = 1.0

    # Second pass: score every other value by its distance from the
    # reference, normalised by the attribute's value range.
    for name, buckets in params.items():
        as_floats = [float(raw) for raw in buckets]
        highest = max(as_floats)
        lowest = min(as_floats)
        print(highest, lowest)
        target = reference[name]
        for raw, number in zip(buckets, as_floats):
            if number == target:
                continue
            weights[name][raw] = 1 - abs(number - target) / (highest - lowest)
    return weights
def get_similarities(weights, irises):
    """Score every iris against a table of per-value weights.

    Args:
        weights: mapping ``attribute -> value -> weight``.
        irises: iterable of samples exposing ``params`` (attribute -> value)
            and ``weight``.

    Returns:
        A list of ``(iris, score)`` tuples in input order. The score is the
        sum of ``iris.weight * weight`` over all attributes, multiplied by
        100 and rounded to two decimals. Entries of ``weights`` that are not
        floats are skipped.
    """
    scored = []
    for candidate in irises:
        # Look up the weight matching this sample's value for each attribute.
        matched = (weights[name][candidate.params[name]] for name in weights)
        total = sum(
            candidate.weight * weight
            for weight in matched
            if isinstance(weight, float)
        )
        scored.append((candidate, round(total * 100, 2)))
    return scored
# Script body: load the training set, take one sample as the reference,
# print the weight table computed for it, and rank all samples by their
# similarity score in ascending order.
params, irises = load_data('../data/training_data/iris-with-header.data')
looking_for = irises[92]
weights = calculate_weights(params, looking_for)
pprint(weights)
similar = get_similarities(weights, irises)
similar.sort(key=lambda scored: scored[1])
# pprint(similar)