# create_submission.py
# Forked from hoytak/diabetic-retinopathy-code.
import graphlab as gl
import re
import random
from copy import copy
import os
import graphlab.aggregate as agg
import array
import sys
# gl.set_runtime_config("GRAPHLAB_CACHE_FILE_LOCATIONS", os.path.expanduser("~/data/tmp/"))
# Score files are looked up relative to the directory the script is run from.
base_path = os.getcwd()
model_path = base_path + "/nn_256x256/models/"

# Per-model score SFrames that loaded successfully, and the feature columns
# they contribute. NOTE(review): "scores_<n>" / "features_<n>" are presumably
# columns of the saved frames -- confirm against the script that writes them.
train_sf = []
test_sf = []
feature_names = []

for n in range(5):
    try:
        # os.path.join avoids the doubled "/" that plain concatenation
        # produced, since model_path already ends with a separator.
        Xf_train = gl.SFrame(os.path.join(model_path, "scores_train_%d" % n))
        Xf_test = gl.SFrame(os.path.join(model_path, "scores_test_%d" % n))
    except IOError as ier:
        # Best effort: a model whose train or test scores cannot be read is
        # left out entirely, so the two lists stay aligned.
        print("Skipping %d :  %s" % (n, ier))
    else:
        train_sf.append(Xf_train)
        test_sf.append(Xf_test)
        feature_names += ["scores_%d" % n, "features_%d" % n]
# Train a simple regressor to classify the different outputs.

# Explicit check instead of `assert`, so the guard still fires under
# `python -O` and reports why there is nothing to train on.
if not train_sf:
    raise RuntimeError(
        "no score SFrames could be loaded; nothing to train on")

# Fold every per-model frame into the first one. Training frames are matched
# on both "name" and "level"; test frames only on "name".
for sf in train_sf[1:]:
    train_sf[0] = train_sf[0].join(sf, on=["name", "level"])

for sf in test_sf[1:]:
    test_sf[0] = test_sf[0].join(sf, on="name")

# Keep a small random slice of the joined training rows (roughly 5%) as the
# validation set passed to the booster.
X_train, X_valid = train_sf[0].random_split(0.95)
X_test = test_sf[0]

m = gl.regression.boosted_trees_regression.create(
    X_train, target="level", features=feature_names,
    max_iterations=500, validation_set=X_valid,
    column_subsample=0.2, row_subsample=1, step_size=0.01)

# The regressor predicts real values; round to the nearest integer and clamp
# the result into the range 0..4.
X_test['level'] = m.predict(X_test).apply(
    lambda x: min(4, max(0, int(round(x)))))

X_out = X_test[['name', 'level']]
def get_number(s):
    """Return a numeric sort key for an image name.

    The key is the run of digits at the start of ``s`` converted to a float,
    plus 0.5 when ``s`` contains the substring ``'right'``. For example
    ``"10_left"`` maps to 10.0 and ``"10_right"`` to 10.5, so the two sort
    next to each other with the 'right' name second.

    Raises:
        ValueError: if ``s`` does not begin with at least one digit.
    """
    match = re.match(r'[0-9]+', s)
    if match is None:
        # Without this check the failure surfaced as an AttributeError on
        # None, which says nothing about the offending name.
        raise ValueError(
            "image name does not start with a number: %r" % (s,))

    n = float(match.group(0))
    if 'right' in s:
        n += 0.5
    return n
# Order the rows by the numeric key derived from each image name.
X_out['number'] = X_out['name'].apply(get_number)
X_out = X_out.sort('number')

# Bind the result of rename instead of relying on it mutating X_out: this is
# correct whether rename works in place and returns the same frame, or
# returns a renamed copy.
X_out = X_out.rename({"name": "image"})

import csv

# 'wb' is the mode the Python 2 csv module expects for output files.
with open('submission.csv', 'wb') as outfile:
    fieldnames = ['image', 'level']
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    # Each row is handed to DictWriter as-is, so rows are expected to be
    # dicts keyed by column name; restricting to the two submission columns
    # keeps the helper 'number' column out of the file.
    writer.writerows(X_out[['image', 'level']])