-
Notifications
You must be signed in to change notification settings - Fork 2
/
createsimplemodels.py
93 lines (88 loc) · 2.7 KB
/
createsimplemodels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""Create simple surrogate models from data in the folder dtlzdatasets."""
import pickle
import warnings
from os import getcwd, listdir
from os.path import isfile

import numpy as np
import pandas as pd
from sklearn import ensemble, svm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split as tts
from sklearn.neural_network import MLPRegressor
# Silence all warnings (e.g. convergence chatter emitted while fitting).
warnings.filterwarnings("ignore")

# Number of independent fit attempts per model type; the best attempt is kept.
NUM_TRIES = 3

# Column layout shared by both result tables.
RESULT_COLUMNS = ["file", "SVM", "NN", "Ada", "GPR"]

# Result-column name -> zero-argument factory building a fresh, unfitted
# regressor with default hyper-parameters.
MODEL_FACTORIES = {
    "SVM": svm.SVR,
    "NN": MLPRegressor,
    "Ada": ensemble.AdaBoostRegressor,
    "GPR": GaussianProcessRegressor,
}


def _fit_best(make_model, x_train, y_train, x_test, y_test, tries=NUM_TRIES):
    """Fit ``tries`` fresh models and return the best one by test-set R2.

    Args:
        make_model: Zero-argument callable returning an unfitted regressor
            exposing ``fit`` and ``predict``.
        x_train: Training inputs.
        y_train: Training targets.
        x_test: Held-out inputs used for scoring.
        y_test: Held-out targets used for scoring.
        tries: How many models to fit.

    Returns:
        A ``(score, model)`` tuple for the attempt with the highest R2 score.
        ``(None, None)`` is returned only when ``tries`` is not positive.
    """
    best_score = None
    best_model = None
    for _ in range(tries):
        model = make_model()
        model.fit(x_train, y_train)
        score = r2_score(y_test, model.predict(x_test))
        # R2 can be negative, so the first attempt is always accepted instead
        # of being compared against a baseline of 0; otherwise a poor model
        # would be reported as "score 0, no model".
        if best_model is None or score > best_score:
            best_score = score
            best_model = model
    return best_score, best_model


folder = getcwd() + "/wfgdatasets/"
# Keep regular files only: a sub-directory inside the dataset folder cannot be
# opened as a pickle. Sorting makes the row order of the output reproducible.
files = sorted(name for name in listdir(folder) if isfile(folder + name))
# NOTE(review): outputfolder is never used; both CSVs below are written to the
# current working directory. Confirm which location is intended.
outputfolder = folder + "modellingresults/"
numfiles = len(files)

# One dict per dataset; the DataFrames are built once at the end so that no
# chained-indexing assignment into a pre-allocated float frame is needed.
score_rows = []
model_rows = []
for i, file in enumerate(files):
    print("File", i + 1, "of", numfiles)
    fullfilename = folder + file
    # NOTE: unpickling can execute arbitrary code; only run this on trusted
    # dataset files.
    with open(fullfilename, "rb") as handle:
        data = pickle.load(handle)
    # Every column except the last two is a model input (presumably the last
    # two columns are the objectives f1 and f2 -- confirm against the data).
    inputs = data[data.columns[0:-2]]
    f2 = data["f2"]
    # A single train/test split is shared by all model types for this file.
    inputs_train, inputs_test, f2_train, f2_test = tts(inputs, f2)

    score_row = {"file": file}
    model_row = {"file": file}
    for name, make_model in MODEL_FACTORIES.items():
        score_row[name], model_row[name] = _fit_best(
            make_model, inputs_train, f2_train, inputs_test, f2_test
        )
    score_rows.append(score_row)
    model_rows.append(model_row)

R2results = pd.DataFrame(score_rows, columns=RESULT_COLUMNS)
models = pd.DataFrame(model_rows, columns=RESULT_COLUMNS)
R2results.to_csv("R2results.csv")
# to_csv stores only the text representation of each fitted estimator.
models.to_csv("models.csv")