"""
This example shows how you can search for the best models in each layer in a
stacking ensemble.
We want to create a stacking ensemble with 3 layers:
- a top layer with one model
- a middle layer with multiple models
- a bottom layer with multiple models
We also want to know how many models should be used in the middle and bottom layer.
For that we can use the helper function "get_combinations". It works as follows:
input = [1, 2 , 3]
output = get_combinations(input, comb_len=2)
output: [[1, 2], [1, 3], [2, 3], [1, 2, 3]]
Instead of numbers we insert models into "input". This way we get each combination
with more than 2 elements. Only 1 model per layer would not make much sense.
The ensemble itself is created via the package "mlxtend" in the objective-function "stacking".
"""
import itertools

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
    ExtraTreesClassifier,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, RidgeClassifier

from mlxtend.classifier import StackingClassifier

from hyperactive import Hyperactive

data = load_breast_cancer()
X, y = data.data, data.target


# define the models that are later used in the search space
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()
mlp = MLPClassifier()
gnb = GaussianNB()
gpc = GaussianProcessClassifier()
dtc = DecisionTreeClassifier()
knn = KNeighborsClassifier()
lr = LogisticRegression()
rc = RidgeClassifier()


def stacking(opt):
    # objective function: a two-level stack, where the level-1 stack uses the
    # level-0 stack as its meta classifier; scored by 3-fold cross-validation
    stack_lvl_0 = StackingClassifier(
        classifiers=opt["lvl_0"], meta_classifier=opt["top"]
    )
    stack_lvl_1 = StackingClassifier(
        classifiers=opt["lvl_1"], meta_classifier=stack_lvl_0
    )
    scores = cross_val_score(stack_lvl_1, X, y, cv=3)
    return scores.mean()
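
# A quick sanity check (hypothetical fixed choices, not part of the search):
# this is the shape of the dictionary-like object Hyperactive passes into
# "stacking" at each iteration.
# example_opt = {"lvl_0": [gpc, dtc], "lvl_1": [gbc, rfc], "top": lr}
# print(stacking(example_opt))  # mean 3-fold CV accuracy of that nested stack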


# helper function to create the search space dimensions
def get_combinations(models, comb_len=2):
    def _list_in_list_of_lists(list_, list_of_lists):
        for list__ in list_of_lists:
            if set(list_) == set(list__):
                return True
        return False

    # collect every subset with at least comb_len elements, treating subsets
    # that contain the same models in a different order as duplicates
    comb_list = []
    for i in range(0, len(models) + 1):
        for subset in itertools.permutations(models, i):
            if len(subset) < comb_len:
                continue
            if _list_in_list_of_lists(subset, comb_list):
                continue
            comb_list.append(list(subset))
    return comb_list
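
# Sanity check matching the docstring example:
# >>> get_combinations([1, 2, 3], comb_len=2)
# [[1, 2], [1, 3], [2, 3], [1, 2, 3]]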


top = [lr, dtc, gnb, rc]
models_0 = [gpc, dtc, mlp, gnb, knn]
models_1 = [gbc, rfc, etc]

stack_lvl_0_clfs = get_combinations(models_0)
stack_lvl_1_clfs = get_combinations(models_1)

search_space = {
    "lvl_1": stack_lvl_1_clfs,
    "lvl_0": stack_lvl_0_clfs,
    "top": top,
}
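
# Rough size of the search space (follows from get_combinations):
# 5 models in models_0 -> 2**5 - 5 - 1 = 26 subsets with at least 2 elements
# 3 models in models_1 -> 2**3 - 3 - 1 = 4 subsets with at least 2 elements
print("number of configurations:", len(stack_lvl_0_clfs) * len(stack_lvl_1_clfs) * len(top))
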
hyper = Hyperactive()
hyper.add_search(stacking, search_space, n_iter=20)
hyper.run()
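
# After the run, inspect the best configuration found. Assuming a recent
# Hyperactive version, best_para/best_score take the objective function:
best_para = hyper.best_para(stacking)
print("best meta classifier:", best_para["top"])
print("best score:", hyper.best_score(stacking))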