# models.py
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score, roc_auc_score
def score_test(model, x_test, y_test):
    """Per-class precision/recall/F1 on the test set."""
    from pandas import DataFrame
    y_pred = model.predict(x_test)
    # recode NaNs in y_test.attack_type (work on a copy so the caller's frame is untouched)
    y_recode = y_test.attack_type.copy()
    y_recode[y_recode.isna()] = 'unknown'
    labels = ['normal', 'dos', 'probe', 'r2l', 'u2r', 'unknown']
    return DataFrame({
        'precision' : precision_score(y_recode, y_pred, average=None, labels=labels),
        'recall'    : recall_score(y_recode, y_pred, average=None, labels=labels),
        'F1'        : f1_score(y_recode, y_pred, average=None, labels=labels) },
        # 'kappa' : cohen_kappa_score(y_recode, y_pred, labels=labels) },
        # 'AUC'   : roc_auc_score(y_recode, y_pred, average=None) },
        index=labels)
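
# The kappa and AUC metrics commented out above don't fit the per-class
# DataFrame: kappa is a single scalar, and roc_auc_score needs class
# probabilities rather than hard labels. A minimal sketch of computing them
# separately (score_extras is an illustrative helper, not from the original
# code; it assumes the model exposes predict_proba, and the multiclass AUC
# will raise if y_test contains labels such as 'unknown' that the model
# never saw in training):
def score_extras(model, x_test, y_test):
    y_recode = y_test.attack_type.fillna('unknown')
    y_pred = model.predict(x_test)
    return {
        'kappa' : cohen_kappa_score(y_recode, y_pred),
        'AUC'   : roc_auc_score(y_recode, model.predict_proba(x_test),
                                multi_class='ovr', labels=model.classes_) }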
# normal-vs-rest recoding of classes
def score_test2(model, x_test, y_test):
    """Binary precision/recall/F1 after collapsing every attack class into 'attack'."""
    from pandas import DataFrame
    y_pred = model.predict(x_test)
    # recode non-normal classes in y_test.attack_type (work on a copy so the caller's frame is untouched)
    y_recode = y_test.attack_type.copy()
    y_recode[y_recode.isna()] = 'unknown'
    y_recode[y_recode != 'normal'] = 'attack'
    y_pred[y_pred != 'normal'] = 'attack'
    labels = ['normal', 'attack']
    return DataFrame({
        'precision' : precision_score(y_recode, y_pred, average=None, labels=labels),
        'recall'    : recall_score(y_recode, y_pred, average=None, labels=labels),
        'F1'        : f1_score(y_recode, y_pred, average=None, labels=labels) },
        # 'AUC' : roc_auc_score(y_recode, y_pred, average=None) },
        index=labels)
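
# The x_train_r / y_train_r variables used below are presumably a
# class-balanced resample of the training data, built elsewhere in this
# project. A minimal sketch of one way to produce such a resample
# (balanced_resample and n_per_class are illustrative names and values,
# not from the original code):
def balanced_resample(x, y, n_per_class=10000, random_state=4129):
    import numpy as np
    from sklearn.utils import resample
    y = np.asarray(y)
    # draw n_per_class samples (with replacement) from each class
    idx = np.concatenate([
        resample(np.where(y == c)[0], replace=True,
                 n_samples=n_per_class, random_state=random_state)
        for c in np.unique(y)])
    return x[idx], y[idx]
# e.g.: x_train_r, y_train_r = balanced_resample(x_train, y_train)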
######################################################################
# L O G I S T I C
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
# fit on balanced data
# 'recall_macro' rather than plain 'recall': the binary-only 'recall' scorer raises on multiclass targets
logit = LogisticRegressionCV(Cs=[100,500,750,900], scoring='recall_macro', multi_class='ovr', solver='saga', random_state=4129, n_jobs=-1)
%time logit.fit(x_train_r, y_train_r)
logit.C_   # regularization strength chosen by CV for each class
score_test(logit, x_test, y_test)
# fit on imbalanced data
logit = LogisticRegression(C=31, multi_class='ovr', solver='saga', n_jobs=-1, random_state=4129)
%time logit.fit(x_train, y_train)
# logit.score(x_test, y_test.iloc[:,1])
score_test(logit, x_test, y_test)
%time logit.fit(x_train[:, 18:37], y_train)
logit.score(x_test[:, 18:37], y_test.iloc[:,1])
######################################################################
# S V M
from sklearn.svm import LinearSVC, SVC
svc = LinearSVC(C=1, random_state=4129)
%time svc.fit(x_train_r[:, 18:37], y_train_r)
score_test(svc, x_test[:, 18:37], y_test)
%time svc.fit(x_train_r, y_train_r)
score_test(svc, x_test, y_test)
# kernel SVM
svc = SVC(C=1, random_state=4129)
%time svc.fit(x_train_r[:, 18:37], y_train_r)
score_test(svc, x_test[:, 18:37], y_test)
%time svc.fit(x_train_r, y_train_r)
score_test(svc, x_test, y_test)
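
# The AUC line commented out in score_test2 needs a continuous score rather
# than hard labels. A minimal sketch for the normal-vs-attack framing
# (auc_normal_vs_attack is an illustrative helper; it assumes the model was
# refit on binary 'normal'/'attack' labels so decision_function returns a
# 1-D score):
def auc_normal_vs_attack(model, x_test, y_test):
    y_recode = y_test.attack_type.fillna('unknown')
    y_recode[y_recode != 'normal'] = 'attack'
    # decision_function scores the positive class, i.e. model.classes_[1]
    y_true = (y_recode == model.classes_[1]).astype(int)
    return roc_auc_score(y_true, model.decision_function(x_test))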
######################################################################
# GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier
gb = GradientBoostingClassifier(max_depth=20, n_estimators=50, verbose=1, random_state=4129)
%time gb.fit(x_train_r, y_train_r)
score_test(gb, x_test, y_test)
score_test(gb, x_corr, y_corr)
score_test2(gb, x_corr, y_corr)
# https://www.analyticsvidhya.com/blog/2016/02/complete-guide-parameter-tuning-gradient-boosting-gbm-python/
# https://datascience.stackexchange.com/questions/14377/tuning-gradient-boosted-classifiers-hyperparametrs-and-balancing-it
from sklearn.model_selection import GridSearchCV, StratifiedKFold
parameters = {'max_depth':[20,50], 'n_estimators':[100,150,250,500]}
cv = StratifiedKFold(3, shuffle=True, random_state=4129)
gs = GridSearchCV(gb, parameters, n_jobs=1, cv=cv, verbose=1, return_train_score=False)
%time gs.fit(x_train_r, y_train_r)
gs.best_params_
score_test(gs.best_estimator_, x_test, y_test)
gs.best_score_
gs.cv_results_
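# cv_results_ is a dict of parallel arrays; it reads more easily as a
# DataFrame (these column names are standard GridSearchCV output):
from pandas import DataFrame
DataFrame(gs.cv_results_)[['params', 'mean_test_score', 'std_test_score', 'rank_test_score']]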