# Libraries ---------------------------------------------------------------
# Packages used in this script (they may already be attached by an earlier
# script in the project; attaching them again is harmless)
library(caret)                  # createMultiFolds, train, safs, caretSA
library(xgboost)                # xgb.cv, getinfo
library(rBayesianOptimization)  # KFold, BayesianOptimization
library(neuralnet)              # neuralnet

# Global Tuning -----------------------------------------------------------
set.seed(69)
cv.folds.caret <- createMultiFolds(y.train, k = 5, times = 5)
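# CARET_TRAIN_CTRL is assumed to be defined elsewhere in the project; the guarded
# sketch below only illustrates the kind of trainControl object the train() calls in
# this script expect, reusing the folds created above (an assumption, not the
# project's actual definition).
if (!exists("CARET_TRAIN_CTRL")) {
  CARET_TRAIN_CTRL <- trainControl(method = "repeatedcv",
                                   number = 5, repeats = 5,
                                   index = cv.folds.caret,
                                   verboseIter = FALSE,
                                   allowParallel = TRUE)
}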
# XGBoost Tuning ----------------------------------------------------------
# Tune xgboost hyperparameters using Bayesian optimization
tune.bayes.xgb <- FALSE
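# xgbCvBayes is a project helper defined elsewhere; the guarded sketch below is only
# an illustration of the objective wrapper BayesianOptimization expects (it must
# return list(Score, Pred)), under the assumption that the score is the negative
# cross-validated RMSE from xgb.cv on ord.train.xgb.
if (!exists("xgbCvBayes")) {
  xgbCvBayes <- function(max.depth, subsample, colsample_bytree, eta,
                         min_child_weight, nrounds) {
    cv <- xgb.cv(params = list(objective = "reg:squarederror",
                               eta = eta,
                               max_depth = max.depth,
                               min_child_weight = min_child_weight,
                               subsample = subsample,
                               colsample_bytree = colsample_bytree),
                 data = ord.train.xgb, nrounds = nrounds,
                 folds = cv.folds.bayes, early_stopping_rounds = 50, verbose = 0)
    # Maximise the negative RMSE, i.e. minimise the cross-validated RMSE
    list(Score = -min(cv$evaluation_log$test_rmse_mean), Pred = 0)
  }
}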
if (tune.bayes.xgb == TRUE) {
  cv.folds.bayes <- KFold(getinfo(ord.train.xgb, 'label'), nfolds = 5,
                          stratified = FALSE, seed = 0)
  xgb.bayes.opt <- BayesianOptimization(xgbCvBayes,
                                        bounds = list(max.depth = c(1L, 15L),
                                                      subsample = c(0.3, 0.9),
                                                      colsample_bytree = c(0.3, 0.9),
                                                      eta = c(0.01, 0.1),
                                                      min_child_weight = c(1L, 15L),
                                                      nrounds = c(500L, 1200L)),
                                        init_points = 15, n_iter = 30, acq = "ei",
                                        kappa = 3, eps = 0.5, verbose = TRUE)
}
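# Usage note (assumes the search above was run): rBayesianOptimization returns the
# tuned hyperparameters in Best_Par and the best score in Best_Value.
if (exists("xgb.bayes.opt")) {
  print(xgb.bayes.opt$Best_Par)
  print(xgb.bayes.opt$Best_Value)
}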
# grid search to optimize nrounds and eta
tune.caret.xgb <- FALSE
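# XGB_CARET_TUNE_GRID is defined elsewhere in the project; purely as an illustration,
# caret's "xgbTree" method needs all seven tuning columns, so an assumed grid varying
# nrounds and eta while holding the remaining parameters fixed could look like this.
if (!exists("XGB_CARET_TUNE_GRID")) {
  XGB_CARET_TUNE_GRID <- expand.grid(nrounds = seq(500, 1200, by = 100),
                                     eta = c(0.01, 0.05, 0.1),
                                     max_depth = 6,
                                     gamma = 0,
                                     colsample_bytree = 0.7,
                                     min_child_weight = 1,
                                     subsample = 0.7)
}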
if (tune.caret.xgb == TRUE) {
  xgb.caret.train <- train(x = ord.train.m, y = y.train,
                           method = "xgbTree",
                           metric = "RMSE",
                           tuneGrid = XGB_CARET_TUNE_GRID,
                           trControl = CARET_TRAIN_CTRL)
}
# check performance
check.base.xgb <- FALSE
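# xgbCVPerformance is a project helper defined elsewhere; the guarded sketch below is
# only an illustration (assumptions: XGB_PARS is an xgb.cv parameter list and nrounds
# is fixed here for simplicity). It cross-validates on the supplied predictor matrix
# and returns the xgb.cv result.
if (!exists("xgbCVPerformance")) {
  xgbCVPerformance <- function(model.parameters, predictors, y) {
    dtrain <- xgb.DMatrix(data = as.matrix(predictors), label = y)
    xgb.cv(params = model.parameters, data = dtrain, nrounds = 1000, nfold = 5,
           early_stopping_rounds = 50, verbose = 0)
  }
}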
if (check.base.xgb == TRUE) {
  xgb.b.cv <- xgbCVPerformance(model.parameters = XGB_PARS, predictors = ord.train.b.m,
                               y = y.train)
  xgb.cv <- xgbCVPerformance(model.parameters = XGB_PARS, predictors = ord.train.m,
                             y = y.train)
  xgb.tsne.cv <- xgbCVPerformance(model.parameters = XGB_PARS,
                                  predictors = cbind(ord.train.b.m, tsne$Y[train, ]),
                                  y = y.train)
}
# RandomForest Tuning -----------------------------------------------------
tune.caret.rf <- TRUE
if (tune.caret.rf == TRUE) {
  rf.caret.train <- train(x = ord.train.b.m, y = y.train,
                          method = "rf",
                          metric = "RMSE",
                          tuneLength = 8,
                          trControl = CARET_TRAIN_CTRL)
}
# check performance
check.rf <- FALSE
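# rfCVPerformance is a project helper defined elsewhere; the guarded sketch below is
# only an illustration (assumption: RF_PARS carries mtry and ntree). It estimates the
# cross-validated RMSE of a random forest with fixed parameters on the global folds.
if (!exists("rfCVPerformance")) {
  rfCVPerformance <- function(model.parameters, predictors, y) {
    train(x = predictors, y = y,
          method = "rf",
          metric = "RMSE",
          tuneGrid = data.frame(mtry = model.parameters$mtry),
          ntree = model.parameters$ntree,
          trControl = trainControl(method = "repeatedcv", index = cv.folds.caret))
  }
}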
if (check.rf == TRUE) {
  rf.ord.cv <- rfCVPerformance(model.parameters = RF_PARS, predictors = ord.train.b.m,
                               y = y.train)
}
# KNN Tuning --------------------------------------------------------------
# Feature selection using simulated annealing
tune.sa.knn <- FALSE
if (tune.sa.knn == TRUE) {
  knnSA <- caretSA
  ctrl <- safsControl(functions = knnSA,
                      method = "repeatedcv",
                      repeats = 5,
                      ## Here are the exact folds to use:
                      index = cv.folds.caret,
                      ## Restart the search after 50 iterations without improvement:
                      improve = 50,
                      allowParallel = TRUE)
  knn.sa <- safs(x = ord.train.pp,
                 y = y.train,
                 iters = 500,
                 safsControl = ctrl,
                 method = "knn",
                 tuneLength = 20,
                 trControl = CARET_TRAIN_CTRL)
}
tune.caret.knn <- FALSE
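# KNN_CARET_TUNE_GRID is defined elsewhere in the project; purely as an illustration,
# caret's "knn" method tunes a single parameter k, so an assumed grid could be:
if (!exists("KNN_CARET_TUNE_GRID")) {
  KNN_CARET_TUNE_GRID <- data.frame(k = seq(3, 25, by = 2))
}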
if (tune.caret.knn == TRUE) {
  knn.caret.train <- train(x = as.matrix(data.frame(ord.train.m)[, PREDICTOR_ATTR]),
                           y = y.train,
                           method = "knn",
                           metric = "RMSE",
                           tuneGrid = KNN_CARET_TUNE_GRID,
                           trControl = CARET_TRAIN_CTRL)
}
# check performance
check.knn <- FALSE
if (check.knn == TRUE) {
  knn.ord.cv <- rfCVPerformance(model.parameters = KNN_PARS,
                                predictors = as.matrix(data.frame(ord.train.m)[, PREDICTOR_ATTR]),
                                y = y.train)
}
# Lasso Tuning --------------------------------------------------------------
tune.caret.lasso <- FALSE
if (tune.caret.lasso == TRUE) {
  lasso.caret.train <- train(x = ord.train.m, y = y.train,
                             method = "glmnet",
                             metric = "RMSE",
                             tuneLength = 30,
                             trControl = CARET_TRAIN_CTRL)
}
# Nnet Tuning -------------------------------------------------------------
ord.train.nn <- cbind(ord.train.nn, y.train)
n <- names(ord.train.nn)
f <- as.formula(paste("y.train ~", paste(n[!n %in% "y.train"], collapse = " + ")))
nn <- neuralnet(f, data = ord.train.nn, hidden = c(5, 3), linear.output = TRUE)