##### Chapter 11: Improving Model Performance -------------------
# load the credit dataset (stringsAsFactors keeps default a factor on R >= 4.0)
credit <- read.csv("credit.csv", stringsAsFactors = TRUE)
library(caret)
## Creating a simple tuned model ----
# automated parameter tuning of C5.0 decision tree
set.seed(300)
m <- train(default ~ ., data = credit, method = "C5.0")
# summary of tuning results
m
# apply the best C5.0 candidate model to make predictions
p <- predict(m, credit)
table(p, credit$default)
# obtain predicted classes
head(predict(m, credit, type = "raw"))
# obtain predicted probabilities
head(predict(m, credit, type = "prob"))
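# (not in the book) a quick sketch summarizing the in-sample predictions with
# caret's confusionMatrix(); note these are resubstitution predictions, so the
# reported performance is optimistic
confusionMatrix(p, credit$default)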
## Customizing the tuning process ----
# use trainControl() to alter resampling strategy
ctrl <- trainControl(method = "cv", number = 10,
                     selectionFunction = "oneSE")
# use expand.grid() to create grid of tuning parameters
grid <- expand.grid(.model = "tree",
                    .trials = c(1, 5, 10, 15, 20, 25, 30, 35),
                    .winnow = FALSE)
# look at the result of expand.grid()
grid
# customize train() with the control list and grid of parameters
set.seed(300)
m <- train(default ~ ., data = credit, method = "C5.0",
           metric = "Kappa",
           trControl = ctrl,
           tuneGrid = grid)
m
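# (not in the book) train objects can also be plotted to visualize how each
# tuning parameter affects the resampled performance estimate:
plot(m)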
## Bagging ----
# Using the ipred bagged decision trees
library(ipred)
set.seed(300)
mybag <- bagging(default ~ ., data = credit, nbagg = 25)
credit_pred <- predict(mybag, credit)
table(credit_pred, credit$default)
# estimate performance of ipred bagged trees
library(caret)
set.seed(300)
ctrl <- trainControl(method = "cv", number = 10)
train(default ~ ., data = credit, method = "treebag",
      trControl = ctrl)
# Using caret's more general bagging function
# create a bag control object using svmBag
str(svmBag)
svmBag$fit
bagctrl <- bagControl(fit = svmBag$fit,
                      predict = svmBag$pred,
                      aggregate = svmBag$aggregate)
# fit the bagged svm model
set.seed(300)
svmbag <- train(default ~ ., data = credit, method = "bag",
                trControl = ctrl, bagControl = bagctrl)
svmbag
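# (not in the book) caret ships similar wrapper lists for other learners
# (e.g., ctreeBag, nbBag); a minimal sketch swapping in bagged conditional
# inference trees, assuming the party package is installed
# (ctreebagctrl is a name introduced here for illustration):
ctreebagctrl <- bagControl(fit = ctreeBag$fit,
                           predict = ctreeBag$pred,
                           aggregate = ctreeBag$aggregate)
# ctreebag <- train(default ~ ., data = credit, method = "bag",
#                   trControl = ctrl, bagControl = ctreebagctrl)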
## Boosting ----
## Using C5.0 Decision Tree (not shown in book)
library(C50)
m_c50_bst <- C5.0(default ~ ., data = credit, trials = 100)
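# (not in the book) a quick resubstitution check of the boosted C5.0 tree
# (p_c50_bst is a name introduced here for illustration):
p_c50_bst <- predict(m_c50_bst, credit)
table(p_c50_bst, credit$default)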
## Using AdaBoost.M1
library(adabag)
# create an AdaBoost.M1 model
set.seed(300)
m_adaboost <- boosting(default ~ ., data = credit)
p_adaboost <- predict(m_adaboost, credit)
head(p_adaboost$class)
p_adaboost$confusion
# create and evaluate an AdaBoost.M1 model using 10-fold CV
set.seed(300)
adaboost_cv <- boosting.cv(default ~ ., data = credit)
adaboost_cv$confusion
# calculate kappa
library(vcd)
Kappa(adaboost_cv$confusion)
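# (not in the book) accuracy can also be read straight off the CV confusion
# matrix, since the diagonal holds the correctly classified examples:
sum(diag(adaboost_cv$confusion)) / sum(adaboost_cv$confusion)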
## Random Forests ----
# random forest with default settings
library(randomForest)
set.seed(300)
rf <- randomForest(default ~ ., data = credit)
rf
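# (not in the book) randomForest tracks variable importance (mean decrease
# in Gini by default); a quick look at which predictors matter most:
varImpPlot(rf)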
library(caret)
ctrl <- trainControl(method = "repeatedcv",
                     number = 10, repeats = 10)
# auto-tune a random forest
grid_rf <- expand.grid(.mtry = c(2, 4, 8, 16))
set.seed(300)
m_rf <- train(default ~ ., data = credit, method = "rf",
              metric = "Kappa", trControl = ctrl,
              tuneGrid = grid_rf)
m_rf
# auto-tune a boosted C5.0 decision tree
grid_c50 <- expand.grid(.model = "tree",
                        .trials = c(10, 20, 30, 40),
                        .winnow = FALSE)
set.seed(300)
m_c50 <- train(default ~ ., data = credit, method = "C5.0",
               metric = "Kappa", trControl = ctrl,
               tuneGrid = grid_c50)
m_c50
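# (not in the book) since both models were trained with set.seed(300) and the
# same ctrl, their fold-by-fold results can be compared with caret's
# resamples(); a sketch:
summary(resamples(list(rf = m_rf, c50 = m_c50)))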