-
Notifications
You must be signed in to change notification settings - Fork 0
/
Models.R
67 lines (47 loc) · 1.55 KB
/
Models.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
library(h2o)
library(dplyr)
library(dummies)
library(devtools)
library(ggbiplot)
library(factoextra)
library(corrplot)
# Split the raw data into a training sample and a hold-out set ----
train.raw <- readRDS("train.rds")

# Number of rows drawn (without replacement) into the training set.
train.size <- 3000000L
if (nrow(train.raw) < train.size) {
  # Fail with an explicit message instead of sample()'s generic error.
  stop(
    "train.rds has ", nrow(train.raw), " rows; at least ", train.size,
    " are needed to draw the training sample.",
    call. = FALSE
  )
}

set.seed(1234) # fixed seed so the split is reproducible
s <- sample(seq_len(nrow(train.raw)), train.size)
train <- train.raw[s, ]
test <- train.raw[-s, ] # every row that was not drawn into `train`

# PCA input: the training rows without the identifier columns
# (Id, groupId, matchId) and without winPlacePerc.
train.pca <- train %>%
  select(-Id, -groupId, -matchId, -winPlacePerc)
###########################################
############## PCA (PCR) ##################
###########################################
# train.pca already has the id columns ("Id", "groupId", "matchId") removed.
# Fit the PCA with every column centred and scaled before decomposition.
tr.pca <- train.pca %>%
  prcomp(center = TRUE, scale. = TRUE)

# Show the fitted PCA object
print(tr.pca)
# PCA diagnostics ----
# The factoextra plots are wrapped in print() so they are also drawn when the
# script is run through source(), where top-level values are not auto-printed.

# Eigenvalues of the fitted components
eig.val <- get_eigenvalue(tr.pca)

# Scree plot
# (original note: first five components explained approximately 80% variance)
print(fviz_eig(tr.pca, addlabels = TRUE, ylim = c(0, 50)))

# Variable importance
# NOTE: `var` shadows the name of stats::var(); the name is kept so that any
# code relying on this object keeps working.
var <- get_pca_var(tr.pca)
print(fviz_pca_var(tr.pca, alpha.var = "contrib"))
corrplot(var$contrib, is.corr = FALSE)

# Top-10 variable contributions to each of PC1 to PC5, one plot per component
for (pc in seq_len(5L)) {
  print(fviz_contrib(tr.pca, choice = "var", axes = pc, top = 10))
}

# Variable plot coloured by contribution
print(
  fviz_pca_var(
    tr.pca,
    col.var = "contrib",
    gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
  )
)