-
Notifications
You must be signed in to change notification settings - Fork 5
/
LOGISTIC REG WC 2019.R
104 lines (59 loc) · 2.51 KB
/
LOGISTIC REG WC 2019.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
## LOGISTIC REGRESSION - WORLD CUP 2019
## Training-data preparation: load the match history, keep the modelling
## window, one-hot encode both team columns, drop unused columns and shuffle.
library(dplyr)

wc <- read.csv("WC_Train.csv")

## Keep matches whose Year lies between 2007 and 2019 (both bounds inclusive).
## which() drops rows with a missing Year instead of producing all-NA rows.
train <- wc[which(wc$Year >= 2007 & wc$Year <= 2019), ]
# train <- wc

## No attach(train) here: every later step passes its data frame explicitly,
## and an attached copy would only leave a stale snapshot on the search path.

## Create dummy variables for Team A and Team B (TRAIN).
## "- 1" removes the intercept so each team level gets its own 0/1 column.
Team.A.matrix <- model.matrix(~ Trim.Team.A - 1, data = train)
train <- data.frame(train, Team.A.matrix)
Team.B.matrix <- model.matrix(~ Trim.Team.B - 1, data = train)
train <- data.frame(train, Team.B.matrix)

## Remove the columns that must not enter the model, selected by position.
## NOTE(review): presumably Year, the raw Team A / Team B columns and other
## non-predictors; the indices depend on the column order of WC_Train.csv,
## which is not visible here - confirm against the file.
train <- train[, -c(1, 3, 4, 6, 8, 5, 9)]

## Randomly rearrange the rows of the data set (sampling 100% of the rows).
train <- sample_frac(train, 1)
## TEST DATA SET
## Load the matches to be scored and give them the same style of team
## indicator columns that the training data received.
test1 <- read.csv("Test_Final.csv")

## UPDATE AS ON 08-7-2019
## The training data set also included the World Cup matches.
## Predicting WC 2019 for the semi-finals; the first 38 rows of the test file
## are skipped (marked "For Final" in the original note).
test1 <- test1[-seq_len(38), ]

## One 0/1 column per team present in Trim.Team.A, appended to test1.
Team.A.matrix.test <- model.matrix(~ Trim.Team.A - 1, data = test1)
test1 <- data.frame(test1, Team.A.matrix.test)

## Same encoding for Trim.Team.B.
Team.B.matrix.test <- model.matrix(~ Trim.Team.B - 1, data = test1)
test1 <- data.frame(test1, Team.B.matrix.test)

## Reduced copy of test1 without columns 1 and 3-7 (selected by position).
## NOTE(review): the meaning of these positions depends on the layout of
## Test_Final.csv - confirm.
test <- test1[, -c(1, 3, 4, 5, 6, 7)]
## Build Logistic Regression Model
## Fits a binomial GLM on `train`, scores `test1`, and exports the predictions.
library(SDMTools) # provides confusion.matrix(), used in the evaluation section

## No attach(train): glm() and predict() receive their data explicitly.
## With an attached copy, a column missing from `test1` could be silently
## looked up in the training data instead of raising a clear error.

## Earlier hand-picked specification, kept for reference:
# logit = Team.A.Won ~ Trim.Team.AAustralia + Trim.Team.ABangladesh + Trim.Team.AEngland +
# Trim.Team.ASouth.Africa + Trim.Team.ASri.Lanka + Trim.Team.BAustralia +
# Trim.Team.BBangladesh + Trim.Team.BIndia + Trim.Team.BPakistan + Trim.Team.BSouth.Africa

## A few variables are not significant; however, because they represent teams,
## all variables are kept: Team.A.Won is regressed on every other column.
logit <- Team.A.Won ~ .
logit.plot <- glm(logit, data = train, family = binomial)
summary(logit.plot)

## type = "response" returns fitted probabilities rather than log-odds.
test1$predict.logit <- predict(logit.plot, newdata = test1, type = "response")

## Hard 0/1 call: 1 when the fitted probability is strictly above 0.5.
test1$Team.A.Win <- ifelse(test1$predict.logit > 0.5, 1, 0)

## Drop columns 6 to 25 by position before exporting.
## NOTE(review): presumably the team dummy columns added to test1 - the range
## is hard-coded and depends on the test file layout; confirm.
test1 <- test1[, -c(6:25)]
test1 <- test1[, -5] ## Remove Ground Variable - Not included in Study

## View() needs an interactive session; skip it in batch runs so the CSV
## below is still written.
if (interactive()) {
  View(test1)
}
write.csv(test1, 'Logistic Regression Prediction SemiFinals Final.csv')
## Model Evaluation
## Builds a confusion matrix and ROC curve from the columns of `test1`, then
## derives accuracy and two row-wise error rates from the confusion matrix.
library(pROC)

## Columns are referenced explicitly as test1$... instead of attach(test1),
## so the code does not depend on what happens to be on the search path.

## NOTE(review): Team.A.Win is itself derived from predict.logit (thresholded
## at 0.5 earlier in this script), so the "observed" labels used below are the
## model's own predictions, not match results. The metrics therefore measure
## agreement of the model with itself. If the real outcomes are available,
## they should replace test1$Team.A.Win here - confirm with the data owner.
m3.matrix <- confusion.matrix(test1$Team.A.Win, test1$predict.logit,
                              threshold = 0.5)
m3.matrix

m3.roc <- roc(test1$Team.A.Win, test1$predict.logit)
m3.roc
plot(m3.roc)

## ON RESULT RATIOS DATA SET
## Share of cases on the diagonal of the confusion matrix.
accuracy.logit <- sum(diag(m3.matrix)) / sum(m3.matrix)
accuracy.logit

## Off-diagonal share within row 1 of the matrix.
## NOTE(review): which of rows/columns holds predicted vs. observed classes
## is defined by SDMTools::confusion.matrix - confirm before interpreting.
loss.logit <- m3.matrix[1, 2] / (m3.matrix[1, 2] + m3.matrix[1, 1])
loss.logit

## Off-diagonal share within row 2 of the matrix.
opp.loss.logit <- m3.matrix[2, 1] / (m3.matrix[2, 1] + m3.matrix[2, 2])
opp.loss.logit

## Weighted total loss: 95% weight on the row-1 rate, 5% on the row-2 rate.
tot.loss.logit <- 0.95 * loss.logit + 0.05 * opp.loss.logit
tot.loss.logit