-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathch13.R
183 lines (132 loc) · 5.95 KB
/
ch13.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
library(ISwR)
library(MASS)
no.yes <- c("No", "Yes")
smoking <- gl(2,1,8,no.yes)
obesity <- gl(2,2,8,no.yes)
snoring <- gl(2,4,8, no.yes)
n.tot <- c(60,17,8,2,187,85,51,23)
n.hyp <- c(5,2,1,0,35,13,15,8)
supasmoke <- data.frame(smoking, obesity, snoring, n.tot, n.hyp)
print(supasmoke)
#R fits logistic reg in 2 ways:
#1: specify response as a matrix of column of success and one of failure
hyp.tbl <- cbind(n.hyp, n.tot-n.hyp)
print(hyp.tbl)
#then create model
hyp1 <- glm(hyp.tbl~smoking+obesity+snoring, family = binomial("logit"))
#2: give the proportion of sucesses
prop.hyp <- n.hyp/n.tot
glm.hyp <- glm(prop.hyp~smoking+obesity+snoring, family = binomial("logit"), weights=n.tot)
print(glm.hyp)
print(summary(hyp1))
glm.hyp <- glm(prop.hyp~obesity+snoring, family = binomial("logit"), weights=n.tot)
print(summary(glm.hyp))
#analysis of deviance table
print(anova(hyp1, test="Chisq"))
hyp1 <- glm(hyp.tbl~obesity+snoring+smoking, family = binomial("logit"))
print(anova(hyp1, test="Chisq"))
#gives a deviance based test for removing smoking.
print(anova(glm.hyp, test="Chisq"))
print(drop1(glm.hyp, test="Chisq"))
#LRT = Liklihood ratio test, aka deviance change.
#When degrees of freedom = 1 and large sample count, X^2 ~= z^2
#testing factors with more than 2 categories requires the deviance table.
#small sample situations require special attention.
#13.2.2 Connection to test for trends.
print(caesar.shoe)
shoe.score <- 1:6
print(summary(glm(t(caesar.shoe)~shoe.score, binomial)))
print(anova(glm(t(caesar.shoe)~shoe.score, binomial)))
#13.3 Likelihood Profiling
#Summary z tests are based on the Wald approximation, calcs what the approx std. err. of a parameter estimate
#should be if the true value of the parameters were equal to the estimates.
#In small datasets this difference can be a lot.
#This can mess with confidence intervals so you need to do a +-1.96x s.e. calc to do
#can do via MASS package confint which inverts the likelihood ratio test
print(confint(glm.hyp))
print(confint.default(glm.hyp))
plot(profile(glm.hyp))
print(exp(cbind(OR=coef(glm.hyp), confint(glm.hyp))))
juul$menarche <- factor(juul$menarche, labels = c("No", "Yes"))
juul$tanner <- factor(juul$tanner)
juul.girl <- subset(juul, age>8 & age<20 & complete.cases(menarche))
attach(juul.girl)
print(summary(glm(menarche~age, family = binomial)))
print(summary(glm(menarche~age+tanner, family = binomial)))
print(drop1(glm(menarche~age+tanner, binomial), test="Chisq"))
print(predict(glm.hyp))
print(predict(glm.hyp, type="response"))
plot(age, fitted(glm(menarche~age, binomial)))
glm.menarche <- glm(menarche~age, binomial)
Age <- seq(8,20,.1)
newages <- data.frame(age=Age)
predicted.probability <- predict(glm.menarche, newages, type="resp")
plot(predicted.probability ~ Age, type="l")
print(fitted(glm.hyp))
print(prop.hyp)
print(fitted(glm.hyp)*n.tot)
print(data.frame(fit=fitted(glm.hyp)*n.tot, n.hyp, n.tot))
age.group <- cut(age, c(8, 10, 12, 13, 14, 15, 16, 18, 20))
tb <- table(age.group, menarche)
print(tb)
rel.freq <- prop.table(tb,1)[,2]
print(rel.freq)
print(points(rel.freq ~ c(9,11,12.5,13.5,14.5,15.5,17,19), pch=5))
age.gr <- cut(age, c(8,12,13,14,20))
print(summary(glm(menarche~age+age.gr, binomial)))
print(anova(glm(menarche~age+age.gr, binomial)))
print(anova(glm(menarche~age+I(age^2)+I(age^3)+age.gr, binomial)))
glm.menarche <- glm(menarche~age+I(age^2)+I(age^3), binomial)
predicted.probability <- predict(glm.menarche, newages, type="resp")
plot(predicted.probability~Age, type="l")
points(rel.freq~c(9,11,12.5,13.5,14.5,15.5,17,19), pch=5)
#13.8 Exercises
#13.1 In the malaria set, analyze risk of malaria with age and log-transformed
#antibody lvl as explanatory variables.
print(summary(glm(mal~age+log(ab), binomial, data=malaria)))
#13.2 Fit a logistic regression model to the graft.vs.host dataset, prediciting
# gvhd reponse. Use different transformations of the index variable.
#Reduce the model using backwards elimination
attach(graft.vs.host)
type <- factor(type,labels=c("AML", "ALL", "CML"))
m1 <- glm(gvhd~rcpage+donage+type+preg+log(index), binomial)
m1a <- glm(gvhd~rcpage+donage+type+preg+index, binomial)
print(summary(m1))
print(summary(m1a))
drop1(m1, test="Chisq")
print(drop1(update(m1, ~ . -rcpage), test="Chisq"))
print(drop1(update(m1, ~ . -rcpage - type), test="Chisq"))
print(drop1(update(m1, ~ . -rcpage - type - preg), test="Chisq"))
print(summary(m2 <- glm(gvhd~donage+log(index), binomial)))
#13.3 In the analysis of the malaria and graft.vs.host data, try using the
#confint function to find improved confidence intervals for the regression
#coefficents.
confint(m2)
est <- coefficients(summary(m2))[,1]
se <- coefficients(summary(m2))[,2]
est + cbind(qnorm(.025)*se, qnorm(.975)*se)
print(confint.default(m2))
#13.4 Following up on Exercise 8.2 about Rocky Mountain spotted fever
#splitting the data by age groups gives the table below. Does this confirm the
#earlier analysis?
counts <- c(13,40,157,40,21,61)
total <- c(108,264,375,310,181,162)
age <- gl(3,1,6)
type <- gl(2,3,6)
print(anova(glm(counts/total ~ age+type, weights=total, binomial), test="Chisq"))
#13.5 A probit regression is just like a logistic regression,
#but uses a different link function. Try the analysis of the menarche variabl
#in the juul dataset with this link fxn. Does the fit improve?
juul.girl <- transform(subset(juul, age>8 & age<20 & complete.cases(menarche)),
menarche=factor(menarche))
logit.menarche <- glm(menarche~age+I(age^2)+I(age^3), binomial, data=juul.girl)
probit.menarche <- glm(menarche~age+I(age^2)+I(age^3), binomial(probit), data=juul.girl)
print(summary(logit.menarche))
print(summary(probit.menarche))
Age <- seq(8,20,.1)
newages <- data.frame(age=Age)
p.logit <- predict(logit.menarche, newdata=newages, type="resp")
p.probit <- predict(probit.menarche, newdata=newages, type="resp")
matplot(Age, cbind(p.probit, p.logit), type="l")
#Questions: What is actually important to know from this chapter?
#How is fit measured here?