-
Notifications
You must be signed in to change notification settings - Fork 12
/
2011-10-02_lcmm_post_upload
131 lines (100 loc) · 7.39 KB
/
2011-10-02_lcmm_post_upload
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# load libraries
library(ggplot2)
library(catspec)
library(gridExtra)
library(lcmm)
# look at our data
# `dat` is not created anywhere in this script; it has to exist in the
# workspace before these lines run.
# The rest of the script reads the columns id, x, y, totobs, x0, rf3 and rf4
# from it.
str(dat)
head(dat)
###################
# start exploring #
###################
# Every PNG below is written with a relative file name, so it lands in
# "graphs". setwd() changes the working directory for the rest of the session
# and stops with an error if the directory does not exist.
setwd("graphs")

# looking at everyones x plot #
# Subset shared by panels 2-4: rows that belong to the first 300 distinct ids
# (in data order) and have totobs > 5. head() is used instead of [1:300] so a
# data set with fewer than 300 ids does not pad the id list with NA (which
# would pull in rows whose id is missing).
# NOTE(review): the panel titles say "random", but the ids are taken in data
# order, not sampled -- confirm which was intended.
first_ids <- head(unique(dat$id), 300)
dat_first <- dat[dat$id %in% first_ids & dat$totobs > 5, ]

# NOTE(review): opts() is deprecated/removed in later ggplot2 releases in
# favour of theme()/ggtitle(); it is kept here so the script keeps targeting
# the library version it was written for.

# panel 1: raw trajectory of every subject
p1 <- ggplot(dat, aes(x, y, group = id)) +
  geom_line() +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y") +
  opts(title = "One line per person, all subjects")

# panel 2: raw trajectories, subset only
p2 <- ggplot(dat_first, aes(x, y, group = id)) +
  geom_line() +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y") +
  opts(title = "Just 300 random subjects with >5 total obs")

# panel 3: one least-squares line per subject
p3 <- ggplot(dat_first, aes(x, y, group = id)) +
  geom_smooth(aes(group = id), method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y") +
  opts(title = "Just 300 random subjects with >5 total obs\n straight line per person")

# panel 4: one loess curve per subject
p4 <- ggplot(dat_first, aes(x, y, group = id)) +
  geom_smooth(aes(group = id), method = "loess", se = FALSE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y") +
  opts(title = "Just 300 random subjects with >5 total obs\n smoothed line per person")

# write the four panels as a 2 x 2 grid into a date-stamped PNG
png(paste(Sys.Date(), "_4_bulk_a1c_plots.png", sep = ""),
    width = 1000, height = 1000, res = 90)
grid.arrange(p1, p2, p3, p4, nrow = 2)
dev.off()
# smoothing out the plots #
# dat$id <- as.character(dat$id)
# Rows of the subjects whose totobs equals 58; shared by all three panels.
# NOTE(review): the grid title below says "Three subjects"; the number of
# subjects with totobs == 58 is not checked here -- confirm against the data.
dat_58 <- dat[dat$totobs == 58, ]

# raw trajectories, coloured by subject
p1 <- ggplot(dat_58, aes(x, y, group = id, colour = id)) +
  geom_line() +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y") +
  opts(title = "Raw")

# loess curve per subject, legend hidden
p2 <- ggplot(dat_58, aes(x, y, group = id, colour = id)) +
  geom_smooth(aes(group = id), method = "loess", se = FALSE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y") +
  opts(title = "Smoothed", legend.position = "none")

# thick loess curve with the raw line drawn on top, legend hidden
p3 <- ggplot(dat_58, aes(x, y, group = id, colour = id)) +
  geom_smooth(aes(group = id), method = "loess", se = FALSE, size = 2) +
  geom_line(size = 1) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y") +
  opts(title = "Combined", legend.position = "none")

# three panels side by side in a date-stamped PNG
png(paste(Sys.Date(), "_3_smoothing_plots.png", sep = ""),
    width = 1000, height = 500, res = 90)
grid.arrange(p1, p2, p3, ncol = 3, main = "Three subjects with 58 observations")
dev.off()
# latent class model
# Build the modelling subset `dummy` from `dat`. A row is kept when all of
# these hold: 67 < x0 < 160.8, rf3 is "95-97" or "98-00", rf4 > 9, totobs > 4.
# (a narrower filter, totobs > 30 & totobs < 36, was tried earlier and disabled)
x0_in_window <- dat$x0 > (67) & dat$x0 < (160.8)
rf3_selected <- dat$rf3 %in% c("95-97", "98-00")
rf4_above_9 <- dat$rf4 > 9
totobs_above_4 <- dat$totobs > 4
dummy <- dat[x0_in_window & rf3_selected & rf4_above_9 & totobs_above_4, ]

# Drop every subject that has fewer than 4 rows left inside the subset.
# NOTE(review): this is a separate check from totobs > 4 above; presumably
# totobs counts rows in the full data while this counts rows that survived the
# filters -- confirm.
rows_per_id <- table(dummy$id)
sparse_ids <- names(rows_per_id)[as.vector(rows_per_id < 4)]
dummy <- dummy[!(dummy$id %in% sparse_ids), ]

# number of subjects kept, and the distribution of rows per subject
length(unique(dummy$id))
summary(as.vector(table(dummy$id)))
# trying with 2 groups
# Fit a latent class mixed model of y on x with ng = 2 classes. x is used in
# the fixed part, the random part and the class-specific (mixture) part;
# subjects are identified by the "id" column of `dummy`.
# The subject id is a factor for the fit and is turned back into character
# afterwards (later sections rely on the character form).
dummy$id <- as.factor(dummy$id)
d2 <- lcmm(y ~ x, random = ~x, subject = "id", mixture = ~x, ng = 2,
           idiag = TRUE, data = dummy, link = "linear")
summary(d2)
postprob(d2)

# look at the post probs closer
# For the subjects assigned to class k, summarise their posterior probability
# of belonging to class k.
round(summary(as.numeric(d2$pprob[d2$pprob[, "class"] == 1, "prob1"])), 2)
round(summary(as.numeric(d2$pprob[d2$pprob[, "class"] == 2, "prob2"])), 2)

# pull out who is in which class
dummy$id <- as.character(dummy$id)
# first two columns of pprob, read below as `id` and `class`
people2 <- as.data.frame(d2$pprob[, 1:2])
# One vectorised lookup of each row's subject in people2 (replaces a
# which() scan per row). Stop right away if a subject has no class.
row_in_people2 <- match(dummy$id, people2$id)
stopifnot(!any(is.na(row_in_people2)))
dummy$group2 <- factor(people2$class[row_in_people2])

# plot check
# left: raw lines per subject plus a thick loess curve per class
p1 <- ggplot(dummy, aes(x, y, group = id, colour = group2)) +
  geom_line() +
  geom_smooth(aes(group = group2), method = "loess", size = 2, se = FALSE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class") +
  opts(title = "Raw")

# right: thin smoothed curve per subject plus a thick loess curve (with
# confidence band) per class, legend hidden
p2 <- ggplot(dummy, aes(x, y, group = id, colour = group2)) +
  geom_smooth(aes(group = id, colour = group2), size = 0.5, se = FALSE) +
  geom_smooth(aes(group = group2), method = "loess", size = 2, se = TRUE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class") +
  opts(title = "Smoothed", legend.position = "none")

png(paste(Sys.Date(), "_2_latent_class.png", sep = ""),
    width = 1000, height = 550, res = 90)
grid.arrange(p1, p2, ncol = 2, main = "2 Latent Classes")
dev.off()
# trying with 3 groups #
# Same model as the 2-class fit (y on x; x in the fixed, random and mixture
# parts; subject column "id"), now with ng = 3 classes.
d3 <- lcmm(y ~ x, random = ~x, subject = "id", mixture = ~x, ng = 3,
           idiag = TRUE, data = dummy, link = "linear")
summary(d3)
postprob(d3)

# For the subjects assigned to class k, summarise their posterior probability
# of belonging to class k.
summary(as.numeric(d3$pprob[d3$pprob[, "class"] == 1, "prob1"]))
summary(as.numeric(d3$pprob[d3$pprob[, "class"] == 2, "prob2"]))
summary(as.numeric(d3$pprob[d3$pprob[, "class"] == 3, "prob3"]))

# pull out who is in which class
# first two columns of pprob, read below as `id` and `class`
people3 <- as.data.frame(d3$pprob[, 1:2])
# Ids are matched as they are, exactly like the 2- and 4-class sections do.
# The earlier as.numeric(dummy$id) turned every non-numeric id into NA and so
# broke the lookup. match() does the lookup in one vectorised call; stop right
# away if a subject has no class.
row_in_people3 <- match(dummy$id, people3$id)
stopifnot(!any(is.na(row_in_people3)))
dummy$group3 <- factor(people3$class[row_in_people3])

# plot check
# left: raw lines per subject plus a thick loess curve per class
p1 <- ggplot(dummy, aes(x, y, group = id, colour = group3)) +
  geom_line() +
  geom_smooth(aes(group = group3), method = "loess", size = 2, se = FALSE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class") +
  opts(title = "Raw")

# right: thin smoothed curve per subject plus a thick loess curve (with
# confidence band) per class, legend hidden
p2 <- ggplot(dummy, aes(x, y, group = id, colour = group3)) +
  geom_smooth(aes(group = id, colour = group3), size = 0.5, se = FALSE) +
  geom_smooth(aes(group = group3), method = "loess", size = 2, se = TRUE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class") +
  opts(title = "Smoothed", legend.position = "none")

png(paste(Sys.Date(), "_3_latent_class.png", sep = ""),
    width = 1000, height = 550, res = 90)
grid.arrange(p1, p2, ncol = 2, main = "3 Latent Classes")
dev.off()
# trying with 4 groups #
# Same model as the 2- and 3-class fits (y on x; x in the fixed, random and
# mixture parts; subject column "id"), now with ng = 4 classes.
d4 <- lcmm(y ~ x, random = ~x, subject = "id", mixture = ~x, ng = 4,
           idiag = TRUE, data = dummy, link = "linear")
summary(d4)
postprob(d4)

# For the subjects assigned to class k, summarise their posterior probability
# of belonging to class k.
summary(as.numeric(d4$pprob[d4$pprob[, "class"] == 1, "prob1"]))
summary(as.numeric(d4$pprob[d4$pprob[, "class"] == 2, "prob2"]))
summary(as.numeric(d4$pprob[d4$pprob[, "class"] == 3, "prob3"]))
summary(as.numeric(d4$pprob[d4$pprob[, "class"] == 4, "prob4"]))

# pull out who is in which class
# first two columns of pprob, read below as `id` and `class`
people4 <- as.data.frame(d4$pprob[, 1:2])
# One vectorised lookup of each row's subject in people4 (replaces a
# which() scan per row). Stop right away if a subject has no class.
row_in_people4 <- match(dummy$id, people4$id)
stopifnot(!any(is.na(row_in_people4)))
dummy$group4 <- factor(people4$class[row_in_people4])

# plot check
# left: raw lines per subject plus a thick loess curve per class
p1 <- ggplot(dummy, aes(x, y, group = id, colour = group4)) +
  geom_line() +
  geom_smooth(aes(group = group4), method = "loess", size = 2, se = FALSE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class") +
  opts(title = "Raw")

# right: thin smoothed curve per subject plus a thick loess curve (with
# confidence band) per class, legend hidden
p2 <- ggplot(dummy, aes(x, y, group = id, colour = group4)) +
  geom_smooth(aes(group = id, colour = group4), size = 0.5, se = FALSE) +
  geom_smooth(aes(group = group4), method = "loess", size = 2, se = TRUE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class") +
  opts(title = "Smoothed", legend.position = "none")

png(paste(Sys.Date(), "_4_latent_class.png", sep = ""),
    width = 1000, height = 550, res = 90)
grid.arrange(p1, p2, ncol = 2, main = "4 Latent Classes")
dev.off()
# trying with 3 groups - higher term #
# Add a squared term and refit with ng = 3 classes; x and x2 both enter the
# fixed, random and mixture parts. Subject column is "id".
dummy$x2 <- dummy$x^2
d32 <- lcmm(y ~ x + x2, random = ~x + x2, subject = "id", mixture = ~x + x2,
            ng = 3, idiag = TRUE, data = dummy, link = "linear")
summary(d32)
postprob(d32)

# For the subjects assigned to class k, summarise their posterior probability
# of belonging to class k.
summary(as.numeric(d32$pprob[d32$pprob[, "class"] == 1, "prob1"]))
summary(as.numeric(d32$pprob[d32$pprob[, "class"] == 2, "prob2"]))
summary(as.numeric(d32$pprob[d32$pprob[, "class"] == 3, "prob3"]))

# pull out who is in which class
# first two columns of pprob, read below as `id` and `class`
people32 <- as.data.frame(d32$pprob[, 1:2])
# One vectorised lookup of each row's subject in people32 (replaces a
# which() scan per row, and drops the stray empty argument that was passed to
# factor()). Stop right away if a subject has no class.
row_in_people32 <- match(dummy$id, people32$id)
stopifnot(!any(is.na(row_in_people32)))
dummy$group32 <- factor(people32$class[row_in_people32])
# dummy$id <- as.character(dummy$id)
# dummy$id <- as.factor(dummy$id)

# plot check
# left: raw lines per subject plus a thick loess curve per class
p1 <- ggplot(dummy, aes(x, y, group = id, colour = group32)) +
  geom_line() +
  geom_smooth(aes(group = group32), method = "loess", size = 2, se = FALSE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class") +
  opts(title = "Raw")
# disabled: + scale_x_continuous(limits = c(80,250))

# right: thin smoothed curve per subject plus a thick loess curve (with
# confidence band) per class. Unlike the other sections this panel has no
# title and keeps its legend, because the opts() call is disabled.
p2 <- ggplot(dummy, aes(x, y, group = id, colour = group32)) +
  geom_smooth(aes(group = id, colour = group32), size = 0.5, se = FALSE) +
  geom_smooth(aes(group = group32), method = "loess", size = 2, se = TRUE) +
  scale_y_continuous(limits = c(13, 37)) +
  labs(x = "x", y = "y", colour = "Latent Class")
# disabled: + opts(title="Smoothed", legend.position="none") + scale_x_continuous(limits = c(80,250))

png(paste(Sys.Date(), "_3_latent_class_higher.png", sep = ""),
    width = 1000, height = 550, res = 90)
grid.arrange(p1, p2, ncol = 2, main = "3 Latent Classes - higher order term")
dev.off()