forked from jerudolph13/SimTeach
-
Notifications
You must be signed in to change notification settings - Fork 0
/
simteach_p-value.R
115 lines (84 loc) · 3.81 KB
/
simteach_p-value.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#######################################################################################################
#
# Purpose: To show how simulation can be used to demonstrate the definition of a p-value
#
# Author: Jacqueline E. Rudolph
#
# Required packages: ggplot2, ggpubr
#
# Last Updated: 15 Oct 2020
#
#######################################################################################################
##Load in necessary packages
packages <- c("ggplot2", "ggpubr")
for (package in packages) {
library(package, character.only=T)
}
# Run simulation ----------------------------------------------------------
reps <- 10000 #Number of permutations
n <- 1000 #Number of individuals in each sample
set.seed(123)
rd <- rep(NA,reps) #Vector to hold risk difference results
#Simulate randomized treatment, with P(X=1)=0.5
x <- rbinom(n, 1, 0.5)
#Simulate binary outcome affected by treatment
#P(Y=1)=0.5 and the RD for X is 0.05
p.y <- 0.5 + 0.05*x - 0.05*0.5
y <- rbinom(n, 1, p.y)
#Estimate the RD for X and its p-value
res <- glm(y ~ x, family=gaussian(link="identity"))
rd_est <- summary(res)$coefficients[2]
rd_pvalue <- summary(res)$coefficients[8]
#Estimate the distribution of RDs under the null hypothesis that RD=0
#Randomly assign a new exposure to each individual, still with P(X'=1)=0.5
#For this new exposure, estimate the RD on the original outcome
for (i in 1:reps){
#Draw new randomized exposure
x_new <- rbinom(n, 1, 0.5)
#Using new exposure and original outcome, estimate the RD and save its value in the rd vector
rd[i] <- summary(glm(y ~ x_new, family=gaussian(link="identity")))$coefficients[2]
}
# Summarize ---------------------------------------------------------------
#Calculate the one-sided p-value, by taking the proportion of reps
#that had a RD greater than the RD from the original sample
prop1 <- sum(rd > rd_est)/reps
#Calculate the two-sided p-value, by taking the proportion of reps
#that had a RD more extreme than the RD from the original sample
#In other words, the absolute value of the RD was greater than the original RD
prop2 <- sum(abs(rd) > rd_est)/reps
# Visualize ---------------------------------------------------------------
#Set formatting options
thm <- theme_classic() +
theme(
#Format text
axis.title = element_text(family="Helvetica", size=16, color="black"),
axis.text = element_text(family="Helvetica", size=16, color="black"),
#Use tag to label paneled plots
plot.tag.position="topleft",
plot.tag=element_text(family="Helvetica", size=16, color="black", margin=margin(r=10, b=20)),
#Format axis
axis.line = element_line(size=0.75),
axis.ticks = element_line(size=0.75),
#Add space around plot
plot.margin = unit(c(1, 1, 1, 1), "lines"),
)
#Plot one-sided p-value
p1 <- ggplot() + thm +
#Plot labels
labs(tag="A)", x="\nRisk Difference", y="Density\n") +
#Generate plots
geom_histogram(aes(rd, stat(density)), bins=36, color="gray", fill="lightgray") +
geom_vline(xintercept=rd_est, color="black", size=0.75, linetype="dashed") +
#Define axis options
scale_x_continuous(expand=c(0, 0), limits=c(-0.12, 0.12)) +
scale_y_continuous(expand=c(0, 0), limits=c(0, 15))
#Plot two-sided p-value
p2 <- ggplot() + thm +
labs(tag="B)", x="\nAbsolute Value of Risk Difference", y="Density\n") +
geom_histogram(aes(abs(rd), stat(density)), bins=36, color="gray", fill="lightgray") +
geom_vline(xintercept=rd_est, color="black", size=0.75, linetype="dashed") +
scale_x_continuous(expand=c(0, 0), limits=c(-0.12, 0.12)) +
scale_y_continuous(expand=c(0, 0), limits=c(0, 30))
pdf("./pvalue_fig.pdf", height=5, width=10)
ggarrange(p1, p2, ncol=2)
dev.off()