-
Notifications
You must be signed in to change notification settings - Fork 0
/
bias_correction.R
executable file
·105 lines (88 loc) · 4.8 KB
/
bias_correction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Balance correction of pretreatment variables (three treatment groups)
# df: data frame
# indicator_treatment: column name of the feature indicating the treatment given to each unit
# Treatment groups: treatment_1,treatment_2,control
# Pretreatment characteristics: features
ps_matching <- function(df,indicator_treatment,treatment_1,treatment_2,control,features){
# First pairwise comparison: treatment_1 vs. control
df.t1.ct <- subset(df,indicator_treatment == treatment_1 | indicator_treatment == control)
# Second pairwise comparison: treatment_2 vs. control
df.t2.ct <- subset(df,indicator_treatment == treatment_2 | indicator_treatment == control)
# Propensity score (PS) formula
formula <- as.formula(paste("indicator_treatment ~", paste(features, sep = "", collapse = "+")))
# Assessment before applying matching treatment_1 vs. control
df.t1.ct$indicator_treatment <- ifelse(df.t1.ct$indicator_treatment == treatment_1,1,0)
# Chi-square
prior.chi.t1.ct <- RItools::xBalance(formula, data = df.t1.ct, report = c("chisquare.test"))
print(prior.chi.t1.ct)
# SMD
prior.smd.t1.ct <- tableone::CreateTableOne(vars = features, strata = "indicator_treatment", data = df.t1.ct)
print(prior.smd.t1.ct,smd = TRUE)
addmargins(table(ExtractSmd(prior.smd.t1.ct)>0.1))
# SD
treated.t1 <- (df.t1.ct$indicator_treatment ==1)
prior.std.diff.t1.ct <- apply(df.t1.ct[,features],2,function(x)100*(mean(x[treated.t1])-mean(x[!treated.t1]))/(sqrt(0.5*(var(x[treated.t1])+var(x[!treated.t1])))))
abs(prior.std.diff.t1.ct)
# Assessment before applying matching treatment_2 vs. control
df.t2.ct$indicator_treatment <- ifelse(df.t2.ct$indicator_treatment == treatment_2,1,0)
# Chi-square
prior.chi.t2.ct <- RItools::xBalance(formula, data = df.t2.ct, report = c("chisquare.test"))
print(prior.chi.t2.ct)
# SMD
prior.smd.t2.ct <- tableone::CreateTableOne(vars = features, strata = "indicator_treatment", data = df.t2.ct)
print(prior.smd.t2.ct,smd = TRUE)
addmargins(table(ExtractSmd(prior.smd.t2.ct)>0.1))
# SD
treated.t2 <- (df.t2.ct$indicator_treatment ==1)
prior.std.diff.t2.ct <- apply(df.t2.ct[,features],2,function(x)100*(mean(x[treated.t2])-mean(x[!treated.t2]))/(sqrt(0.5*(var(x[treated.t2])+var(x[!treated.t2])))))
abs(prior.std.diff.t2.ct)
# Applying matching for df.t1.ct (treatment_1 vs. control)
set.seed(100)
match.t1.ct <- MatchIt::matchit(formula, data = df.t1.ct, method = "optimal", ratio = 1)
t1.ct.mtch <- MatchIt::match.data(match.t1.ct)
summary(match.t1.ct)
# Check balance after matching: df.t1.ct (treatment_1 vs. control)
# Chi-square
post.Chisquare.t1.ct <- RItools::xBalance(formula, data = t1.ct.mtch, report = c("chisquare.test"))
print(post.Chisquare.t1.ct)
# SMD
post.smd.t1.ct <- tableone::CreateTableOne(vars = features, strata = "indicator_treatment", data = t1.ct.mtch)
post.smd.t1.ct <- print(aft.smd.t1.ct,smd = TRUE)
t1.ct.mtch$indicator_treatment <- ifelse(t1.ct.mtch$indicator_treatment == 1, treatment_1,control)
# Matching df.t2.ct (treatment_2 vs. control)
set.seed(100)
match.t2.ct <- MatchIt::matchit(formula, data = df.t2.ct, method = "optimal", ratio = 1)
t2.ct.mtch <- MatchIt::match.data(match.t2.ct)
summary(match.t2.ct)
# Check balance after matching: df.t2.ct (treatment_2 vs. control)
# Chi-square
post.Chisquare.t2.ct <- RItools::xBalance(formula, data = t2.ct.mtch, report = c("chisquare.test"))
print(post.Chisquare.t2.ct)
# SMD
post.smd.t2.ct <- tableone::CreateTableOne(vars = features, strata = "indicator_treatment", data = t2.ct.mtch)
post.smd.t2.ct <- print(post.smd.t2.ct,smd = TRUE)
addmargins(table(ExtractSmd(post.smd.t2.ct)>0.1))
t2.ct.mtch$indicator_treatment <- ifelse(t2.ct.mtch$indicator_treatment == 1, treatment_2,control)
# Final DF
duprows <- rownames(t1.ct.mtch) %in% rownames(t2.ct.mtch)
df <- rbind(t2.ct.mtch, t1.ct.mtch[!duprows,])
summary <- list(dataframe=NULL,
ini.Chisquare.t1.ct=NULL,
ini.std.diff.t1.ct=NULL,
ini.Chisquare.t2.ct=NULL,
ini.std.diff.t2.ct=NULL,
aft.Chisquare.t1.ct =NULL,
aft.smd.t1.ct=NULL,
aft.Chisquare.t2.ct=NULL,
aft.smd.t2.ct=NULL)
summary$dataframe <- df
summary$Prior_Chi_T1 <- prior.Chisquare.t1.ct
summary$Prior_SD_diff_T1 <- prior.std.diff.t1.ct
summary$Prior_Chi_T2 <- prior.Chisquare.t2.ct
summary$Prior_SD_diff_T2 <- prior.std.diff.t2.ct
summary$Post_Chi_T1 <- post.Chisquare.t1.ct
summary$Post_SD_diff_T1 <- post.smd.t1.ct
summary$Post_Chi_T2 <- post.Chisquare.t2.ct
summary$Post_SD_diff_T2 <- post.smd.t2.ct
return(summary)
}