Skip to content

Commit

Permalink
update synthdid
Browse files Browse the repository at this point in the history
  • Loading branch information
mikenguyen13 committed Jan 12, 2024
1 parent a396266 commit b7ab9f9
Show file tree
Hide file tree
Showing 348 changed files with 10,469 additions and 472 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ export(balance_assessment)
export(balance_scatter_custom)
export(get_balanced_panel)
export(nice_tab)
export(panel_estimate)
export(plot_PanelEstimate)
export(plot_coef_par_trends)
export(plot_covariate_balance_pretrend)
export(plot_density_by_treatment)
export(plot_par_trends)
export(plot_treat_time)
export(plot_trends_across_group)
export(process_panel_estimate)
export(stack_data)
export(synthdid_est)
export(synthdid_est_ate)
Expand Down
58 changes: 58 additions & 0 deletions R/panel_estimate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Panel Estimate Function
#'
#' This function computes estimates and standard errors for panel data using selected estimators.
#' It allows the user to select specific estimators and set parameters for Monte Carlo replications and seed.
#'
#' @param setup A list containing matrices Y, N0, and T0 for panel data analysis.
#' @param selected_estimators A character vector specifying which estimators to use.
#' Defaults to all available estimators.
#' @param mc_replications The number of Monte Carlo replications for computing standard errors.
#' Applicable if the 'mc' estimator is used. Defaults to 200.
#' @param seed An integer value to set the random seed for reproducibility. Defaults to 1.
#'
#' @return A list where each element corresponds to an estimator and contains its estimate and standard error.
#'
#' @examples
#' \dontrun{
#' data('california_prop99')
#' setup = panel.matrices(california_prop99)
#' results_all = panel_estimate(setup)
#' results_selected = panel_estimate(setup, selected_estimators = c("did", "sc"))
#' summary(results_selected$did$estimate)
#' }
#'
#' @export
panel_estimate <-
function(setup,
selected_estimators = names(panel_estimators),
mc_replications = 200,
seed = 1) {
set.seed(seed)
# Subset the list of estimators based on user selection
estimators_to_use = panel_estimators[selected_estimators]

# Compute estimates
estimates = lapply(estimators_to_use, function(estimator) {
estimator(setup$Y, setup$N0, setup$T0)
})

# Compute standard errors
standard.errors = mapply(function(estimate, name) {
if (name == 'mc') {
mc_placebo_se(setup$Y, setup$N0, setup$T0, replications = mc_replications)
} else {
sqrt(vcov(estimate, method = 'placebo'))
}
}, estimates, names(estimators_to_use))

# Combine estimates and standard errors
results = Map(function(est, se)
list(estimate = est, std.error = se),
estimates,
standard.errors)

# Name the results with estimator names
names(results) = names(estimators_to_use)

return(results)
}
35 changes: 35 additions & 0 deletions R/process_panel_estimate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' Process Panel Estimate
#'
#' This function processes the output from `panel_estimate()` for panel estimates and returns a formatted data frame.
#' It takes a list of results, each corresponding to a different method, and combines them into a single data frame.
#' The data frame includes the method name, estimate, and standard error for each method.
#'
#' @param results_selected A list of results from `panel_estimate()`. Each element in the list should be an object
#' containing the results for a particular estimation method. Each object must have an `estimate` and a `std.error` attribute.
#'
#' @return A data frame with columns `Method`, `Estimate`, and `SE`, representing the method name, the estimate value,
#' and the standard error, respectively. The data frame is formatted using `causalverse::nice_tab()`.
#'
#' @examples
#' \dontrun{
#' library(synthdid)
#' setup = synthdid::panel.matrices(synthdid::california_prop99)
#' results_selected = panel_estimate(setup, selected_estimators = c("did", "sc"))
#' results_table = process_panel_estimate(results_selected)
#' print(results_table)
#' }
#'
#' @export
process_panel_estimate <- function(results_selected) {
# Create the data frame from the results
results_df <- do.call(rbind, lapply(names(results_selected), function(name) {
data.frame(
Method = toupper(name),
Estimate = as.numeric(results_selected[[name]]$estimate),
SE = as.numeric(results_selected[[name]]$std.error)
)
})) |>
causalverse::nice_tab()

return(results_df)
}
64 changes: 52 additions & 12 deletions R/synthdid_est.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#' Synthetic DID Estimation Using synthdid Package
#'
#' This function estimates synthetic difference-in-differences using the `synthdid` package.
#' It differs from `synthdid::synthdid_estimate` in that it calculates treatment effects (TEs)
#' It offers a choice among `synthdid_estimate`, `did_estimate`, and `sc_estimate` methods
#' for estimation, defaulting to `synthdid_estimate`. It calculates treatment effects (TEs)
#' for each period instead of a single TE for all treated periods.
#'
#' @param data Data frame to analyze.
Expand All @@ -15,6 +16,25 @@
#' @param treat_stat_var Name of the treatment indicator column.
#' @param outcome_var Name of the outcome variable column.
#' @param seed A numeric value for setting the random seed (only for placebo SE). Default is 1.
#' @param method The estimation method to be used. Methods include:
#' - 'did': Difference-in-Differences.
#' - 'sc': Synthetic Control Method.
#' - 'sc_ridge': Synthetic Control Method with Ridge Penalty. It adds a ridge regularization to the synthetic control method when estimating the synthetic control weights.
#' - 'difp': De-meaned Synthetic Control Method, as proposed by Doudchenko and Imbens (2016) and Ferman and Pinto (2021).
#' - 'difp_ridge': De-meaned Synthetic Control with Ridge Penalty. It adds a ridge regularizationd when estimating the synthetic control weights.
#' - 'synthdid': Synthetic Difference-in-Differences, a method developed by Arkhangelsky et al. (2021)
#' Defaults to 'synthdid'.
#' @references
#' Ferman, B., & Pinto, C. (2021). Synthetic controls with imperfect pretreatment fit.
#' Quantitative Economics, 12(4), 1197-1221.
#'
#' Doudchenko, Nikolay, and Guido W. Imbens. 2016.
#' “Balancing, Regression, Difference-in-Differences and Synthetic Control Methods: A Synthesis.”
#' NBER Working Paper 22791.
#'
#' Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S. (2021).
#' Synthetic difference-in-differences.
#' American Economic Review, 111(12), 4088-4118.
#' @return A list containing the estimated treatment effects, standard errors, observed and predicted outcomes, synthetic control lambda weights, and counts of treated and control units.
#' @export
#' @examples
Expand Down Expand Up @@ -58,7 +78,8 @@ synthdid_est <-
treated_period_var,
treat_stat_var,
outcome_var,
seed = 1) {
seed = 1,
method = "synthdid") {


# Convert treatment variable to logical and select necessary columns
Expand All @@ -85,31 +106,50 @@ synthdid_est <-
treatment = treat_stat_var
)

# Run synthdid estimation
sdid <- synthdid::synthdid_estimate(setup$Y, setup$N0, setup$T0)
# Run estimation based on the specified method
estimation_res <- switch(
method,
"did" = synthdid::did_estimate(setup$Y, setup$N0, setup$T0),
"sc" = synthdid::sc_estimate(setup$Y, setup$N0, setup$T0),
"sc_ridge" = synthdid::sc_estimate(setup$Y, setup$N0, setup$T0,
eta.omega = ((nrow(setup$Y) - setup$N0) * (ncol(setup$Y) - setup$T0)) ^ (1 / 4)),
"difp" = synthdid::synthdid_estimate(setup$Y,
setup$N0,
setup$T0,
weights = list(lambda = rep(1 / setup$T0, setup$T0)),
eta.omega = 1e-6),
"difp_ridge" = synthdid::synthdid_estimate(setup$Y, setup$N0, setup$T0,
weights = list(lambda = rep(1 / setup$T0, setup$T0))),

# Default case
synthdid::synthdid_estimate(setup$Y, setup$N0, setup$T0)
)


# Subgroup adjustment (i.e., treated are the only subgroup of interest)
if (!is.null(subgroup)) {
# get treated unit IDs
treat_ids <- rownames(setup$Y[-(1:setup$N0), ])

# keep only those in subgroup
keep <- c(rep(TRUE, setup$N0), treat_ids %in% subgroup)
sdid.setup <- attr(sdid, 'setup')
sdid.setup$Y <- sdid.setup$Y[keep, ]
sdid.setup$X <- sdid.setup$X[keep, , drop = FALSE]
attr(sdid, 'setup') <- sdid.setup

# change the setup matrix to contain only that data.
estimation_res.setup <- attr(estimation_res, 'setup')
estimation_res.setup$Y <- estimation_res.setup$Y[keep, ]
estimation_res.setup$X <- estimation_res.setup$X[keep, ,]
attr(estimation_res, 'setup') <- estimation_res.setup
}

# Extract outcomes from synthdid estimates
setup <- attr(sdid, 'setup')
weights <- attr(sdid, 'weights')
setup <- attr(estimation_res, 'setup')
weights <- attr(estimation_res, 'weights')
Y <- setup$Y - synthdid:::contract3(setup$X, weights$beta)
N0 <- setup$N0
T0 <- setup$T0

# Compute treatment effects and standard errors
est <- synthdid_est_per(Y, N0, T0, weights)
se <- synthdid_se_jacknife(sdid, seed = seed)
se <- synthdid_se_jacknife(estimation_res, seed = seed)

return(
list(
Expand Down
29 changes: 25 additions & 4 deletions R/synthdid_est_ate.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,25 @@
#' @param subgroup Vector, IDs for subgroup analysis.
#' @param conf_level Numeric, confidence level for the interval estimation (Default: 95%).
#' @param seed A numeric value for setting the random seed (for placebo SE and placebo analysis). Default is 1.
#' @param method The estimation method to be used. Methods include:
#' - 'did': Difference-in-Differences.
#' - 'sc': Synthetic Control Method.
#' - 'sc_ridge': Synthetic Control Method with Ridge Penalty. It adds a ridge regularization to the synthetic control method when estimating the synthetic control weights.
#' - 'difp': De-meaned Synthetic Control Method, as proposed by Doudchenko and Imbens (2016) and Ferman and Pinto (2021).
#' - 'difp_ridge': De-meaned Synthetic Control with Ridge Penalty. It adds a ridge regularizationd when estimating the synthetic control weights.
#' - 'synthdid': Synthetic Difference-in-Differences, a method developed by Arkhangelsky et al. (2021)
#' Defaults to 'synthdid'.
#' @references
#' Ferman, B., & Pinto, C. (2021). Synthetic controls with imperfect pretreatment fit.
#' Quantitative Economics, 12(4), 1197-1221.
#'
#' Doudchenko, Nikolay, and Guido W. Imbens. 2016.
#' “Balancing, Regression, Difference-in-Differences and Synthetic Control Methods: A Synthesis.”
#' NBER Working Paper 22791.
#'
#' Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S. (2021).
#' Synthetic difference-in-differences.
#' American Economic Review, 111(12), 4088-4118.
#'
#' @return A list containing the following elements:
#' \itemize{
Expand Down Expand Up @@ -81,7 +100,8 @@ synthdid_est_ate <-
pooled = F,
subgroup = NULL,
conf_level = 0.95,
seed = 1
seed = 1,
method = "synthdid"
) {
set.seed(seed)
# Validate input data
Expand Down Expand Up @@ -182,7 +202,7 @@ synthdid_est_ate <-
n_treat = length(unique(balanced_df[balanced_df[,treated_period_var]==adoption_cohort, unit_id_var]))

if (!is.null(subgroup)) {
n_treat = sum(unique(balanced_df[balanced_df[,treated_period_var]==adoption_cohort, unit_id_var]) %in% subgroup)
n_treat = sum(unique(balanced_df[balanced_df[, treated_period_var] == adoption_cohort, unit_id_var]) %in% subgroup)
}

cat("Treated units:", n_treat,"Control units:", n_control ,"\n")
Expand All @@ -207,7 +227,8 @@ synthdid_est_ate <-
treated_period_var = treated_period_var,
treat_stat_var = treat_stat_var,
outcome_var = outcome_var,
seed = seed
seed = seed,
method = method
)

# Save output
Expand All @@ -220,7 +241,7 @@ synthdid_est_ate <-

# Aggregate adoption-cohort-level ATTs
time <- seq(-lags, leads)
col_names <- c(time, paste("cumul.", 0:leads, sep = ""))
col_names <- c(time, paste("cumul.", 0:leads, sep = ""))
TE <- data.frame(TE)
colnames(TE) <- col_names
rownames(TE) <- out_adoption_cohorts
Expand Down
8 changes: 4 additions & 4 deletions R/synthdid_plot_ate.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#'
#' @param est Estimation object from `synthdid_est_ate`.
#' @param show_CI Logical; if TRUE, shows confidence intervals on the plot.
#' @param custom_title String; title of the plot.
#' @param title String; title of the plot.
#' @param xlab String; label for the x-axis.
#' @param ylab String; label for the y-axis.
#' @param y_intercept Numeric; value at which a horizontal line is drawn.
Expand Down Expand Up @@ -40,12 +40,12 @@
#' outcome_var = "y"
#' )
#' # Generate the plot
#' synthdid_plot_ate(est, show_CI = TRUE, custom_title = "Sample ATE Plot")
#' synthdid_plot_ate(est, show_CI = TRUE, title = "Sample ATE Plot")
#' }
synthdid_plot_ate <- function(
est,
show_CI = TRUE,
custom_title = "",
title = "",
xlab = "Relative Time Period",
ylab = "ATE",
y_intercept = 0,
Expand Down Expand Up @@ -80,7 +80,7 @@ synthdid_plot_ate <- function(
# Add labels and apply the specified theme
p <- p +
labs(
title = custom_title,
title = title,
x = xlab,
y = ylab
) +
Expand Down
48 changes: 48 additions & 0 deletions R/utils_synthdid.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,51 @@ sum_normalize <- function(x) {
weighted_avg_SE <- function(se, weights = rep(1, sum(!is.na(se)))) {
sqrt(sum(se ^ 2 * weights ^ 2, na.rm = TRUE) / sum(weights) ^ 2)
}


# Define your estimator functions
mc_estimate = function(Y, N0, T0) {
N1 = nrow(Y) - N0
T1 = ncol(Y) - T0
W <- outer(c(rep(0, N0), rep(1, N1)), c(rep(0, T0), rep(1, T1)))
mc_pred <- MCPanel::mcnnm_cv(Y, 1 - W, num_lam_L = 20)
mc_fit <- mc_pred$L + outer(mc_pred$u, mc_pred$v, '+')
mc_est <- sum(W * (Y - mc_fit)) / sum(W)
return(mc_est)
}
mc_placebo_se = function(Y, N0, T0, replications = 200) {
N1 = nrow(Y) - N0
theta = function(ind) {
mc_estimate(Y[ind, ], length(ind) - N1, T0)
}

res <- sqrt((replications - 1) / replications) * sd(replicate(replications, theta(sample(1:N0))))
return(res)
}

difp_estimate = function(Y, N0, T0) {
synthdid::synthdid_estimate(Y,
N0,
T0,
weights = list(lambda = rep(1 / T0, T0)),
eta.omega = 1e-6)
}

sc_estimate_ridge = function(Y, N0, T0) {
synthdid::sc_estimate(Y, N0, T0, eta.omega = ((nrow(Y) - N0) * (ncol(Y) -
T0)) ^ (1 / 4))
}
difp_estimate_ridge = function(Y, N0, T0) {
synthdid::synthdid_estimate(Y, N0, T0, weights = list(lambda = rep(1 / T0, T0)))
}

panel_estimators = list(
synthdid = synthdid::synthdid_estimate,
did = synthdid::did_estimate,
sc = synthdid::sc_estimate,
sdid = synthdid::synthdid_estimate,
difp = difp_estimate,
mc = mc_estimate,
sc_ridge = sc_estimate_ridge,
difp_ridge = difp_estimate_ridge
)
Binary file modified causalverse_0.0.0.9000.pdf
Binary file not shown.
9 changes: 3 additions & 6 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b7ab9f9

Please sign in to comment.