update synthdid

mikenguyen13 · Jan 12, 2024 · b7ab9f9 · b7ab9f9
1 parent a396266
commit b7ab9f9
Show file tree

Hide file tree

Showing 348 changed files with 10,469 additions and 472 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -10,13 +10,15 @@ export(balance_assessment)
 export(balance_scatter_custom)
 export(get_balanced_panel)
 export(nice_tab)
+export(panel_estimate)
 export(plot_PanelEstimate)
 export(plot_coef_par_trends)
 export(plot_covariate_balance_pretrend)
 export(plot_density_by_treatment)
 export(plot_par_trends)
 export(plot_treat_time)
 export(plot_trends_across_group)
+export(process_panel_estimate)
 export(stack_data)
 export(synthdid_est)
 export(synthdid_est_ate)

diff --git a/R/panel_estimate.R b/R/panel_estimate.R
@@ -0,0 +1,58 @@
+#' Panel Estimate Function
+#'
+#' This function computes estimates and standard errors for panel data using selected estimators.
+#' It allows the user to select specific estimators and set parameters for Monte Carlo replications and seed.
+#'
+#' @param setup A list containing matrices Y, N0, and T0 for panel data analysis.
+#' @param selected_estimators A character vector specifying which estimators to use.
+#'   Defaults to all available estimators.
+#' @param mc_replications The number of Monte Carlo replications for computing standard errors. 
+#'   Applicable if the 'mc' estimator is used. Defaults to 200.
+#' @param seed An integer value to set the random seed for reproducibility. Defaults to 1.
+#'
+#' @return A list where each element corresponds to an estimator and contains its estimate and standard error.
+#'
+#' @examples
+#' \dontrun{
+#' data('california_prop99')
+#' setup = panel.matrices(california_prop99)
+#' results_all = panel_estimate(setup)
+#' results_selected = panel_estimate(setup, selected_estimators = c("did", "sc"))
+#' summary(results_selected$did$estimate)
+#' }
+#' 
+#' @export
+panel_estimate <-
+  function(setup,
+           selected_estimators = names(panel_estimators),
+           mc_replications = 200,
+           seed = 1) {
+    set.seed(seed)
+    # Subset the list of estimators based on user selection
+    estimators_to_use = panel_estimators[selected_estimators]
+
+    # Compute estimates
+    estimates = lapply(estimators_to_use, function(estimator) {
+      estimator(setup$Y, setup$N0, setup$T0)
+    })
+
+    # Compute standard errors
+    standard.errors = mapply(function(estimate, name) {
+      if (name == 'mc') {
+        mc_placebo_se(setup$Y, setup$N0, setup$T0, replications = mc_replications)
+      } else {
+        sqrt(vcov(estimate, method = 'placebo'))
+      }
+    }, estimates, names(estimators_to_use))
+
+    # Combine estimates and standard errors
+    results = Map(function(est, se)
+      list(estimate = est, std.error = se),
+      estimates,
+      standard.errors)
+
+    # Name the results with estimator names
+    names(results) = names(estimators_to_use)
+
+    return(results)
+  }
diff --git a/R/process_panel_estimate.R b/R/process_panel_estimate.R
@@ -0,0 +1,35 @@
+#' Process Panel Estimate
+#'
+#' This function processes the output from `panel_estimate()` for panel estimates and returns a formatted data frame.
+#' It takes a list of results, each corresponding to a different method, and combines them into a single data frame.
+#' The data frame includes the method name, estimate, and standard error for each method.
+#'
+#' @param results_selected A list of results from `panel_estimate()`. Each element in the list should be an object
+#' containing the results for a particular estimation method. Each object must have an `estimate` and a `std.error` attribute.
+#'
+#' @return A data frame with columns `Method`, `Estimate`, and `SE`, representing the method name, the estimate value,
+#' and the standard error, respectively. The data frame is formatted using `causalverse::nice_tab()`.
+#' 
+#' @examples
+#' \dontrun{
+#' library(synthdid)
+#' setup = synthdid::panel.matrices(synthdid::california_prop99)
+#' results_selected = panel_estimate(setup, selected_estimators = c("did", "sc"))
+#' results_table = process_panel_estimate(results_selected)
+#' print(results_table)
+#' }
+#' 
+#' @export
+process_panel_estimate <- function(results_selected) {
+  # Create the data frame from the results
+  results_df <- do.call(rbind, lapply(names(results_selected), function(name) {
+    data.frame(
+      Method = toupper(name),
+      Estimate = as.numeric(results_selected[[name]]$estimate),
+      SE = as.numeric(results_selected[[name]]$std.error)
+    )
+  })) |> 
+    causalverse::nice_tab()
+
+  return(results_df)
+}
diff --git a/R/synthdid_est.R b/R/synthdid_est.R
@@ -1,7 +1,8 @@
 #' Synthetic DID Estimation Using synthdid Package
 #'
 #' This function estimates synthetic difference-in-differences using the `synthdid` package.
-#' It differs from `synthdid::synthdid_estimate` in that it calculates treatment effects (TEs)
+#' It offers a choice among `synthdid_estimate`, `did_estimate`, and `sc_estimate` methods 
+#' for estimation, defaulting to `synthdid_estimate`. It calculates treatment effects (TEs)
 #' for each period instead of a single TE for all treated periods.
 #'
 #' @param data Data frame to analyze.
@@ -15,6 +16,25 @@
 #' @param treat_stat_var Name of the treatment indicator column.
 #' @param outcome_var Name of the outcome variable column.
 #' @param seed A numeric value for setting the random seed (only for placebo SE). Default is 1.
+#' @param method The estimation method to be used. Methods include:
+#'   - 'did': Difference-in-Differences.
+#'   - 'sc': Synthetic Control Method. 
+#'   - 'sc_ridge': Synthetic Control Method with Ridge Penalty. It adds a ridge regularization to the synthetic control method when estimating the synthetic control weights.
+#'   - 'difp': De-meaned Synthetic Control Method, as proposed by Doudchenko and Imbens (2016) and Ferman and Pinto (2021).
+#'   - 'difp_ridge': De-meaned Synthetic Control with Ridge Penalty. It adds a ridge regularizationd when estimating the synthetic control weights.
+#'   - 'synthdid': Synthetic Difference-in-Differences, a method developed by Arkhangelsky et al. (2021)
+#' Defaults to 'synthdid'.
+#' @references
+#' Ferman, B., & Pinto, C. (2021). Synthetic controls with imperfect pretreatment fit. 
+#' Quantitative Economics, 12(4), 1197-1221.
+#'
+#' Doudchenko, Nikolay, and Guido W. Imbens. 2016. 
+#' “Balancing, Regression, Difference-in-Differences and Synthetic Control Methods: A Synthesis.” 
+#' NBER Working Paper 22791.
+#'
+#' Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S. (2021). 
+#' Synthetic difference-in-differences. 
+#' American Economic Review, 111(12), 4088-4118.
 #' @return A list containing the estimated treatment effects, standard errors, observed and predicted outcomes, synthetic control lambda weights, and counts of treated and control units.
 #' @export
 #' @examples
@@ -58,7 +78,8 @@ synthdid_est <-
            treated_period_var,
            treat_stat_var,
            outcome_var, 
-           seed = 1) {
+           seed = 1,
+           method = "synthdid") {
 
 
     # Convert treatment variable to logical and select necessary columns
@@ -85,31 +106,50 @@ synthdid_est <-
         treatment = treat_stat_var
       )
 
-    # Run synthdid estimation
-    sdid <- synthdid::synthdid_estimate(setup$Y, setup$N0, setup$T0)
+    # Run estimation based on the specified method
+    estimation_res <- switch(
+      method,
+      "did"      = synthdid::did_estimate(setup$Y, setup$N0, setup$T0),
+      "sc"       = synthdid::sc_estimate(setup$Y, setup$N0, setup$T0),
+      "sc_ridge" = synthdid::sc_estimate(setup$Y, setup$N0, setup$T0,
+                                         eta.omega = ((nrow(setup$Y) - setup$N0) * (ncol(setup$Y) - setup$T0)) ^ (1 / 4)),
+      "difp"     = synthdid::synthdid_estimate(setup$Y,
+                                               setup$N0,
+                                               setup$T0,
+                                               weights = list(lambda = rep(1 / setup$T0, setup$T0)),
+                                               eta.omega = 1e-6),
+      "difp_ridge" = synthdid::synthdid_estimate(setup$Y, setup$N0, setup$T0,
+                                                 weights = list(lambda = rep(1 / setup$T0, setup$T0))),
+
+      # Default case
+      synthdid::synthdid_estimate(setup$Y, setup$N0, setup$T0)
+    )
+
 
     # Subgroup adjustment (i.e., treated are the only subgroup of interest)
     if (!is.null(subgroup)) {
       # get treated unit IDs
       treat_ids <- rownames(setup$Y[-(1:setup$N0), ])
-
+      # keep only those in subgroup
       keep <- c(rep(TRUE, setup$N0), treat_ids %in% subgroup)
-      sdid.setup <- attr(sdid, 'setup')
-      sdid.setup$Y <- sdid.setup$Y[keep, ]
-      sdid.setup$X <- sdid.setup$X[keep, , drop = FALSE]
-      attr(sdid, 'setup') <- sdid.setup
+
+      # change the setup matrix to contain only that data.
+      estimation_res.setup          <- attr(estimation_res, 'setup')
+      estimation_res.setup$Y        <- estimation_res.setup$Y[keep, ]
+      estimation_res.setup$X        <- estimation_res.setup$X[keep, ,]
+      attr(estimation_res, 'setup') <- estimation_res.setup
     }
 
     # Extract outcomes from synthdid estimates
-    setup   <- attr(sdid, 'setup')
-    weights <- attr(sdid, 'weights')
+    setup   <- attr(estimation_res, 'setup')
+    weights <- attr(estimation_res, 'weights')
     Y       <- setup$Y - synthdid:::contract3(setup$X, weights$beta)
     N0      <- setup$N0
     T0      <- setup$T0
 
     # Compute treatment effects and standard errors
     est <- synthdid_est_per(Y, N0, T0, weights)
-    se  <- synthdid_se_jacknife(sdid, seed = seed)
+    se  <- synthdid_se_jacknife(estimation_res, seed = seed)
 
     return(
       list(

diff --git a/R/synthdid_est_ate.R b/R/synthdid_est_ate.R
@@ -29,6 +29,25 @@
 #' @param subgroup Vector, IDs for subgroup analysis.
 #' @param conf_level Numeric, confidence level for the interval estimation (Default: 95%).
 #' @param seed A numeric value for setting the random seed (for placebo SE and placebo analysis). Default is 1.
+#' @param method The estimation method to be used. Methods include:
+#'   - 'did': Difference-in-Differences.
+#'   - 'sc': Synthetic Control Method. 
+#'   - 'sc_ridge': Synthetic Control Method with Ridge Penalty. It adds a ridge regularization to the synthetic control method when estimating the synthetic control weights.
+#'   - 'difp': De-meaned Synthetic Control Method, as proposed by Doudchenko and Imbens (2016) and Ferman and Pinto (2021).
+#'   - 'difp_ridge': De-meaned Synthetic Control with Ridge Penalty. It adds a ridge regularizationd when estimating the synthetic control weights.
+#'   - 'synthdid': Synthetic Difference-in-Differences, a method developed by Arkhangelsky et al. (2021)
+#' Defaults to 'synthdid'.
+#' @references
+#' Ferman, B., & Pinto, C. (2021). Synthetic controls with imperfect pretreatment fit. 
+#' Quantitative Economics, 12(4), 1197-1221.
+#'
+#' Doudchenko, Nikolay, and Guido W. Imbens. 2016. 
+#' “Balancing, Regression, Difference-in-Differences and Synthetic Control Methods: A Synthesis.” 
+#' NBER Working Paper 22791.
+#'
+#' Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S. (2021). 
+#' Synthetic difference-in-differences. 
+#' American Economic Review, 111(12), 4088-4118.
 #'
 #' @return A list containing the following elements:
 #' \itemize{
@@ -81,7 +100,8 @@ synthdid_est_ate <-
            pooled = F,
            subgroup = NULL,
            conf_level = 0.95, 
-           seed = 1
+           seed = 1, 
+           method = "synthdid"
   ) {
     set.seed(seed)
     # Validate input data
@@ -182,7 +202,7 @@ synthdid_est_ate <-
       n_treat   = length(unique(balanced_df[balanced_df[,treated_period_var]==adoption_cohort, unit_id_var]))
 
       if (!is.null(subgroup)) {
-        n_treat = sum(unique(balanced_df[balanced_df[,treated_period_var]==adoption_cohort, unit_id_var]) %in% subgroup)
+        n_treat = sum(unique(balanced_df[balanced_df[, treated_period_var] == adoption_cohort, unit_id_var]) %in% subgroup)
       }
 
       cat("Treated units:", n_treat,"Control units:", n_control ,"\n")
@@ -207,7 +227,8 @@ synthdid_est_ate <-
           treated_period_var = treated_period_var,
           treat_stat_var     = treat_stat_var,
           outcome_var        = outcome_var,
-          seed               = seed
+          seed               = seed, 
+          method             = method
         )
 
         # Save output
@@ -220,7 +241,7 @@ synthdid_est_ate <-
 
     # Aggregate adoption-cohort-level ATTs 
     time         <-  seq(-lags, leads)
-    col_names       <-  c(time, paste("cumul.", 0:leads, sep = ""))
+    col_names    <-  c(time, paste("cumul.", 0:leads, sep = ""))
     TE           <- data.frame(TE)
     colnames(TE) <- col_names
     rownames(TE) <- out_adoption_cohorts

diff --git a/R/synthdid_plot_ate.R b/R/synthdid_plot_ate.R
@@ -5,7 +5,7 @@
 #'
 #' @param est Estimation object from `synthdid_est_ate`.
 #' @param show_CI Logical; if TRUE, shows confidence intervals on the plot.
-#' @param custom_title String; title of the plot.
+#' @param title String; title of the plot.
 #' @param xlab String; label for the x-axis.
 #' @param ylab String; label for the y-axis.
 #' @param y_intercept Numeric; value at which a horizontal line is drawn.
@@ -40,12 +40,12 @@
 #'       outcome_var = "y"
 #'     )
 #'   # Generate the plot
-#'   synthdid_plot_ate(est, show_CI = TRUE, custom_title = "Sample ATE Plot")
+#'   synthdid_plot_ate(est, show_CI = TRUE, title = "Sample ATE Plot")
 #' }
 synthdid_plot_ate <- function(
     est,
     show_CI = TRUE,
-    custom_title = "",
+    title = "",
     xlab = "Relative Time Period", 
     ylab = "ATE", 
     y_intercept = 0,
@@ -80,7 +80,7 @@ synthdid_plot_ate <- function(
   # Add labels and apply the specified theme
   p <- p +
     labs(
-      title = custom_title,
+      title = title,
       x = xlab,
       y = ylab
     ) +

diff --git a/R/utils_synthdid.R b/R/utils_synthdid.R
@@ -20,3 +20,51 @@ sum_normalize <- function(x) {
 weighted_avg_SE <- function(se, weights = rep(1, sum(!is.na(se)))) {
   sqrt(sum(se ^ 2 * weights ^ 2, na.rm = TRUE) / sum(weights) ^ 2)
 }
+
+
+# Define your estimator functions
+mc_estimate = function(Y, N0, T0) {
+  N1 = nrow(Y) - N0
+  T1 = ncol(Y) - T0
+  W       <- outer(c(rep(0, N0), rep(1, N1)), c(rep(0, T0), rep(1, T1)))
+  mc_pred <- MCPanel::mcnnm_cv(Y, 1 - W, num_lam_L = 20)
+  mc_fit  <- mc_pred$L + outer(mc_pred$u, mc_pred$v, '+')
+  mc_est  <- sum(W * (Y - mc_fit)) / sum(W)
+  return(mc_est)
+}
+mc_placebo_se = function(Y, N0, T0, replications = 200) {
+  N1 = nrow(Y) - N0
+  theta = function(ind) {
+    mc_estimate(Y[ind, ], length(ind) - N1, T0)
+  }
+
+  res <- sqrt((replications - 1) / replications) * sd(replicate(replications, theta(sample(1:N0))))
+  return(res)
+}
+
+difp_estimate = function(Y, N0, T0) {
+  synthdid::synthdid_estimate(Y,
+                              N0,
+                              T0,
+                              weights = list(lambda = rep(1 / T0, T0)),
+                              eta.omega = 1e-6)
+}
+
+sc_estimate_ridge = function(Y, N0, T0) {
+  synthdid::sc_estimate(Y, N0, T0, eta.omega = ((nrow(Y) - N0) * (ncol(Y) -
+                                                                    T0)) ^ (1 / 4))
+}
+difp_estimate_ridge = function(Y, N0, T0) {
+  synthdid::synthdid_estimate(Y, N0, T0, weights = list(lambda = rep(1 / T0, T0)))
+}
+
+panel_estimators = list(
+  synthdid   = synthdid::synthdid_estimate,
+  did        = synthdid::did_estimate,
+  sc         = synthdid::sc_estimate,
+  sdid       = synthdid::synthdid_estimate,
+  difp       = difp_estimate,
+  mc         = mc_estimate, 
+  sc_ridge   = sc_estimate_ridge,
+  difp_ridge = difp_estimate_ridge
+)
diff --git a/causalverse_0.0.0.9000.pdf b/causalverse_0.0.0.9000.pdf
diff --git a/docs/404.html b/docs/404.html