diff --git a/DESCRIPTION b/DESCRIPTION index 4067031..1536ea9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,9 @@ Imports: ggplot2 (>= 3.4.2), ggthemes (>= 4.2.4), tidyverse (>= 2.0.0), + data.table (>= 1.14.8), + formattable (>= 0.2.1), + rdbounds (>= 1.1), lubridate (>= 1.9.2), rio (>= 0.5.29), xtable (>= 1.8.4), @@ -42,5 +45,6 @@ Imports: synthdid (>= 0.0.9), plm (>= 2.6.3), MASS, + MedBounds (>= 0.1.0), foreach (>= 1.5.2) VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 5ef4db0..3b92de6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(ama_theme) export(balance_assessment) export(balance_scatter_custom) export(get_balanced_panel) +export(lee_bounds) export(med_ind) export(nice_tab) export(panel_estimate) @@ -17,6 +18,7 @@ export(plot_coef_par_trends) export(plot_covariate_balance_pretrend) export(plot_density_by_treatment) export(plot_par_trends) +export(plot_rd_aa_share) export(plot_treat_time) export(plot_trends_across_group) export(process_panel_estimate) @@ -27,11 +29,15 @@ export(synthdid_est_per) export(synthdid_plot_ate) export(synthdid_se_jacknife) export(synthdid_se_placebo) +import(data.table) +import(formattable) import(ggplot2) import(lubridate) +import(rdbounds) import(rio) import(xtable) importFrom(Hotelling,hotelling.test) +importFrom(MedBounds,compute_bounds_ats) importFrom(PanelMatch,PanelMatch) importFrom(PanelMatch,get_covariate_balance) importFrom(dplyr,"%>%") @@ -41,6 +47,7 @@ importFrom(dplyr,mutate) importFrom(dplyr,rowwise) importFrom(dplyr,select) importFrom(dplyr,summarise) +importFrom(dplyr,summarise_all) importFrom(dplyr,ungroup) importFrom(fixest,coefplot) importFrom(fixest,feols) diff --git a/R/lee_bounds.R b/R/lee_bounds.R new file mode 100644 index 0000000..60ec1da --- /dev/null +++ b/R/lee_bounds.R @@ -0,0 +1,66 @@ +#' Summarize Lee Bounds for Always-Takers +#' +#' Computes and summarizes the Lee bounds on the average direct effect for always-takers (ATs) for whom there is a direct effect of treatment (D) on the outcome (Y). This function utilizes \code{\link[MedBounds]{compute_bounds_ats}} to calculate initial bounds and applies bootstrapping to estimate the standard deviation of these estimates, providing a summary in a data frame format. +#' +#' @param df A data frame containing the data. +#' @param d Name of the treatment variable in \code{df}. +#' @param m Name of the mediator variable in \code{df}. +#' @param y Name of the outcome variable in \code{df}. +#' @param cluster (Optional) The name of the cluster variable for clustered bootstrapping. +#' @param c_at_ratio (Optional) Specifies the ratio of E[Y(1,1) | C]/E[Y(1,1) | AT]. If this is specified, the direct effect for ATs is point-identified. +#' @param units A string denoting the units of the outcome variable (for labeling purposes). +#' @param numdraws The number of bootstrap draws for estimating the standard deviation. +#' @return A data frame summarizing the computed bounds with terms, estimates, and standard errors. +#' @examples +#' \dontrun{ +#' data(example_data) +#' summarized_bounds <- lee_bounds(df = example_data, d = "treatment", m = "mediator", y = "outcome") +#' } +#' @export +#' @importFrom MedBounds compute_bounds_ats +#' @importFrom dplyr summarise_all +lee_bounds <- function(df, d, m, y, cluster = NULL, c_at_ratio = NULL, units = "", numdraws = 1000) { + # Compute the point estimate using MedBounds::compute_bounds_ats + pt_estimate <- MedBounds::compute_bounds_ats( + df = df, + d = d, + m = m, + y = y, + c_at_ratio = c_at_ratio + ) + + # Compute bootstrap draws with optional clustering + bootstrap_draws <- MedBounds:::compute_bootstrap_draws_clustered( + df = df, + d = d, + m = m, + y = y, + f = function(..., c_at_ratio_) { + MedBounds::compute_bounds_ats(..., c_at_ratio = c_at_ratio) + }, + cluster = cluster, + fix_n1 = FALSE, + numdraws = numdraws + ) + + # Use dplyr to summarise and compute the standard deviation of the columns of bootstrap_draws + bootstrap_sds <- bootstrap_draws %>% + summarise_all(sd) + + # Determine the term list based on the presence of c_at_ratio + termlist <- if (is.null(c_at_ratio)) { + c("Lower bound", "Upper bound") + } else { + "Point estimate" + } + + # Tidy the results into a data frame + tidy_results <- data.frame( + term = termlist, + estimate = as.numeric(pt_estimate), + std.error = as.numeric(bootstrap_sds), + stringsAsFactors = FALSE + ) + + return(tidy_results) +} diff --git a/R/plot_rd_aa_share.R b/R/plot_rd_aa_share.R new file mode 100644 index 0000000..8c04abb --- /dev/null +++ b/R/plot_rd_aa_share.R @@ -0,0 +1,114 @@ +#' Plot RD Always-assigned Share +#' +#' This function creates a plot for the share of always-assigned units +#' in a Regression Discontinuity (RD) design, either Sharp RD (SRD) or +#' Fuzzy RD (FRD). It provides options to include various confidence +#' intervals and reference lines. +#' +#' @param data The output object from the \code{rdbounds} function. +#' @param rd_type The type of RD design, either "SRD" for Sharp RD or "FRD" +#' for Fuzzy RD. Default is "SRD". +#' @param x_label The label for the x-axis. Default is "Share of Always-assigned Units". +#' @param y_label The label for the y-axis. Default is "ATE". +#' @param plot_title The title of the plot. Default is an empty string. +#' @param theme_use A ggplot2 theme function to apply to the plot. Default is +#' \code{causalverse::ama_theme()}. +#' @param tau Logical, whether to include a vertical line at the estimated +#' treatment effect. Default is TRUE. +#' @param tau_CI Logical, whether to include confidence intervals for the +#' treatment effect estimate. Default is FALSE. +#' @param bounds_CI Logical, whether to include confidence intervals for the +#' manipulation bounds. Default is TRUE. +#' @param ref_line The y-intercept for a reference line. Default is 0. +#' @param ... Additional arguments passed to \code{labs} in ggplot2. +#' +#' @return A ggplot object. +#' @import ggplot2 +#' @import formattable +#' @import data.table +#' @import rdbounds +#' @examples +#' \dontrun{ +#' set.seed(1) +#' data <- rdbounds::rdbounds_sampledata(10000, covs = FALSE) +#' rdbounds_est_tau <- rdbounds::rdbounds( +#' y = data$y, +#' x = data$x, +#' treatment = data$treatment, +#' c = 0, +#' discrete_x = FALSE, +#' discrete_y = FALSE, +#' bwsx = c(.2, .5), +#' bwy = 1, +#' kernel = "epanechnikov", +#' orders = 1, +#' evaluation_ys = seq(from = 0, to = 15, by = 1), +#' refinement_A = TRUE, +#' refinement_B = TRUE, +#' right_effects = TRUE, +#' potential_taus = c(.025, .05, .1, .2), +#' yextremes = c(0, 15), +#' num_bootstraps = 5 +#' ) +#' plot_rd_aa_share(rdbounds_est_tau) +#' } +#' @export +plot_rd_aa_share <- function(data, + rd_type = "SRD", + x_label = "Share of Always-assigned Units", + y_label = "ATE", + plot_title = "", + theme_use = causalverse::ama_theme(), + tau = TRUE, + tau_CI = FALSE, + bounds_CI = TRUE, + ref_line = 0, + ...) { + + # Determine the correct prefix based on rd_type + prefix <- if (rd_type == "FRD") "FRD" else "SRD" + + # Extract the necessary data from the rdbounds_est_tau object + df <- as.data.frame(data$estimates[,1][[paste0("TE_", prefix, "_CIs_manipulation")]]) + naive_estimate <- data$estimates[,1][[paste0("TE_", prefix, "_naive")]] + tau_hat <- data$estimates[,1]$tau_hat + tau_hat_CI <- data$estimates[,1]$tau_hat_CI + + # Create the plot + p <- ggplot(df, aes(x = potential_taus)) + + geom_point(aes(y = TE_lower), size = 3, color = "black") + + geom_point(aes(y = TE_upper), size = 3, color = "black") + + geom_line(aes(y = TE_lower), linetype = "solid", color = "black") + + geom_line(aes(y = TE_upper), linetype = "solid", color = "black") + + geom_line(aes(y = get(paste0("TE_", prefix, "_CIs_manipulation_lower"))), linetype = "dotted", color = "black") + + geom_line(aes(y = get(paste0("TE_", prefix, "_CIs_manipulation_upper"))), linetype = "dotted", color = "black") + + annotate("point", x = 0, y = naive_estimate, size = 3, color = "black") + + labs(x = x_label, y = y_label, title = plot_title, ...) + + theme_use + + # Add reference line + if (!is.null(ref_line)) { + p <- p + geom_hline(yintercept = ref_line, linetype = "dashed", color = "red") + } + + # Add vertical line for tau_hat + if (tau) { + p <- p + geom_vline(xintercept = round(tau_hat, 3), linetype = "solid", color = "black") + } + + # Add confidence intervals for tau_hat + if (tau_CI) { + p <- p + + geom_vline(xintercept = round(tau_hat_CI, 3)[1], linetype = "dotted", color = "black") + + geom_vline(xintercept = round(tau_hat_CI, 3)[2], linetype = "dotted", color = "black") + } + + # Add manipulation bounds confidence intervals + if (bounds_CI) { + p <- p + + geom_line(aes(y = get(paste0("TE_", prefix, "_CIs_manipulation_lower"))), linetype = "dotted", color = "black") + + geom_line(aes(y = get(paste0("TE_", prefix, "_CIs_manipulation_upper"))), linetype = "dotted", color = "black") + } + + return(p) +} diff --git a/causalverse_0.0.0.9000.pdf b/causalverse_0.0.0.9000.pdf index 46ad349..838b3f0 100644 Binary files a/causalverse_0.0.0.9000.pdf and b/causalverse_0.0.0.9000.pdf differ diff --git a/docs/articles/a_introduction.html b/docs/articles/a_introduction.html index d172a35..bec2bc1 100644 --- a/docs/articles/a_introduction.html +++ b/docs/articles/a_introduction.html @@ -91,7 +91,7 @@
a_introduction.Rmd
b_synthdid.Rmd
c_did.Rmd
get_balanced_panel()
Extract a Balanced Panel
Summarize Lee Bounds for Always-Takers
plot_par_trends()
Plot Parallel Trends
Plot RD Always-assigned Share
lee_bounds.Rd
Computes and summarizes the Lee bounds on the average direct effect for always-takers (ATs) for whom there is a direct effect of treatment (D) on the outcome (Y). This function utilizes compute_bounds_ats
to calculate initial bounds and applies bootstrapping to estimate the standard deviation of these estimates, providing a summary in a data frame format.
lee_bounds(
+ df,
+ d,
+ m,
+ y,
+ cluster = NULL,
+ c_at_ratio = NULL,
+ units = "",
+ numdraws = 1000
+)
A data frame containing the data.
Name of the treatment variable in df
.
Name of the mediator variable in df
.
Name of the outcome variable in df
.
(Optional) The name of the cluster variable for clustered bootstrapping.
(Optional) Specifies the ratio of EY(1,1) | C/EY(1,1) | AT. If this is specified, the direct effect for ATs is point-identified.
A string denoting the units of the outcome variable (for labeling purposes).
The number of bootstrap draws for estimating the standard deviation.
A data frame summarizing the computed bounds with terms, estimates, and standard errors.
+if (FALSE) {
+data(example_data)
+summarized_bounds <- lee_bounds(df = example_data, d = "treatment", m = "mediator", y = "outcome")
+}
+
plot_rd_aa_share.Rd
This function creates a plot for the share of always-assigned units +in a Regression Discontinuity (RD) design, either Sharp RD (SRD) or +Fuzzy RD (FRD). It provides options to include various confidence +intervals and reference lines.
+plot_rd_aa_share(
+ data,
+ rd_type = "SRD",
+ x_label = "Share of Always-assigned Units",
+ y_label = "ATE",
+ plot_title = "",
+ theme_use = causalverse::ama_theme(),
+ tau = TRUE,
+ tau_CI = FALSE,
+ bounds_CI = TRUE,
+ ref_line = 0,
+ ...
+)
The output object from the rdbounds
function.
The type of RD design, either "SRD" for Sharp RD or "FRD" +for Fuzzy RD. Default is "SRD".
The label for the x-axis. Default is "Share of Always-assigned Units".
The label for the y-axis. Default is "ATE".
The title of the plot. Default is an empty string.
A ggplot2 theme function to apply to the plot. Default is
+causalverse::ama_theme()
.
Logical, whether to include a vertical line at the estimated +treatment effect. Default is TRUE.
Logical, whether to include confidence intervals for the +treatment effect estimate. Default is FALSE.
Logical, whether to include confidence intervals for the +manipulation bounds. Default is TRUE.
The y-intercept for a reference line. Default is 0.
Additional arguments passed to labs
in ggplot2.
A ggplot object.
+if (FALSE) {
+set.seed(1)
+data <- rdbounds::rdbounds_sampledata(10000, covs = FALSE)
+rdbounds_est_tau <- rdbounds::rdbounds(
+ y = data$y,
+ x = data$x,
+ treatment = data$treatment,
+ c = 0,
+ discrete_x = FALSE,
+ discrete_y = FALSE,
+ bwsx = c(.2, .5),
+ bwy = 1,
+ kernel = "epanechnikov",
+ orders = 1,
+ evaluation_ys = seq(from = 0, to = 15, by = 1),
+ refinement_A = TRUE,
+ refinement_B = TRUE,
+ right_effects = TRUE,
+ potential_taus = c(.025, .05, .1, .2),
+ yextremes = c(0, 15),
+ num_bootstraps = 5
+)
+plot_rd_aa_share(rdbounds_est_tau)
+}
+