diff --git a/.Rbuildignore b/.Rbuildignore index 41b889c..8d35a0a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,4 +4,3 @@ README deltaSD.png ^LICENSE\.md$ .github -vignettes/precompute.R diff --git a/DESCRIPTION b/DESCRIPTION index f4c99c2..5129d2a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,6 +40,4 @@ Description: Provides functions to conduct robust inference in difference-in-differences and event study designs by implementing the methods developed in Rambachan & Roth (2023, RESTUD), "A More Credible Approach to Parallel Trends" [Previously titled "An Honest Approach..."]. Inference is conducted under a weaker version of the parallel trends assumption. Uniformly valid confidence sets are constructed based upon conditional confidence sets, fixed-length confidence sets and hybridized confidence sets. Encoding: UTF-8 LazyData: true -VignetteBuilder: - knitr License: MIT + file LICENSE diff --git a/README.Rmd b/README.Rmd index ffeef44..ac587b9 100644 --- a/README.Rmd +++ b/README.Rmd @@ -283,9 +283,9 @@ HonestDiD::createSensitivityPlot_relativeMagnitudes(sensitivity_results$robust_c ## Additional options and resources -See the previous package [vignette] for additional examples and package options, including incorporating sign and monotonicity restrictions, and combining relative magnitudes and smoothness restrictions. +See the previous package [vignette](https://github.com/asheshrambachan/HonestDiD/blob/fdadae33428a024df204c210da48cc5362966e89/vignettes/HonestDiD_Example.pdf) for additional examples and package options, including incorporating sign and monotonicity restrictions, and combining relative magnitudes and smoothness restrictions. -You can also view a video presentation about this paper [here](https://www.youtube.com/watch?v=6-NkiA2jN7U). +You can view a video presentation about this paper [here](https://www.youtube.com/watch?v=6-NkiA2jN7U). ## Authors diff --git a/README.md b/README.md index dea8bfe..6f3f883 100644 --- a/README.md +++ b/README.md @@ -592,7 +592,7 @@ HonestDiD::createSensitivityPlot_relativeMagnitudes(sensitivity_results$robust_c ## Additional options and resources -See the previous package \[vignette\] for additional examples and +See the previous package [vignette](https://github.com/asheshrambachan/HonestDiD/blob/fdadae33428a024df204c210da48cc5362966e89/vignettes/HonestDiD_Example.pdf) for additional examples and package options, including incorporating sign and monotonicity restrictions, and combining relative magnitudes and smoothness restrictions. diff --git a/data/VignetteResults.rda b/data/VignetteResults.rda deleted file mode 100644 index 07a51c2..0000000 Binary files a/data/VignetteResults.rda and /dev/null differ diff --git a/man/VignetteResults.Rd b/man/VignetteResults.Rd deleted file mode 100644 index fcd5940..0000000 --- a/man/VignetteResults.Rd +++ /dev/null @@ -1,18 +0,0 @@ -\docType{data} -\name{VignetteResults} -\alias{VignetteResults} -\title{ - Pre-computed results to use in the Vignette. -} -\description{ - This list contains pre-computed time-intensive results used in the vignette. -} -\format{ - \describe{A list, containing 5 results shown in the vignette; refer to the text for details.
- \item{BC_DeltaSDNB_RobustResults}{} - \item{BC_DeltaSDRM_RobustResults}{} - \item{BC_OriginalResults}{} - \item{LW_DeltaSD_RobustResults}{} - \item{LW_DeltaSDD_RobustResults}{} - } -} diff --git a/vignettes/HonestDiD_Example.Rmd b/vignettes/HonestDiD_Example.Rmd deleted file mode 100644 index 0e71fde..0000000 --- a/vignettes/HonestDiD_Example.Rmd +++ /dev/null @@ -1,372 +0,0 @@ ---- -output: - pdf_document: - citation_package: natbib - latex_engine: pdflatex -title: "HonestDiD: Vignette" -author: "Ashesh Rambachan and Jonathan Roth" -date: "`r format(Sys.time(), '%B %d, %Y')`" -geometry: margin=1in -fontfamily: mathpazo -bibliography: bibliography.bib -fontsize: 11pt -vignette: > - %\VignetteIndexEntry{dfadjust} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r setup, include=FALSE, cache = FALSE} -library('knitr') -knitr::opts_knit$set(self.contained = FALSE) -knitr::opts_chunk$set(tidy = TRUE, collapse=TRUE, comment = "#>", - tidy.opts=list(blank=FALSE, width.cutoff=61)) -``` - -# Description - -The `HonestDiD` package implements the methods developed in @RambachanRoth for performing inference in difference-in-differences and event-study designs that is robust to violations of the parallel trends assumption. See @RambachanRoth for methodological details. - -We illustrate how the package can be used by replicating Figure 4 in @RambachanRoth, which applies these methods to @BenzartiCarloni's results on the effects of a decrease in the value-added tax in France on restaurant profits, and replicating Figure 6 in @RambachanRoth, which applies these methods to @LovenheimWillen's results on the effect of public sector bargaining laws on long-run female labor market outcomes. - -The estimated event study coefficients and variance-covariance matrix for the baseline estimates on profits in @BenzartiCarloni are included in the package as `BCdata_EventStudy` (see Section 6.1 of @RambachanRoth for details on the underlying event study specification). The estimated event study coefficients and variance-covariance matrix for the baseline female estimates on employment of @LovenheimWillen are included in the package as `LWdata_EventStudy` (see Section 6.2 of @RambachanRoth for details on the underlying event study specification). The underlying raw data for the estimated event study of @LovenheimWillen are also included in the package. - -# Installation - -To install the package, use the function `install_github()` from the `remotes` package: -```{r, eval = FALSE} -install.packages("remotes") # if remotes package not installed - -# Turn off warning-error-conversion, because the tiniest -# warning stops installation -Sys.setenv("R_REMOTES_NO_ERRORS_FROM_WARNINGS" = "true") - -# install from github -remotes::install_github("asheshrambachan/HonestDiD") -``` -After installation, load in the package using `library('HonestDiD')`. - -```{r, include=FALSE} -library('HonestDiD') -``` - -# Background: choices of $\Delta$ - -Following @RambachanRoth, the parameter of interest is $\theta = l'\tau_{post}$, where $\tau_{post}$ is the vector of dynamic causal effects of interest in the post-periods and $l$ is a vector specified by the user. For instance, if the user is interested in the effect in the first period after treatment, then $l$ should be set to the basis vector with a 1 in the 1st position and zeros elsewhere. 
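For concreteness, a minimal sketch of building such an $l$ with the package's `basisVector` helper; the number of post-treatment periods and the object names here are purely illustrative:

```{r, eval = FALSE}
# Illustrative only: suppose there are 4 post-treatment periods and we target
# the effect in the first of them, so that theta = tau_1.
numPostPeriods_example = 4
l_vec_example = basisVector(index = 1, size = numPostPeriods_example) # (1, 0, 0, 0)'
```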
- -To construct confidence sets for $\theta$ that are robust to violations of the parallel trends assumption, the user must specify the set $\Delta$, which describes the set of possible violations of the parallel trends assumption that the user is willing to consider. - -The `HonestDiD` package currently allows for several choices of $\Delta$. We discuss these possible choices of $\Delta$ below. We refer the reader to Section 2.3 of @RambachanRoth for extensive details on these choices. - -## Smoothness Restrictions -A reasonable baseline in many cases is $\Delta = \Delta^{SD}(M)$, which requires that the underlying trend not deviate ``too much'' from linearity. It imposes that the change in the slope of the underlying trend (i.e. the second difference in $\delta$) be no more than $M$ between consecutive periods, where $M$ is a parameter that is specified by the researcher. Formally, this set is defined as -$$ -\Delta^{SD}(M) = \{\delta : | (\delta_t - \delta_{t-1}) - (\delta_{t-1} - \delta_{t-2}) | \leq M\}. -$$ -For the choice $M = 0$, this choice of $\Delta$ limits the violation of parallel trends to be linear, while $M>0$ relaxes the assumption of exact linearity. See Section 2.3 of @RambachanRoth for further discussion. - -The user may additionally restrict the sign of the bias in the post-period. This may be reasonable, for instance, in cases where there is a simultaneous policy change which we think affects the outcome of interest in a particular direction. We refer to restrictions that combine $\Delta^{SD}$ with a restriction on the post-period bias by $\Delta = \Delta^{SDB}(M)$. For example, $\Delta^{SDPB}(M)$ additionally imposes that the violation of parallel trends must be positive in the post-periods, $\delta_t \geq 0$ for $t \geq 0$. Likewise, $\Delta^{SDNB}(M)$ additionally imposes that the violation of parallel trends must be negative in the post-periods, $\delta_t \leq 0$ for $t \geq 0$. - -The researcher may additionally impose shape restrictions requiring that the violation of parallel trends be monotonically increasing or decreasing. Such restrictions may be reasonable in cases where the researcher is concerned about secular trends that would have continued absent treatment. We refer to restrictions that combine $\Delta^{SD}$ with monotonicity restrictions by $\Delta = \Delta^{SDM}(M)$. In the case where the violation of parallel trends must be increasing, denoted by $\Delta^{SDI}(M)$, this additionally restricts $\delta_{t} \geq \delta_{t-1}$ for all $t$. In the case where the violation of parallel trends must be decreasing, denoted by $\Delta^{SDD}(M)$, this additionally restricts $\delta_{t} \leq \delta_{t-1}$ for all $t$. - -These "smoothness restriction" based choices of $\Delta$ are implemented in `HonestDiD`. - -## Bounding Relative Magnitudes - -Alternatively, the researcher may specify $\Delta$ in a manner that bounds the worst-case violation of parallel trends in the post-treatment period based on the observed worst-case violation in the pre-treatment period. @RambachanRoth consider two forms of such $\Delta$. - -First, $\Delta^{RM}(\bar{M})$ bounds the maximum post-treatment violation of parallel trends (between consecutive periods) by $\bar{M}$ times the maximum pre-treatment violation of parallel trends.
This is defined as -$$ -\Delta^{RM}(\bar{M}) = \{ \delta \,:\, \forall t\geq0, \, |\delta_{t+1} - \delta_{t}| \leq \bar{M} \cdot \max_{s< 0} |\delta_{s+1} - \delta_{s}| \} -$$ -Second, $\Delta^{SDRM}(\bar{M})$ bounds the maximum deviation from a linear trend in the post-treatment period by $\bar{M}$ times the observed maximum deviation from a linear trend in the pre-treatment period. This is defined as -$$ -\Delta^{SDRM}(\bar{M}) = \{ \delta \,:\, \forall t\geq0, \, |(\delta_{t+1} - \delta_{t}) - (\delta_{t} - \delta_{t-1}) | \leq \bar{M} \cdot \max_{s<0} |(\delta_{s+1} - \delta_{s}) - (\delta_{s} - \delta_{s-1})| \} -$$ -Notice that this choice of $\Delta = \Delta^{SDRM}(\bar{M})$ is analogous to the earlier choice $\Delta^{SD}(M)$, but it allows the magnitude of the possible deviations from a linear trend to depend on the observed pre-treatment trends (rather than being imposed a priori by the researcher). Both of these choices, $\Delta^{RM}(\bar{M})$ and $\Delta^{SDRM}(\bar{M})$, may also be combined with the sign and shape restrictions discussed above. - -These "relative magnitude" based choices of $\Delta$ are implemented in `HonestDiD`. - -# Constructing a sensitivity plot - -We next show how to use the package `HonestDiD` to conduct a formal sensitivity analysis. We recommend that the user create a sensitivity plot that shows how the robust confidence sets vary under different assumptions about $\Delta$ (e.g., letting $M$ vary or adding sign/shape restrictions). - -The function `createSensitivityResults` provides a wrapper function to conduct sensitivity analysis for the ``smoothness restriction'' based choices of $\Delta$ discussed in the previous section. This function takes as inputs the estimated event study coefficients, the estimated variance-covariance matrix of the estimates along with the user's choice of $\Delta$ and chosen method for constructing robust confidence intervals. It returns the upper and lower bounds of the robust confidence sets for a vector of choices of $M$ as a dataframe. The researcher may specify that $\Delta$ equals $\Delta^{SD}(M)$, $\Delta^{SDB}(M)$ or $\Delta^{SDM}(M)$. In the latter two cases, the user additionally specifies the sign/direction of the bias/monotonicity. - -If the user leaves the desired method as `NULL` in `createSensitivityResults`, the function automatically selects the robust confidence interval based upon the recommendations in Section 5.3 of @RambachanRoth. If $\Delta = \Delta^{SD}(M)$, the FLCI is used. If $\Delta = \Delta^{SDB}(M)$ or $\Delta = \Delta^{SDM}(M)$, the conditional FLCI hybrid confidence set is used. As a default, the function sets the parameter of interest to be the first post-period causal effect, $\theta = \tau_1$. The user may directly specify the parameter of interest by setting the input `l_vec`. - -The function `createSensitivityResults_relativeMagnitudes` provides a wrapper function to conduct sensitivity analysis for the "relative magnitudes" based choices of $\Delta$ discussed in the previous section. This function also takes as inputs the estimated event study coefficients, the estimated variance-covariance matrix of the estimates along with the user's choice of $\Delta$ and chosen method for constructing robust confidence intervals. It returns the upper and lower bounds of the robust confidence sets for a vector of choices of $\bar{M}$ as a dataframe.
The user may choose $\Delta^{RM}(\bar{M})$ by setting `bound = "deviation from parallel trends"` and $\Delta^{SDRM}(\bar{M})$ by setting `bound = "deviation from linear trend"`. The user may also specify additional sign/shape restrictions, which requires further specifying the sign/direction of the bias/monotonicity. - -If the user leaves the desired method as `NULL` in `createSensitivityResults_relativeMagnitudes`, the function automatically selects the robust confidence interval based upon the recommendations in Section 5.3 of @RambachanRoth. For $\Delta^{RM}(\bar{M})$, $\Delta^{SDRM}(\bar{M})$ and their variants that incorporate sign/shape restrictions, the function uses the conditional least favorable hybrid confidence set by default. Again, as a default, the function sets the parameter of interest to be the first post-period causal effect, $\theta = \tau_1$. The user may directly specify the parameter of interest by setting the input `l_vec`. - -# Sensitivity Analysis: Incidence of Value-Added Tax Cut - -## Preliminaries - -The included data frame `BCdata_EventStudy` contains the estimated event study coefficients and estimated variance-covariance matrix for the baseline estimates for profits based on the event study specification of @BenzartiCarloni. As discussed in Section 6.1 of @RambachanRoth, the authors estimate the effect of a reduction in the value-added tax in France in July 2009 on restaurant profits. Comparing restaurants to a control group of other market services firms that were unaffected by the policy change, the authors estimate the event study specification -$$ -Y_{it} = \sum_{s\neq2008} \beta_s \times 1[t = s] \times D_{i} + \phi_i + \lambda_t + \epsilon_{it}, -$$ -where $Y_{it}$ is the log of (before-tax) profits for firm $i$ in year $t$; $D_{i}$ is an indicator for whether firm $i$ is a restaurant; $\phi_i$ and $\lambda_t$ are firm and year fixed effects; and standard errors are clustered at the regional level. `BCdata_EventStudy` contains the estimated event study coefficients $\hat \beta_{s}$, the associated variance-covariance matrix of these estimates and some additional information about the event study specification. The next code snippet loads the data. - -```{r, echo=FALSE} -data('VignetteResults', package = "HonestDiD") -for (i in names(VignetteResults)) assign(i, VignetteResults[[i]]) -``` - -```{r} -data('BCdata_EventStudy', package = "HonestDiD") - -# Number of pre-periods -BC_numPrePeriods = length(BCdata_EventStudy$prePeriodIndices) -BC_numPostPeriods = length(BCdata_EventStudy$postPeriodIndices) -``` - -## Conducting Sensitivity Analyses - -We now show how to use the package `HonestDiD` to conduct a formal sensitivity analysis using the estimated event study from @BenzartiCarloni. - -In the next code snippet, we conduct the sensitivity analysis plotted in the top right panel of Figure 4 in @RambachanRoth, which shows a sensitivity analysis using $\Delta = \Delta^{SDRM}(\bar{M})$ for the effect on profits in 2009, $\theta = \tau_{2009}$. To do so, we set `bound = "deviation from linear trend"` in `createSensitivityResults_relativeMagnitudes`. - -```{r, warning = FALSE, eval = FALSE} -# Create l_vec to define the parameter of interest, the -# first post-treatment period. -BC_l_vec = basisVector(index = 1, size = BC_numPostPeriods) - -# Construct robust confidence intervals for -# Delta^{SDRM}(Mbar) for first post-treatment period. We -# specify 100 gridPoints over [-1, 1] for the underlying -# test inversion to construct the robust confidence set.
-# Users may wish to leave this at the default values. -BC_DeltaSDRM_RobustResults = createSensitivityResults_relativeMagnitudes( # - betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - bound = "deviation from linear trend", - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - Mbarvec = seq(from = 0, to = 2, by = 0.5), - gridPoints = 100, grid.lb = -1, grid.ub = 1) - -head(BC_DeltaSDRM_RobustResults) -``` - -```{r, echo=FALSE} -BC_l_vec = basisVector(index = 1, size = BC_numPostPeriods) -head(BC_DeltaSDRM_RobustResults) -``` - -The function `createSensitivityPlot_relativeMagnitudes` can then be used to construct a sensitivity plot presenting these results. `createSensitivityPlot_relativeMagnitudes` takes two key inputs. The first input is the dataframe that is produced by `createSensitivityResults_relativeMagnitudes`, which contains the robust confidence intervals. The second input is a dataframe that contains the OLS confidence set for the parameter of interest. This dataframe can be constructed using the function `constructOriginalCS`. In the next code snippet, we show how these functions can be used to replicate the top right panel of Figure 4 in @RambachanRoth. - -```{r, warning = FALSE} -# Construct dataframe with OLS confidence interval for theta -BC_OriginalResults = constructOriginalCS(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec ) - -# Construct sensitivity plot. -BC_DeltaSDRM_SensitivityPlot = createSensitivityPlot_relativeMagnitudes( # - robustResults = BC_DeltaSDRM_RobustResults, - originalResults = BC_OriginalResults) - -BC_DeltaSDRM_SensitivityPlot -``` - -This sensitivity analysis finds that the "breakdown value" of $\bar{M}$ is about 1.5, which means that the significant treatment effect found in 2009 is robust to allowing for a non-linearity in the differential trend between restaurants and other service firms in the post-treatment period that is about 1.5 times the maximum observed non-linearity in the pre-treatment period. - -We also discuss how additional context-specific knowledge may inform the sensitivity analysis. As discussed in Section 6.1 of @RambachanRoth, @BenzartiCarloni indicate that their event-study estimates may be biased since other confounding policy changes occurred at the same time of the value-added tax change. This suggests that we may wish to incorporate an additional sign restriction in the sensitivity analysis, which imposes that the sign of the bias be negative. The next code snippet therefore conducts the sensitivity analysis plotted in the bottom right panel of Figure 4 in @RambachanRoth, which shows a sensitivity analysis using $\Delta = \Delta^{SDNB}(M) := \Delta^{SD}(M) \cap \{\delta \,:\, \delta_{post} \leq 0 \}$. We construct the sensitivity analysis using `createSensitivityResults` and specify that `biasDirection = "negative"`. We plot the results using `createSensitivityPlot`. 
- -```{r, warning = FALSE, eval = FALSE} -# Construct robust confidence intervals for Delta^{SDNB}(M) -# for first post-treatment period -BC_DeltaSDNB_RobustResults = createSensitivityResults( # - betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - Mvec = seq(from = 0, to = 0.3, by = 0.1), - biasDirection = "negative") -BC_DeltaSDNB_SensitivityPlot = createSensitivityPlot( # - robustResults = BC_DeltaSDNB_RobustResults, - originalResults = BC_OriginalResults) -BC_DeltaSDNB_SensitivityPlot -``` - -```{r, echo = FALSE} -createSensitivityPlot(robustResults = BC_DeltaSDNB_RobustResults, - originalResults = BC_OriginalResults) -``` - -# Sensitivity Analysis: Effect of Public Sector Bargaining Laws - -## Preliminaries - -The included dataframe `LWdata_EventStudy` contains the estimated event study coefficients and estimated variance-covariance matrix for the baseline estimates for female employment based on the event study specification of @LovenheimWillen. As discussed in Section 6.2 of @RambachanRoth, the authors use the American Community Survey to study the impact of public-sector duty-to-bargain (DTB) laws, which strengthen teachers' unions, on adult labor market outcomes. Exploiting the differential timing of the passage of DTB laws across states, the authors estimate the event study specification -$$ -Y_{sct} = \sum_{r=-11}^{21} D_{scr} \beta_{r} + X_{sct}^\prime \gamma + \lambda_{ct} + \phi_s + \epsilon_{sct}, -$$ -where $Y_{sct}$ is the average employment for a cohort of students born in state $s$ in cohort $c$ in ACS calendar year $t$ and $D_{scr}$ is an indicator for whether state $s$ passed a DTB law $r$ years before cohort $c$ turned age 18. `LWdata_EventStudy` contains the estimated event study coefficients $\hat \beta_{r}$, the associated variance-covariance matrix of these estimates and some additional information about the event study specification. The next code snippet loads the data. - -```{r} -data('LWdata_EventStudy', package = "HonestDiD") - -# Number of pre-periods -LW_numPrePeriods = length(LWdata_EventStudy$prePeriodIndices) -LW_numPostPeriods = length(LWdata_EventStudy$postPeriodIndices) -``` - -We provide additional details on how the results in `LWdata_EventStudy` are constructed in the section titled ``Details on replicating @LovenheimWillen'' below. - -## Conducting Sensitivity Analyses - -We now show how to use the package `HonestDiD` to conduct a formal sensitivity analysis using the estimated event study from @LovenheimWillen. - -In the next code snippet, we conduct the sensitivity analysis plotted in the right panel of Figure 6 in @RambachanRoth, which shows a sensitivity analysis using $\Delta = \Delta^{SD}(M)$ for the effect on female employment after 15 years of exposure to a duty-to-bargain law, $\theta = \tau_{15}$. 
- -```{r warning = FALSE, eval = FALSE} -# Create l_vec corresponding with 15 years of exposure -# Reference is -2 years of exposure, so want effect 17 pds later -LW_l_vec = basisVector(15 - (-2), LW_numPostPeriods) - -# Construct robust confidence intervals for Delta^{SD}(M) for 15 years of exposure -LW_DeltaSD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec, - Mvec = seq(from = 0, to = 0.04, by = 0.005)) -head(LW_DeltaSD_RobustResults) -``` - -```{r echo = FALSE} -LW_l_vec = basisVector(15 - (-2), LW_numPostPeriods) -head(LW_DeltaSD_RobustResults) -``` - -The function `createSensitivityPlot` can then be used to construct a sensitivity plot presenting these results. `createSensitivityPlot` takes two key inputs. The first input is the dataframe that is produced by `createSensitivityResults`, which contains the robust confidence intervals. The second input is a dataframe that contains the OLS confidence set for the parameter of interest. This dataframe can be constructed using the function `constructOriginalCS`. In the next code snippet, we show how these functions can be used to replicate the right panel of Figure 6 in @RambachanRoth. - -```{r warning = FALSE} -# Construct dataframe with OLS confidence interval for theta -LW_OriginalResults = constructOriginalCS(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec ) - -# Construct sensitivity plot -LW_DeltaSD_SensitivityPlot = createSensitivityPlot( # - robustResults = LW_DeltaSD_RobustResults, - originalResults = LW_OriginalResults) -LW_DeltaSD_SensitivityPlot -``` - -In the next code snippet, we conduct an additional sensitivity analysis. The exercise is similar to that shown above, except we now impose that any violations of parallel trends be (weakly) decreasing ($\Delta = \Delta^{SDD}(M)$). This incorporates the intuition from @LovenheimWillen that the pre-trends for women are likely due to secular trends in female labor supply that would have continued absent treatment. - -```{r warning = FALSE, eval = FALSE} -# Construct robust confidence intervals for Delta^{SDD}(M) -LW_DeltaSDD_RobustResults = createSensitivityResults( # - betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - monotonicityDirection = "decreasing", - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec, - Mvec = seq(from = 0, to = 0.04, by = 0.005)) - -# Construct sensitivity plot -LW_DeltaSDD_SensitivityPlot = createSensitivityPlot( # - robustResults = LW_DeltaSDD_RobustResults, - originalResults = LW_OriginalResults) -LW_DeltaSDD_SensitivityPlot -``` - -```{r echo = FALSE} -createSensitivityPlot(robustResults = LW_DeltaSDD_RobustResults, originalResults = LW_OriginalResults) -``` - -## Benchmarking $M$ - -The sensitivity plots discussed above show how our conclusions change as we allow for larger degrees of possible non-linearity in the violations of parallel trends, parameterized by $M$. @RambachanRoth discuss multiple ways for benchmarking $M$ in applied settings. - -One approach for benchmarking $M$ is to use context-specific knowledge about the magnitudes of potential confounds. 
For instance, in the context of @LovenheimWillen, one concern is differential changes in education quality that would have occurred even absent the passage of DTB laws. Section 6.2 of @RambachanRoth calibrates $M$ using estimates of the effect of teacher quality on adult employment from @CFR2014. In this calibration, a value of $M = 0.01$ corresponds to a change in the slope of the differential trend equivalent to a change in teacher quality of $0.025$ standard deviations. - -In some cases, it may also be useful to benchmark $M$ -- which bounds the change in slope of the differential trend between consecutive periods -- using estimates of the largest change in slope in the pre-period. We provide the functions `DeltaSD_lowerBound_Mpre` and `DeltaSD_upperBound_Mpre`, which create one-sided confidence intervals for the largest change in slope in the pre-period. Values of $M$ below the values computed by `DeltaSD_lowerBound_Mpre` are rejected by the data (at the given significance level), and thus should be viewed with caution. On the other hand, we stress that data from the pre-period cannot, on its own, place an upper bound on the possible degree of non-linearity under the counterfactual in the post-periods. However, in some cases it may be useful to benchmark the assumed maximal degree of non-linearity $M$ in terms of the largest change in slope in the pre-period. These functions can also be used analogously to benchmark $M$ using event-studies for placebo groups. See the R documentation for additional details. - -Both functions require the user to specify the vector of estimated event study coefficients, the variance-covariance matrix, the number of pre-periods and the desired size of the one-sided confidence intervals. They can be used as follows: - -```{r, eval = FALSE} -LW_lowerBound_M = DeltaSD_lowerBound_Mpre(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods) -LW_upperBound_M = DeltaSD_upperBound_Mpre(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods) -``` - -## Details on replicating @LovenheimWillen - -We now provide additional details on the construction of the event-study results from @LovenheimWillen used in the example above. This event study specification can be fully replicated using a dataframe provided in the package `HonestDiD`. Within the subdirectory `inst/extdata`, the package provides the Stata dataset `LWdata_RawData.dta`. This contains the estimation sample for females that is used to estimate the above event study specification. This dataset is provided in the replication files for @LovenheimWillen and can be found in the subdirectory of the replication files, `Data files/Estimation samples/Estimation_FEMALE.dta`.
The following code snippet shows how to reproduce the event study above using the provided data - -```{r eval = FALSE, eval = FALSE} -# Load in LWdata_RawData.dta -LWdata_RawData = haven::read_dta(system.file("extdata", "LWdata_RawData.dta", - package = "HonestDiD")) - -# Estimate event study using lfe package -EmpFemale.EventStudy = lfe::felm(emp ~ rtESV13 + rtESV14 + rtESV15 + - rtESV16 + rtESV17 + rtESV18 + - rtESV19 + rtESV110 + rtESV111 + # End Pre-periods - rtESV113 + rtESV114 + rtESV115 + - rtESV116 + rtESV117 + rtESV118 + - rtESV119 + rtESV120 + rtESV121 + - rtESV122 + rtESV123 + rtESV124 + - rtESV125 + rtESV126 + rtESV127 + - rtESV128 + rtESV129 + rtESV130 + - rtESV131 + rtESV132 + rtESV133 + - rtESV134 + rtESV135 + # End post-periods - yearsfcor + yearsflr + aveitc + fscontrol + - asian + black + hispanic + other | - factor(PUS_SURVEY_YEAR)*factor(BIRTHYEAR) + - factor(PUS_SURVEY_YEAR) + factor(BIRTHSTATE) | - 0 | BIRTHSTATE, - data = LWdata_RawData, - weights = LWdata_RawData$nobs) - -# Extract coefficients of regression associated with event study coefficients -coefIndex = which(grepl(x = dimnames(EmpFemale.EventStudy$coefficients)[[1]], - pattern = "rtESV")) -betahat = EmpFemale.EventStudy$beta[coefIndex, ] - -# Extract estimated variance-covariance matrix of event study coefficients -sigma = EmpFemale.EventStudy$clustervcv[coefIndex, coefIndex] - -#Rescale by 100 so that results will be in units of percentage points -betahat = 100 * betahat -sigma = 100^2 * sigma - -# Construct vector of event times and the scalar reference period -timeVec = c(seq(from = -11, to = -3, by = 1), seq(from = -1, to = 21, by = 1)) -referencePeriod = -2 -postPeriodIndices = which(timeVec > -2) -prePeriodIndices = which(timeVec < -2) - -# Construct standard errors associated with event study coefficients -stdErrors = summary(EmpFemale.EventStudy)$coefficients[coefIndex,2] - -# Create list containing objects produced by the event study -LWdata_EventStudy = list( - betahat = betahat, - sigma = sigma, - timeVec = timeVec, - referencePeriod = referencePeriod, - prePeriodIndices = prePeriodIndices, - postPeriodIndices = postPeriodIndices, - stdErrors = stdErrors -) -``` - -\newpage -# References diff --git a/vignettes/HonestDiD_Example.pdf b/vignettes/HonestDiD_Example.pdf deleted file mode 100644 index 7d3ec43..0000000 Binary files a/vignettes/HonestDiD_Example.pdf and /dev/null differ diff --git a/vignettes/bibliography.bib b/vignettes/bibliography.bib deleted file mode 100644 index 01d73a4..0000000 --- a/vignettes/bibliography.bib +++ /dev/null @@ -1,50 +0,0 @@ -@techreport{RambachanRoth, - title = "An Honest Approach to Parallel Trends", - author = "Ashesh Rambachan and Jonathan Roth", - year={2021} -} - -@article{LovenheimWillen, - title = {The Long-Run Effects of Teacher Collective Bargaining}, - journal = {American Economic Journal: Economic Policy}, - author = {Lovenheim, Michael F. and Willen, Alexander}, - volume={11}, - number={3}, - year={2019}, - pages={292-324}, -} - -@article{BenzartiCarloni, - title={Who Really Benefits from Consumption Tax Cuts? 
Evidence from a Large VAT Reform in France}, - author={Youssef Benzarti and Dorian Carloni}, - journal = {American Economic Journal: Economic Policy}, - volume={11}, - number={1}, - pages={38-63}, - year={2019} -} - -@techreport{AndrewsRothPakes, - title={Inference for Linear Conditional Moment Inequalities}, - author={Isaiah Andrews and Jonathan Roth and Ariel Pakes}, - year={2019} -} - - -@article{CFR2014, - title = {Measuring the {Impacts} of {Teachers} {II}: {Teacher} {Value}-{Added} and {Student} {Outcomes} in {Adulthood}}, - volume = {104}, - issn = {0002-8282}, - shorttitle = {Measuring the {Impacts} of {Teachers} {II}}, - url = {http://pubs.aeaweb.org/doi/10.1257/aer.104.9.2633}, - doi = {10.1257/aer.104.9.2633}, - language = {en}, - number = {9}, - urldate = {2019-11-07}, - journal = {American Economic Review}, - author = {Chetty, Raj and Friedman, John N. and Rockoff, Jonah E.}, - month = sep, - year = {2014}, - pages = {2633--2679}, - file = {Chetty et al. - 2014 - Measuring the Impacts of Teachers II Teacher Valu.pdf:/Users/jonathanroth/Zotero/storage/WDHMLCV9/Chetty et al. - 2014 - Measuring the Impacts of Teachers II Teacher Valu.pdf:application/pdf} -} diff --git a/vignettes/precompute.R b/vignettes/precompute.R deleted file mode 100644 index f3dcbf2..0000000 --- a/vignettes/precompute.R +++ /dev/null @@ -1,167 +0,0 @@ -library(HonestDiD) - -# data('VignetteResults', package="HonestDiD") -data('BCdata_EventStudy', package="HonestDiD") - -# Number of pre-periods -BC_numPrePeriods = length(BCdata_EventStudy$prePeriodIndices) -BC_numPostPeriods = length(BCdata_EventStudy$postPeriodIndices) - -# Create l_vec to define the parameter of interest, the first post-treatment period. -BC_l_vec = basisVector(index = 1, size = BC_numPostPeriods) - -# Construct robust confidence intervals for Delta^{SDRM}(Mbar) for first post-treatment period. -# We specify 100 gridPoints over [-1, 1] for the underlying test inversion to construct the robust confidence set. -# Users may wish to leave this at the default values. -BC_DeltaSDRM_RobustResults = - createSensitivityResults_relativeMagnitudes(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - bound = "deviation from linear trend", - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - Mbarvec = seq(from = 0, to = 2, by = 0.5), - gridPoints = 100, grid.lb = -1, grid.ub = 1) - -head(BC_DeltaSDRM_RobustResults) - -# Construct dataframe with OLS confidence interval for theta. -BC_OriginalResults = constructOriginalCS(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec ) - -# Construct sensitivity plot. 
-BC_DeltaSDRM_SensitivityPlot = - createSensitivityPlot_relativeMagnitudes(robustResults = BC_DeltaSDRM_RobustResults, - originalResults = BC_OriginalResults) - -BC_DeltaSDRM_SensitivityPlot - -# Construct robust confidence intervals for Delta^{SDNB}(M) for first post-treatment period -BC_DeltaSDNB_RobustResults = createSensitivityResults(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - Mvec = seq(from = 0, to = 0.3, by = 0.1), - biasDirection = "negative") -BC_DeltaSDNB_SensitivityPlot = createSensitivityPlot(robustResults = BC_DeltaSDNB_RobustResults, - originalResults = BC_OriginalResults) -BC_DeltaSDNB_SensitivityPlot - -data('LWdata_EventStudy', package = "HonestDiD") - -# Number of pre-periods -LW_numPrePeriods = length(LWdata_EventStudy$prePeriodIndices) -LW_numPostPeriods = length(LWdata_EventStudy$postPeriodIndices) - -# Create l_vec corresponding with 15 years of exposure -# Reference is -2 years of exposure, so want effect 17 pds later -LW_l_vec = basisVector(15 - (-2), LW_numPostPeriods) - -# Construct robust confidence intervals for Delta^{SD}(M) for 15 years of exposure -LW_DeltaSD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec, - Mvec = seq(from = 0, to = 0.04, by = 0.005)) -head(LW_DeltaSD_RobustResults) - -# Construct dataframe with OLS confidence interval for theta -LW_OriginalResults = constructOriginalCS(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec ) - -# Construct sensitivity plot -LW_DeltaSD_SensitivityPlot = createSensitivityPlot(robustResults = LW_DeltaSD_RobustResults, - originalResults = LW_OriginalResults) -LW_DeltaSD_SensitivityPlot - -# Construct robust confidence intervals for Delta^{SDD}(M) -LW_DeltaSDD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - monotonicityDirection = "decreasing", - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec, - Mvec = seq(from = 0, to = 0.04, by = 0.005)) - -# Construct sensitivity plot -LW_DeltaSDD_SensitivityPlot = createSensitivityPlot(robustResults = LW_DeltaSDD_RobustResults, - originalResults = LW_OriginalResults) -LW_DeltaSDD_SensitivityPlot - -LW_lowerBound_M = DeltaSD_lowerBound_Mpre(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods) -LW_upperBound_M = DeltaSD_upperBound_Mpre(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods) - -# Load in LWdata_RawData.dta -LWdata_RawData = haven::read_dta(system.file("extdata", "LWdata_RawData.dta", - package = "HonestDiD")) - -# Estimate event study using lfe package -EmpFemale.EventStudy = lfe::felm(emp ~ rtESV13 + rtESV14 + rtESV15 + - rtESV16 + rtESV17 + rtESV18 + - rtESV19 + rtESV110 + rtESV111 + # End Pre-periods - rtESV113 + rtESV114 + rtESV115 + - rtESV116 + rtESV117 + rtESV118 + - rtESV119 + rtESV120 + rtESV121 + - rtESV122 + rtESV123 + rtESV124 + - rtESV125 + rtESV126 + rtESV127 + - rtESV128 + rtESV129 + rtESV130 + - rtESV131 + rtESV132 + rtESV133 + - rtESV134 + rtESV135 + # End post-periods - yearsfcor + yearsflr + aveitc + fscontrol + - asian + black + hispanic + 
other | - factor(PUS_SURVEY_YEAR)*factor(BIRTHYEAR) + - factor(PUS_SURVEY_YEAR) + factor(BIRTHSTATE) | - 0 | BIRTHSTATE, - data = LWdata_RawData, - weights = LWdata_RawData$nobs) - -# Extract coefficients of regression associated with event study coefficients -coefIndex = which(grepl(x = dimnames(EmpFemale.EventStudy$coefficients)[[1]], - pattern = "rtESV")) -betahat = EmpFemale.EventStudy$beta[coefIndex, ] - -# Extract estimated variance-covariance matrix of event study coefficients -sigma = EmpFemale.EventStudy$clustervcv[coefIndex, coefIndex] - -#Rescale by 100 so that results will be in units of percentage points -betahat = 100 * betahat -sigma = 100^2 * sigma - -# Construct vector of event times and the scalar reference period -timeVec = c(seq(from = -11, to = -3, by = 1), seq(from = -1, to = 21, by = 1)) -referencePeriod = -2 -postPeriodIndices = which(timeVec > -2) -prePeriodIndices = which(timeVec < -2) - -# Construct standard errors associated with event study coefficients -stdErrors = summary(EmpFemale.EventStudy)$coefficients[coefIndex,2] - -# Create list containing objects produced by the event study -LWdata_EventStudy = list( - betahat = betahat, - sigma = sigma, - timeVec = timeVec, - referencePeriod = referencePeriod, - prePeriodIndices = prePeriodIndices, - postPeriodIndices = postPeriodIndices, - stdErrors = stdErrors -) - -VignetteResults <- list(BC_DeltaSDRM_RobustResults = BC_DeltaSDRM_RobustResults, - BC_OriginalResults = BC_OriginalResults, - BC_DeltaSDNB_RobustResults = BC_DeltaSDNB_RobustResults, - LW_DeltaSD_RobustResults = LW_DeltaSD_RobustResults, - LW_DeltaSDD_RobustResults = LW_DeltaSDD_RobustResults) -save(VignetteResults, file="data/VignetteResults.rda")
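As a small numerical illustration of the smoothness restriction $\Delta^{SD}(M)$ discussed in the removed vignette text above, the sketch below checks whether a candidate vector of parallel-trends violations satisfies the second-difference bound; the helper name is hypothetical and is not a `HonestDiD` function:

```r
# Illustrative helper (not part of HonestDiD): does a candidate vector of
# parallel-trends violations delta lie in Delta^{SD}(M)? That is, are all
# second differences of delta bounded by M in absolute value?
inDeltaSD_example <- function(delta, M) {
  all(abs(diff(delta, differences = 2)) <= M)
}

inDeltaSD_example(c(-0.2, -0.1, 0, 0.1, 0.2), M = 0)      # TRUE: an exactly linear violation
inDeltaSD_example(c(-0.2, -0.1, 0, 0.05, 0.1), M = 0.01)  # FALSE: the slope changes by 0.05 > M
```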