Skip to content

Commit

Permalink
Fix EITC calibration (#98)
Browse files Browse the repository at this point in the history
* Add dropout

* Add dropout and EITC calibration

* Format

* Add install catch

* Change download folder

* Flip order of US install and bump US

* Add EITC targets

* Update data links

* Fix "EITC returns are incorrectly calibrated" (#97)

* Lower dropout rate to 5%

* Update validation

* Update data releases
  • Loading branch information
nikhilwoodruff authored Oct 8, 2024
1 parent 4866f7c commit 32e19d8
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 54 deletions.
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
  changes:
    fixed:
    - EITC calibration.
94 changes: 47 additions & 47 deletions docs/validation.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion policyengine_us_data/datasets/acs/acs.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class ACS_2022(ACS):
time_period = 2022
file_path = STORAGE_FOLDER / "acs_2022.h5"
census_acs = CensusACS_2022
url = "release://PolicyEngine/policyengine-us-data/1.9.0/acs_2022.h5"
url = "release://PolicyEngine/policyengine-us-data/1.10.0/acs_2022.h5"


if __name__ == "__main__":
Expand Down
30 changes: 28 additions & 2 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def generate(self):
raw_data.close()
self.save_dataset(cps)

add_takeup(self)


def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
cps["tenure_type"] = household.H_TENURE.map(
Expand Down Expand Up @@ -114,6 +116,30 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
cps["real_estate_taxes"][mask] = imputed_values["real_estate_taxes"]


def add_takeup(self):
    """Add randomly drawn SNAP and EITC take-up flags to the dataset.

    Loads the dataset with ``self.load_dataset()``, adds two arrays, and
    writes the result back with ``self.save_dataset()``:

    - ``takes_up_snap_if_eligible``: one uniform draw per entry of
      ``spm_unit_id``, compared against the SNAP take-up rate parameter.
    - ``takes_up_eitc``: one uniform draw per entry of ``tax_unit_id``,
      compared against the EITC take-up rate looked up from each unit's
      ``eitc_child_count``.

    Parameters are read for ``self.time_period``. The random generator is
    seeded with a constant, so repeated runs produce the same draws.
    """
    data = self.load_dataset()

    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid import cost or a circular import
    # with policyengine_us — confirm before moving it.
    from policyengine_us import Microsimulation

    baseline = Microsimulation(dataset=self)
    parameters = baseline.tax_benefit_system.parameters(self.time_period)
    generator = np.random.default_rng(seed=100)

    # The SNAP draw must stay ahead of the EITC draw: both consume the same
    # seeded generator, so swapping them would change every flag.
    snap_takeup_rate = parameters.gov.usda.snap.takeup_rate
    data["takes_up_snap_if_eligible"] = (
        generator.random(len(data["spm_unit_id"])) < snap_takeup_rate
    )

    # The EITC take-up rate is looked up from the simulated child count.
    eitc_takeup_rates = parameters.gov.irs.credits.eitc.takeup
    eitc_child_count = baseline.calculate("eitc_child_count").values
    eitc_takeup_rate = eitc_takeup_rates.calc(eitc_child_count)
    data["takes_up_eitc"] = (
        generator.random(len(data["tax_unit_id"])) < eitc_takeup_rate
    )

    self.save_dataset(data)


def uprate_cps_data(data, from_period, to_period):
uprating = create_policyengine_uprating_factors_table()
for variable in uprating.index.unique():
Expand Down Expand Up @@ -622,7 +648,7 @@ class CPS_2024(CPS):
label = "CPS 2024 (2022-based)"
file_path = STORAGE_FOLDER / "cps_2024.h5"
time_period = 2024
url = "release://policyengine/policyengine-us-data/1.9.0/cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.10.0/cps_2024.h5"


class PooledCPS(Dataset):
Expand Down Expand Up @@ -681,7 +707,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
CPS_2023,
]
time_period = 2023
url = "release://PolicyEngine/policyengine-us-data/1.9.0/pooled_3_year_cps_2023.h5"
url = "release://PolicyEngine/policyengine-us-data/1.10.0/pooled_3_year_cps_2023.h5"


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions policyengine_us_data/datasets/cps/enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def reweight(
original_weights,
loss_matrix,
targets_array,
dropout_rate=0.1,
dropout_rate=0.05,
):
target_names = np.array(loss_matrix.columns)
loss_matrix = torch.tensor(loss_matrix.values, dtype=torch.float32)
Expand Down Expand Up @@ -189,7 +189,7 @@ class EnhancedCPS_2024(EnhancedCPS):
name = "enhanced_cps_2024"
label = "Enhanced CPS 2024"
file_path = STORAGE_FOLDER / "enhanced_cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.9.0/enhanced_cps_2024.h5"
url = "release://policyengine/policyengine-us-data/1.10.0/enhanced_cps_2024.h5"


if __name__ == "__main__":
Expand Down
12 changes: 10 additions & 2 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,16 @@ def build_loss_matrix(dataset: type, time_period):
)
eitc_eligible_children = sim.calculate("eitc_child_count").values
eitc = sim.calculate("eitc").values
if row["count_children"] < 2:
meets_child_criteria = (
eitc_eligible_children == row["count_children"]
)
else:
meets_child_criteria = (
eitc_eligible_children >= row["count_children"]
)
loss_matrix[returns_label] = sim.map_result(
(eitc > 0) * (eitc_eligible_children == row["count_children"]),
(eitc > 0) * meets_child_criteria,
"tax_unit",
"household",
)
Expand All @@ -219,7 +227,7 @@ def build_loss_matrix(dataset: type, time_period):
f"irs/eitc/spending/count_children_{row['count_children']}"
)
loss_matrix[spending_label] = sim.map_result(
eitc * (eitc_eligible_children == row["count_children"]),
eitc * meets_child_criteria,
"tax_unit",
"household",
)
Expand Down

0 comments on commit 32e19d8

Please sign in to comment.