Skip to content

Commit 32e19d8

Browse files
Fix EITC calibration (#98)
* Add dropout
* Add dropout and EITC calibration
* Format
* Add install catch
* Change download folder
* Flip order of US install and bump US
* Add EITC targets
* Update data links
* Fix "EITC returns are incorrectly calibrated" (#97)
* Lower dropout rate to 5%
* Update validation
* Update data releases
1 parent 4866f7c commit 32e19d8

6 files changed

Lines changed: 92 additions & 54 deletions

File tree

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
- bump: minor
2+
changes:
3+
fixed:
4+
- EITC calibration.

docs/validation.ipynb

Lines changed: 47 additions & 47 deletions
Large diffs are not rendered by default.

policyengine_us_data/datasets/acs/acs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ class ACS_2022(ACS):
111111
time_period = 2022
112112
file_path = STORAGE_FOLDER / "acs_2022.h5"
113113
census_acs = CensusACS_2022
114-
url = "release://PolicyEngine/policyengine-us-data/1.9.0/acs_2022.h5"
114+
url = "release://PolicyEngine/policyengine-us-data/1.10.0/acs_2022.h5"
115115

116116

117117
if __name__ == "__main__":

policyengine_us_data/datasets/cps/cps.py

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,8 @@ def generate(self):
5454
raw_data.close()
5555
self.save_dataset(cps)
5656

57+
add_takeup(self)
58+
5759

5860
def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
5961
cps["tenure_type"] = household.H_TENURE.map(
@@ -114,6 +116,30 @@ def add_rent(self, cps: h5py.File, person: DataFrame, household: DataFrame):
114116
cps["real_estate_taxes"][mask] = imputed_values["real_estate_taxes"]
115117

116118

119+
def add_takeup(self):
120+
data = self.load_dataset()
121+
122+
from policyengine_us import system, Microsimulation
123+
124+
baseline = Microsimulation(dataset=self)
125+
parameters = baseline.tax_benefit_system.parameters(self.time_period)
126+
generator = np.random.default_rng(seed=100)
127+
128+
snap_takeup_rate = parameters.gov.usda.snap.takeup_rate
129+
data["takes_up_snap_if_eligible"] = (
130+
generator.random(len(data["spm_unit_id"])) < snap_takeup_rate
131+
)
132+
133+
eitc_takeup_rates = parameters.gov.irs.credits.eitc.takeup
134+
eitc_child_count = baseline.calculate("eitc_child_count").values
135+
eitc_takeup_rate = eitc_takeup_rates.calc(eitc_child_count)
136+
data["takes_up_eitc"] = (
137+
generator.random(len(data["tax_unit_id"])) < eitc_takeup_rate
138+
)
139+
140+
self.save_dataset(data)
141+
142+
117143
def uprate_cps_data(data, from_period, to_period):
118144
uprating = create_policyengine_uprating_factors_table()
119145
for variable in uprating.index.unique():
@@ -622,7 +648,7 @@ class CPS_2024(CPS):
622648
label = "CPS 2024 (2022-based)"
623649
file_path = STORAGE_FOLDER / "cps_2024.h5"
624650
time_period = 2024
625-
url = "release://policyengine/policyengine-us-data/1.9.0/cps_2024.h5"
651+
url = "release://policyengine/policyengine-us-data/1.10.0/cps_2024.h5"
626652

627653

628654
class PooledCPS(Dataset):
@@ -681,7 +707,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
681707
CPS_2023,
682708
]
683709
time_period = 2023
684-
url = "release://PolicyEngine/policyengine-us-data/1.9.0/pooled_3_year_cps_2023.h5"
710+
url = "release://PolicyEngine/policyengine-us-data/1.10.0/pooled_3_year_cps_2023.h5"
685711

686712

687713
if __name__ == "__main__":

policyengine_us_data/datasets/cps/enhanced_cps.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def reweight(
2525
original_weights,
2626
loss_matrix,
2727
targets_array,
28-
dropout_rate=0.1,
28+
dropout_rate=0.05,
2929
):
3030
target_names = np.array(loss_matrix.columns)
3131
loss_matrix = torch.tensor(loss_matrix.values, dtype=torch.float32)
@@ -189,7 +189,7 @@ class EnhancedCPS_2024(EnhancedCPS):
189189
name = "enhanced_cps_2024"
190190
label = "Enhanced CPS 2024"
191191
file_path = STORAGE_FOLDER / "enhanced_cps_2024.h5"
192-
url = "release://policyengine/policyengine-us-data/1.9.0/enhanced_cps_2024.h5"
192+
url = "release://policyengine/policyengine-us-data/1.10.0/enhanced_cps_2024.h5"
193193

194194

195195
if __name__ == "__main__":

policyengine_us_data/utils/loss.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -208,8 +208,16 @@ def build_loss_matrix(dataset: type, time_period):
208208
)
209209
eitc_eligible_children = sim.calculate("eitc_child_count").values
210210
eitc = sim.calculate("eitc").values
211+
if row["count_children"] < 2:
212+
meets_child_criteria = (
213+
eitc_eligible_children == row["count_children"]
214+
)
215+
else:
216+
meets_child_criteria = (
217+
eitc_eligible_children >= row["count_children"]
218+
)
211219
loss_matrix[returns_label] = sim.map_result(
212-
(eitc > 0) * (eitc_eligible_children == row["count_children"]),
220+
(eitc > 0) * meets_child_criteria,
213221
"tax_unit",
214222
"household",
215223
)
@@ -219,7 +227,7 @@ def build_loss_matrix(dataset: type, time_period):
219227
f"irs/eitc/spending/count_children_{row['count_children']}"
220228
)
221229
loss_matrix[spending_label] = sim.map_result(
222-
eitc * (eitc_eligible_children == row["count_children"]),
230+
eitc * meets_child_criteria,
223231
"tax_unit",
224232
"household",
225233
)

0 commit comments

Comments
 (0)