Skip to content

Commit 7bc6b19

Browse files
authored
Merge pull request #113 from PolicyEngine/MaxGhenis/issue111
Add methodology paper
2 parents 728cc41 + 97f4e66 commit 7bc6b19

26 files changed

Lines changed: 1201 additions & 1 deletion

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: all format test install download upload docker documentation data clean build
1+
# Targets that name commands rather than files; `publish` is included so a stray file called "publish" cannot mask it.
.PHONY: all format test install download upload docker documentation data clean build publish paper clean-paper
22

33
all: data test
44

@@ -49,3 +49,15 @@ build:
4949

5050
publish:
5151
twine upload dist/*
52+
53+
paper: paper/main.pdf
54+
55+
# Rebuild the PDF when any section, bibliography, figure, or top-level TeX source changes.
# $(wildcard) has no recursive `**` (it globs like a single `*`), so section files directly
# under paper/sections/ and those one directory deeper are matched by separate patterns.
paper/main.pdf: $(wildcard paper/sections/*.tex) $(wildcard paper/sections/*/*.tex) $(wildcard paper/bibliography/*.bib) $(wildcard paper/figures/*.png) paper/main.tex paper/macros.tex
56+
cd paper && \
57+
BIBINPUTS=./bibliography pdflatex main && \
58+
BIBINPUTS=./bibliography bibtex main && \
59+
pdflatex main && \
60+
pdflatex main
61+
62+
clean-paper:
63+
# Remove LaTeX by-products and the built PDF; plain sh treats `**` as `*`, so both section depths are listed explicitly.
rm -f paper/*.aux paper/*.bbl paper/*.blg paper/*.log paper/*.out paper/*.toc paper/main.pdf paper/sections/*.aux paper/sections/*/*.aux

README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,50 @@
11
# PolicyEngine US Data
2+
3+
## Installation
4+
5+
```bash
6+
pip install policyengine-us-data
7+
```
8+
9+
## Building the Paper
10+
11+
### Prerequisites
12+
13+
The paper requires a LaTeX distribution (e.g., TeX Live or MiKTeX) with the following packages:
14+
15+
- graphicx (for figures)
16+
- amsmath (for mathematical notation)
17+
- natbib (for bibliography management)
18+
- hyperref (for PDF links)
19+
- booktabs (for tables)
20+
- geometry (for page layout)
21+
- microtype (for typography)
22+
- xcolor (for colored links)
23+
24+
On Ubuntu/Debian, you can install these with:
25+
26+
```bash
27+
sudo apt-get install texlive-latex-base texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended
28+
```
29+
30+
On macOS with Homebrew:
31+
32+
```bash
33+
brew install --cask mactex
34+
```
35+
36+
### Building
37+
38+
To build the paper:
39+
40+
```bash
41+
make paper
42+
```
43+
44+
To remove the LaTeX build files and the generated PDF:
45+
46+
```bash
47+
make clean-paper
48+
```
49+
50+
The output PDF will be at `paper/main.pdf`.

changelog_entry.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- Paper on methodology.

paper/.gitignore

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
## Core latex/pdflatex auxiliary files:
2+
*.aux
3+
*.lof
4+
*.log
5+
*.lot
6+
*.fls
7+
*.out
8+
*.toc
9+
*.fmt
10+
*.fot
11+
*.cb
12+
*.cb2
13+
.*.lb
14+
15+
## Generated if empty string is given at "Please type another file name for output:"
16+
.pdf
17+
18+
## Bibliography auxiliary files (bibtex/biblatex/biber):
19+
*.bbl
20+
*.bcf
21+
*.blg
22+
*-blx.aux
23+
*-blx.bib
24+
*.run.xml
25+
26+
## Build tool auxiliary files:
27+
*.fdb_latexmk
28+
*.synctex
29+
*.synctex(busy)
30+
*.synctex.gz
31+
*.synctex.gz(busy)
32+
*.pdfsync

paper/bibliography/references.bib

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
@techreport{cbo2018,
2+
title = {An Overview of CBO's Microsimulation Tax Model},
3+
author = {{Congressional Budget Office}},
4+
institution = {Congressional Budget Office},
5+
year = {2018},
6+
url = {https://www.cbo.gov/publication/54096}
7+
}
8+
9+
@techreport{jct2023,
10+
title = {Overview of JCT Revenue Estimating Methods},
11+
author = {{Joint Committee on Taxation}},
12+
institution = {Joint Committee on Taxation},
13+
number = {JCX-48-23},
14+
year = {2023},
15+
url = {https://www.jct.gov/publications/2023/jcx-48-23/}
16+
}
17+
18+
@techreport{ota2012,
19+
title = {Revenue Estimating Models at the U.S. Treasury Department},
20+
author = {{Office of Tax Analysis}},
21+
institution = {U.S. Department of the Treasury},
22+
number = {Technical Paper 12},
23+
year = {2012},
24+
url = {https://home.treasury.gov/system/files/131/TP-12.pdf}
25+
}
26+
27+
@article{saez2012,
28+
title = {The Elasticity of Taxable Income with Respect to Marginal Tax Rates: A Critical Review},
29+
author = {Saez, Emmanuel and Slemrod, Joel and Giertz, Seth H},
30+
journal = {Journal of Economic Literature},
31+
volume = {50},
32+
number = {1},
33+
pages = {3--50},
34+
year = {2012}
35+
}
36+
37+
@misc{tpc2022,
38+
title = {Brief Description of the Tax Model},
39+
author = {{Tax Policy Center}},
40+
year = {2022},
41+
url = {https://www.taxpolicycenter.org/resources/brief-description-tax-model},
42+
note = {Updated March 2022}
43+
}
44+
45+
@misc{itep2024,
46+
title = {ITEP Tax Model Overview},
47+
author = {{Institute on Taxation and Economic Policy}},
48+
year = {2024},
49+
url = {https://itep.org/itep-tax-model/}
50+
}
51+
52+
@misc{tf2024,
53+
title = {Overview of the Tax Foundation's Taxes and Growth Model},
54+
author = {{Tax Foundation}},
55+
year = {2024},
56+
url = {https://taxfoundation.org/research/all/federal/overview-tax-foundations-taxes-growth-model/}
57+
}
58+
59+
@misc{trim2024,
60+
title = {TRIM3 Project Documentation: Transfer Income Model, Version 3},
61+
author = {{Urban Institute}},
62+
year = {2024},
63+
url = {https://boreas.urban.org/documentation/input/Concepts%20and%20Procedures/Modifications%20to%20the%20Underlying%20Surveys.php}
64+
}
65+
66+
@misc{attis2024,
67+
title = {ATTIS Microsimulation Model},
68+
author = {{Urban Institute}},
69+
year = {2024},
70+
url = {https://www.urban.org/research-methods/attis-microsimulation-model}
71+
}
72+
73+
@misc{budgetlab2024,
74+
title = {Tax Microsimulation at The Budget Lab},
75+
author = {{Budget Lab}},
76+
institution = {Yale University},
77+
year = {2024},
78+
url = {https://budgetlab.yale.edu/research/tax-microsimulation-budget-lab}
79+
}
80+
81+
@misc{psl2024,
82+
title = {Tax-Data Documentation},
83+
author = {{Policy Simulation Library}},
84+
year = {2024},
85+
url = {https://github.com/PSLmodels/taxdata}
86+
}
87+
88+
@article{ohare2009,
89+
title = {Statistical Matching Using the Current Population Survey as the Donor: Techniques and Issues},
90+
author = {O'Hare, William P},
91+
journal = {National Tax Journal},
92+
volume = {62},
93+
number = {3},
94+
pages = {519--537},
95+
year = {2009}
96+
}
97+
98+
@techreport{piketty2018,
99+
title = {Distributional National Accounts: Methods and Estimates for the United States},
100+
author = {Piketty, Thomas and Saez, Emmanuel and Zucman, Gabriel},
101+
institution = {National Bureau of Economic Research},
102+
number = {w22945},
103+
year = {2018}
104+
}
105+
106+
@article{burkhauser2012,
107+
title = {Recent Trends in Top Income Shares in the United States: Reconciling Estimates from March CPS and IRS Tax Return Data},
108+
author = {Burkhauser, Richard V and Feng, Shuaizhang and Jenkins, Stephen P and Larrimore, Jeff},
109+
journal = {Review of Economics and Statistics},
110+
volume = {94},
111+
number = {2},
112+
pages = {371--388},
113+
year = {2012}
114+
}
115+
116+
@article{auerbach2018,
117+
title = {Macroeconomic Modeling of Tax Policy: A Comparison of Current Methodologies},
118+
author = {Auerbach, Alan J and Kotlikoff, Laurence J and Koehler, Darryl},
119+
journal = {National Tax Journal},
120+
volume = {71},
121+
number = {3},
122+
pages = {541--576},
123+
year = {2018}
124+
}
125+
126+
@techreport{bryant2023a,
127+
title = {General Description Booklet for the 2015 Public Use Tax File},
128+
author = {Bryant, Victoria},
129+
institution = {Statistics of Income Division, Internal Revenue Service},
130+
year = {2023},
131+
month = {February},
132+
type = {Technical Documentation},
133+
url = {https://drive.google.com/file/d/1WoTU70GEjYMO0KHsHvTTH0NwCc-kN5cE/view}
134+
}
135+
136+
@techreport{bryant2023b,
137+
title = {General Description Booklet for the 2015 Public Use Tax File Demographic File},
138+
author = {Bryant, Victoria},
139+
institution = {Statistics of Income Division, Internal Revenue Service},
140+
year = {2023},
141+
month = {February},
142+
type = {Technical Documentation},
143+
url = {https://drive.google.com/file/d/1WoTU70GEjYMO0KHsHvTTH0NwCc-kN5cE/view}
144+
}
145+
146+
@techreport{census2024,
147+
title = {Current Population Survey, 2024 Annual Social and Economic (ASEC) Supplement},
148+
author = {{U.S. Census Bureau}},
149+
institution = {U.S. Census Bureau},
150+
year = {2024},
151+
url = {https://www2.census.gov/programs-surveys/cps/datasets/2024/march/asec2024_ddl_pub_full.pdf}
152+
}
153+
154+
@article{meinshausen2006quantile,
155+
title = {Quantile regression forests},
156+
author = {Meinshausen, Nicolai},
pages = {983--999},
157+
journal = {Journal of machine learning research},
158+
volume = {7},
159+
number = {6},
160+
year = {2006}
161+
}
162+
163+
@misc{zillow2024quantile,
164+
title = {quantile-forest: Scikit-learn compatible quantile regression forests},
165+
author = {{Zillow Group}},
166+
year = {2024},
167+
howpublished = {\url{https://zillow.github.io/quantile-forest/}}
168+
}
169+
170+
@article{pytorch2019,
171+
title = {PyTorch: An Imperative Style, High-Performance Deep Learning Library},
172+
author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others},
173+
journal = {Advances in Neural Information Processing Systems},
174+
volume = {32},
175+
year = {2019}
176+
}
177+
178+
@techreport{woodruff2023survey,
179+
title = {Surveying the (loss) landscape: using machine learning to improve household survey accuracy},
180+
author = {Woodruff, Nikhil},
181+
institution = {University of Durham},
182+
year = {2023},
183+
month = {April},
184+
note = {Demonstrates superiority of machine learning approaches over traditional methods for survey enhancement through comprehensive benchmarking},
185+
url = {https://github.com/policyengine/survey-enhance/blob/main/docs/paper/project_paper.pdf}
186+
}

paper/figures/data_flow.png

205 KB
Loading

paper/figures/ecps_vs_cps_puf.png

64.3 KB
Loading

paper/macros.tex

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
% Custom commands and mathematics macros
2+
\newcommand{\policyengine}{\textsc{PolicyEngine}}
3+
\newcommand{\cps}{\textsc{CPS}}
4+
\newcommand{\puf}{\textsc{PUF}}

paper/main.pdf

402 KB
Binary file not shown.

paper/main.tex

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
\documentclass[12pt]{article}
2+
3+
\usepackage{graphicx}
4+
\usepackage{amsmath}
5+
\usepackage[round]{natbib} % Keep round option
6+
\usepackage{hyperref}
7+
\usepackage{booktabs}
8+
\usepackage{geometry}
9+
\usepackage{microtype}
10+
\usepackage{xcolor}
11+
12+
% Citation style (natbib), configured in the preamble
13+
\bibpunct{(}{)}{;}{a}{,}{,} % round brackets, ';' between citations, author-year mode, ',' before the year and between years
14+
\setcitestyle{authoryear,round} % author-year citations in round brackets
15+
16+
\input{macros}
17+
18+
\geometry{margin=1in}
19+
\hypersetup{
20+
colorlinks=true,
21+
linkcolor=blue,
22+
filecolor=magenta,
23+
urlcolor=blue,
24+
citecolor=blue,
25+
}
26+
27+
28+
\title{Enhancing Survey Microdata with Administrative Records: \\ A Novel Approach to Microsimulation Dataset Construction}
29+
% Define the \samethanks command
30+
\newcommand*\samethanks[1][\value{footnote}]{\footnotemark[#1]}
31+
32+
% Define authors with the same affiliation
33+
\author{
34+
Nikhil Woodruff\thanks{PolicyEngine} \and
35+
Max Ghenis\samethanks
36+
}
37+
\date{\today}
38+
39+
\begin{document}
40+
41+
\maketitle
42+
43+
\input{sections/abstract}
44+
\input{sections/introduction}
45+
\input{sections/background}
46+
\input{sections/data}
47+
\input{sections/methodology}
48+
\input{sections/results}
49+
\input{sections/discussion}
50+
\input{sections/conclusion}
51+
52+
\bibliographystyle{plainnat}
53+
\bibliography{./bibliography/references}
54+
55+
\end{document}

0 commit comments

Comments
 (0)