|
"""Build a per-ZIP-code table with population, state and county.

Input files (read from the current working directory):

* ``zcta_2020_population.csv`` - per-ZCTA population, ACS 5-year estimates.
  Download URL: https://data.census.gov/cedsci/table?q=DP05%3A%20ACS%20DEMOGRAPHIC%20AND%20HOUSING%20ESTIMATES&g=0100000US%248600000&tid=ACSDP5Y2020.DP05
* ``zcta_2020_to_county_2020.csv`` - 2020 ZCTA to County Relationship File.
  Download URL: https://www.census.gov/geographies/reference-files/time-series/geo/relationship-files.html#zcta
* ``zip_code_to_zcta.csv`` - ZIP code to ZCTA crosswalk.
  Download URL: https://udsmapper.org/zip-code-to-zcta-crosswalk/

Output file: ``zip_codes.csv`` (gzip-compressed, see ``main``).

ZIP codes and ZCTAs are stored as integers throughout, so leading zeros are
not preserved.
"""
import pandas as pd

# Fixed seed so that the "random" county picked for multi-county ZCTAs is the
# same on every run and the generated file is reproducible.
_COUNTY_SAMPLE_SEED = 0


def _load_zcta_population(path: str = "zcta_2020_population.csv") -> pd.DataFrame:
    """Load the per-ZCTA population table with columns ``population``, ``zcta``."""
    # Columns are selected by their header in the file and renamed afterwards:
    # when ``names=`` is passed to read_csv, ``usecols`` is matched against
    # those names rather than the file header, so combining both fails.
    # NOTE(review): assumes the usual data.census.gov layout - first line holds
    # the column codes, second line the human-readable labels (skipped here).
    frame = pd.read_csv(
        path,
        low_memory=False,
        usecols=["DP05_0001E", "NAME"],
        header=0,
        skiprows=[1],
    ).rename(columns={"DP05_0001E": "population", "NAME": "zcta"})
    # NAME is "<prefix> <zcta>"; keep the second space-separated token.
    frame["zcta"] = frame["zcta"].str.split(" ").str[1].astype(int)
    frame["population"] = frame["population"].astype(int)
    return frame[["population", "zcta"]]


def _load_zcta_to_county(path: str = "zcta_2020_to_county_2020.csv") -> pd.DataFrame:
    """Load the ZCTA-to-county mapping with exactly one ``county`` per ``zcta``."""
    frame = (
        pd.read_csv(
            path,
            delimiter="|",
            usecols=["GEOID_ZCTA5_20", "NAMELSAD_COUNTY_20"],
        )
        .rename(columns={"GEOID_ZCTA5_20": "zcta", "NAMELSAD_COUNTY_20": "county"})
        # Rows lacking either a ZCTA or a county name cannot be mapped.
        .dropna()
        .astype({"zcta": int})
    )
    # Some ZCTAs have more than one county - select a random one (seeded).
    # GroupBy.sample keeps "zcta" as a regular column and a flat index.
    return frame.groupby("zcta").sample(n=1, random_state=_COUNTY_SAMPLE_SEED)


def _load_zip_to_zcta(path: str = "zip_code_to_zcta.csv") -> pd.DataFrame:
    """Load the ZIP-code-to-ZCTA crosswalk with ``zip_code``, ``zcta``, ``state``."""
    frame = pd.read_csv(path, usecols=["ZIP_CODE", "ZCTA", "STATE"]).rename(
        columns={"ZIP_CODE": "zip_code", "ZCTA": "zcta", "STATE": "state"}
    )
    # Pin the column order independently of the order in the input file.
    frame = frame[["zip_code", "zcta", "state"]]
    # ZIP codes without a ZCTA are marked with the literal "No ZCTA".
    frame = frame[frame["zcta"] != "No ZCTA"]
    return frame.astype({"zip_code": int, "zcta": int})


def _build_zip_code_table(
    zcta: pd.DataFrame, zcta_to_county: pd.DataFrame, zip_code: pd.DataFrame
) -> pd.DataFrame:
    """Attach ``population`` and ``county`` columns to every usable ZIP code.

    ZIP codes whose ZCTA is missing from either ``zcta`` or ``zcta_to_county``
    are dropped. The inputs are not modified.
    """
    usable = zip_code["zcta"].isin(zcta["zcta"]) & zip_code["zcta"].isin(
        zcta_to_county["zcta"]
    )
    table = zip_code[usable].copy()

    population_by_zcta = zcta.set_index("zcta")["population"]
    county_by_zcta = zcta_to_county.set_index("zcta")["county"]

    # ZCTAs have multiple ZIP codes - split each ZCTA population equally into
    # its component ZIP codes (counted after the filtering above).
    zips_per_zcta = table.groupby("zcta")["zip_code"].transform("count")
    table["population"] = table["zcta"].map(population_by_zcta) / zips_per_zcta
    table["county"] = table["zcta"].map(county_by_zcta)
    return table


def main() -> None:
    """Read the three input files and write ``zip_codes.csv``."""
    table = _build_zip_code_table(
        _load_zcta_population(),
        _load_zcta_to_county(),
        _load_zip_to_zcta(),
    )
    # NOTE: the file is gzip-compressed although its name ends in ".csv";
    # name, compression and the index column are kept for existing consumers.
    table.to_csv("zip_codes.csv", compression="gzip")


if __name__ == "__main__":
    main()
0 commit comments