Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ repos:
args: [--fix]
- id: ruff-format

# Type-check with mypy --strict (config lives in pyproject [tool.mypy]).
# Pinned to the same major as CI's `mypy<2`. httpx/anyio are installed into
# the isolated hook env so their types resolve — without them mypy falls back
# to `Any` and mis-reports (the runtime deps aren't in the hook's venv).
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.20.2
hooks:
- id: mypy
pass_filenames: false
additional_dependencies: [httpx, anyio]

# Strip cell outputs + execution_count from notebooks on commit so the
# diff is the source, not the rendered run. Demos still execute fine
# locally; clean commits keep PRs reviewable and avoid quota/timestamp
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
Like the original R version
[`dataRetrieval`](https://github.com/DOI-USGS/dataRetrieval), it retrieves major
U.S. Geological Survey (USGS) hydrology data types available on the Web, as well
as data from the Water Quality Portal (WQP) and Network Linked Data Index
(NLDI).
as data from the Water Quality Portal (WQP), the National Ground-Water
Monitoring Network (NGWMN), and the Network Linked Data Index (NLDI).

Check the [NEWS](NEWS.md) for all updates and announcements.

Expand Down Expand Up @@ -85,7 +85,7 @@ stream sites in Maryland:
```python
# Get monitoring location information
df, metadata = waterdata.get_monitoring_locations(
state_name='Maryland',
state='Maryland', # full name, postal code ('MD'), or FIPS ('24')
site_type_code='ST' # Stream sites
)

Expand Down
22 changes: 21 additions & 1 deletion dataretrieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

A failed request raises a subclass of :class:`dataretrieval.DataRetrievalError`
(the taxonomy lives in ``dataretrieval.exceptions``); connection-level failures
(timeouts, DNS) are wrapped as :class:`dataretrieval.NetworkError`.
(timeouts, DNS) are wrapped as :class:`dataretrieval.NetworkError`. A large
request interrupted mid-stream raises :class:`dataretrieval.ChunkInterrupted`,
whose ``.call.resume()`` continues from the work already completed.
"""

from importlib.metadata import PackageNotFoundError, version
Expand All @@ -42,9 +44,22 @@
URLTooLong,
)

# Resumable chunk-interruption exceptions. They are defined in
# ``dataretrieval.ogc.chunking`` rather than ``dataretrieval.exceptions``
# because they carry pandas/httpx state and a resumable ``ChunkedCall`` handle,
# which would pull heavy dependencies into the lightweight exceptions module.
# Surfaced here so callers get a stable public path:
# ``from dataretrieval import ChunkInterrupted``.
from dataretrieval.ogc.chunking import (
ChunkInterrupted,
QuotaExhausted,
ServiceInterrupted,
)

from . import (
exceptions,
nadp,
ngwmn,
nwis,
samples,
streamstats,
Expand All @@ -56,6 +71,7 @@
__all__ = [
# service modules
"nadp",
"ngwmn",
"nwis",
"samples",
"streamstats",
Expand All @@ -75,5 +91,9 @@
"TransientError",
"URLTooLong",
"Unchunkable",
# resumable chunk-interruption exceptions (defined in ogc.chunking)
"ChunkInterrupted",
"QuotaExhausted",
"ServiceInterrupted",
"__version__",
]
75 changes: 73 additions & 2 deletions dataretrieval/codes/states.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
"""State code lookups keyed by full state name.
"""State code lookups and normalization, keyed by full state name.

``state_codes`` maps each state name to its two-letter postal abbreviation
(e.g. ``"Alabama": "al"``); ``fips_codes`` maps it to its two-digit FIPS
code (e.g. ``"Alabama": "01"``).
code (e.g. ``"Alabama": "01"``). :func:`to_state` normalizes a state
identifier -- a full name, postal code, or two-digit / ``US:``-prefixed FIPS
code (or an iterable of them) -- to a chosen representation, raising
``ValueError`` on an unrecognized value. Coverage is the 50 states plus the
District of Columbia.
"""

from __future__ import annotations

from collections.abc import Iterable

state_codes = {
"Alabama": "al",
"Alaska": "ak",
Expand Down Expand Up @@ -112,3 +120,66 @@
"Wisconsin": "55",
"Wyoming": "56",
}

# Inverse indexes, computed once at import time so each lookup is a single
# dict access: lowercase postal code -> full name, two-digit FIPS code -> full
# name, and lowercased full name -> canonical full name (for case-insensitive
# matching). Every value is a key of ``state_codes``; the module relies on
# ``fips_codes`` having the same keys, so a resolved name indexes either table.
_name_by_postal = {abbr: full_name for full_name, abbr in state_codes.items()}
_name_by_fips = {digits: full_name for full_name, digits in fips_codes.items()}
_name_by_lower = {full_name.lower(): full_name for full_name in state_codes}


def to_state(value: str | Iterable[str], to: str = "name") -> str | list[str]:
    """Normalize a US state identifier to a chosen representation.

    ``value`` may be given as a full name (``"Wisconsin"``), a two-letter
    postal code (``"WI"``), a two-digit ANSI/FIPS code (``"55"``), or a
    prefixed FIPS code (``"US:55"``). The encodings are unambiguous: a value
    prefixed ``US:`` or all-digits is a FIPS code, exactly two letters is a
    postal code, anything else is matched (case-insensitively) as a full name.
    A single string yields a single string; any other iterable of identifiers
    is resolved element-wise and yields a list (an empty iterable yields
    ``[]``).

    ``to`` selects the output representation:

    * ``"name"``    -> full name, e.g. ``"Wisconsin"``
    * ``"postal"``  -> uppercase two-letter code, e.g. ``"WI"``
    * ``"fips"``    -> two-digit ANSI/FIPS code, e.g. ``"55"``
    * ``"fips_us"`` -> ``"US:"`` + FIPS code, e.g. ``"US:55"``

    Coverage is the 50 states plus the District of Columbia. A ``value`` that
    isn't a recognized state in one of those encodings raises ``ValueError``
    (so a typo fails fast rather than silently matching nothing). An
    unsupported ``to`` also raises ``ValueError``; it is checked before any
    element is resolved, so it is reported even when ``value`` is an empty
    iterable.
    """
    # Validate the output selector eagerly: the per-element resolver only
    # checks it after a successful lookup, so without this an empty iterable
    # would accept any ``to`` and return ``[]``.
    if to not in ("name", "postal", "fips", "fips_us"):
        raise ValueError(
            f"to must be 'name', 'postal', 'fips', or 'fips_us'; got {to!r}"
        )

    # A lone string is one identifier, not an iterable of characters.
    if isinstance(value, str):
        return _to_state_one(value, to)
    return [_to_state_one(v, to) for v in value]


def _to_state_one(value: str, to: str) -> str:
    """Resolve one state identifier to the representation named by ``to``.

    Surrounding whitespace is ignored. The identifier is classified as a FIPS
    code (``US:``-prefixed or all digits, left-padded with zeros to two
    characters), a postal code (exactly two letters, any case), or otherwise
    a full name (any case). Raises ``ValueError`` when the identifier does not
    resolve to a known state, or -- once it has resolved -- when ``to`` is not
    one of ``"name"``, ``"postal"``, ``"fips"``, ``"fips_us"``.
    """
    token = value.strip()
    has_us_prefix = token[:3].upper() == "US:"

    if has_us_prefix or token.isdigit():
        # Both FIPS spellings share one lookup; only the prefixed form needs
        # the leading "US:" (and any padding after it) removed first.
        digits = token[3:].strip() if has_us_prefix else token
        resolved = _name_by_fips.get(digits.zfill(2))
    elif token.isalpha() and len(token) == 2:
        # Postal codes are stored lowercase in the lookup table.
        resolved = _name_by_postal.get(token.lower())
    else:
        resolved = _name_by_lower.get(token.lower())

    if resolved is None:
        raise ValueError(
            f"{value!r} is not a recognized US state or the District of "
            f'Columbia. Provide a full name ("Wisconsin"), a two-letter postal '
            f'code ("WI"), or a two-digit ANSI/FIPS code ("55").'
        )

    # Format the canonical name as requested; fall through to an error for an
    # unknown selector.
    if to == "name":
        return resolved
    if to == "postal":
        return state_codes[resolved].upper()
    if to == "fips":
        return fips_codes[resolved]
    if to == "fips_us":
        return f"US:{fips_codes[resolved]}"
    raise ValueError(f"to must be 'name', 'postal', 'fips', or 'fips_us'; got {to!r}")
Loading