diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3c5cf390..0813f1fe 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,6 +27,17 @@ repos:
         args: [--fix]
       - id: ruff-format
 
+  # Type-check with mypy --strict (config lives in pyproject [tool.mypy]).
+  # Pinned to the same major as CI's `mypy<2`. httpx/anyio are installed into
+  # the isolated hook env so their types resolve — without them mypy falls back
+  # to `Any` and mis-reports (the runtime deps aren't in the hook's venv).
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.20.2
+    hooks:
+      - id: mypy
+        pass_filenames: false
+        additional_dependencies: [httpx, anyio]
+
   # Strip cell outputs + execution_count from notebooks on commit so the
   # diff is the source, not the rendered run. Demos still execute fine
   # locally; clean commits keep PRs reviewable and avoid quota/timestamp
diff --git a/README.md b/README.md
index c1e8b6fe..d651be6c 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,8 @@
 Like the original R version
 [`dataRetrieval`](https://github.com/DOI-USGS/dataRetrieval), it retrieves major
 U.S. Geological Survey (USGS) hydrology data types available on the Web, as well
-as data from the Water Quality Portal (WQP) and Network Linked Data Index
-(NLDI).
+as data from the Water Quality Portal (WQP), the National Ground-Water
+Monitoring Network (NGWMN), and the Network Linked Data Index (NLDI).
 
 Check the [NEWS](NEWS.md) for all updates and announcements.
 
@@ -85,7 +85,7 @@ stream sites in Maryland:
 ```python
 # Get monitoring location information
 df, metadata = waterdata.get_monitoring_locations(
-    state_name='Maryland',
+    state='Maryland',  # full name, postal code ('MD'), or FIPS ('24')
     site_type_code='ST'  # Stream sites
 )
 
diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py
index 9dfb1991..45bae98d 100644
--- a/dataretrieval/__init__.py
+++ b/dataretrieval/__init__.py
@@ -19,7 +19,9 @@
 
 A failed request raises a subclass of :class:`dataretrieval.DataRetrievalError`
 (the taxonomy lives in ``dataretrieval.exceptions``); connection-level failures
-(timeouts, DNS) are wrapped as :class:`dataretrieval.NetworkError`.
+(timeouts, DNS) are wrapped as :class:`dataretrieval.NetworkError`. A large
+request interrupted mid-stream raises :class:`dataretrieval.ChunkInterrupted`,
+whose ``.call.resume()`` continues from the work already completed.
 """
 
 from importlib.metadata import PackageNotFoundError, version
@@ -42,9 +44,22 @@
     URLTooLong,
 )
 
+# Resumable chunk-interruption exceptions. They are defined in
+# ``dataretrieval.ogc.chunking`` rather than ``dataretrieval.exceptions``
+# because they carry pandas/httpx state and a resumable ``ChunkedCall`` handle,
+# which would pull heavy dependencies into the lightweight exceptions module.
+# Surfaced here so callers get a stable public path:
+# ``from dataretrieval import ChunkInterrupted``.
+from dataretrieval.ogc.chunking import (
+    ChunkInterrupted,
+    QuotaExhausted,
+    ServiceInterrupted,
+)
+
 from . import (
     exceptions,
     nadp,
+    ngwmn,
     nwis,
     samples,
     streamstats,
@@ -56,6 +71,7 @@
 __all__ = [
     # service modules
     "nadp",
+    "ngwmn",
     "nwis",
     "samples",
     "streamstats",
@@ -75,5 +91,9 @@
     "TransientError",
     "URLTooLong",
     "Unchunkable",
+    # resumable chunk-interruption exceptions (defined in ogc.chunking)
+    "ChunkInterrupted",
+    "QuotaExhausted",
+    "ServiceInterrupted",
     "__version__",
 ]
diff --git a/dataretrieval/codes/states.py b/dataretrieval/codes/states.py
index 5d761736..8bb587ad 100644
--- a/dataretrieval/codes/states.py
+++ b/dataretrieval/codes/states.py
@@ -1,10 +1,18 @@
-"""State code lookups keyed by full state name.
+"""State code lookups and normalization, keyed by full state name.
 
 ``state_codes`` maps each state name to its two-letter postal abbreviation
 (e.g. ``"Alabama": "al"``); ``fips_codes`` maps it to its two-digit FIPS
-code (e.g. ``"Alabama": "01"``).
+code (e.g. ``"Alabama": "01"``). :func:`to_state` normalizes a state
+identifier -- a full name, postal code, or two-digit / ``US:``-prefixed FIPS
+code (or an iterable of them) -- to a chosen representation, raising
+``ValueError`` on an unrecognized value. Coverage is the 50 states plus the
+District of Columbia.
 """
 
+from __future__ import annotations
+
+from collections.abc import Iterable
+
 state_codes = {
     "Alabama": "al",
     "Alaska": "ak",
@@ -112,3 +120,66 @@
     "Wisconsin": "55",
     "Wyoming": "56",
 }
+
+# Reverse lookups (built once): postal code -> name, FIPS code -> name, and a
+# case-insensitive full-name index. ``state_codes`` and ``fips_codes`` share the
+# same keys, so any name resolved here is valid in both.
+_name_by_postal = {code: name for name, code in state_codes.items()}
+_name_by_fips = {fips: name for name, fips in fips_codes.items()}
+_name_by_lower = {name.lower(): name for name in state_codes}
+
+
+def to_state(value: str | Iterable[str], to: str = "name") -> str | list[str]:
+    """Normalize a US state identifier to a chosen representation.
+
+    ``value`` is a full name (``"Wisconsin"``), a two-letter postal code
+    (``"WI"``), a FIPS code (``"55"``), or a ``US:``-prefixed FIPS code
+    (``"US:55"``); matching ignores case and surrounding whitespace. A value
+    prefixed ``US:`` or all-digits is a FIPS code, exactly two letters is a
+    postal code, anything else is a full name. A ``str`` yields a ``str``; any
+    other iterable is resolved element-wise to a ``list``.
+
+    ``to`` selects the output: ``"name"`` (``"Wisconsin"``), ``"postal"``
+    (``"WI"``), ``"fips"`` (``"55"``), or ``"fips_us"`` (``"US:55"``).
+
+    Coverage is the 50 states plus the District of Columbia (no territories).
+    Raises ``ValueError`` for an unrecognized ``value`` or ``to``. ``to`` is
+    checked first, so a bad format is reported even when ``value`` is an empty
+    iterable (which would otherwise return ``[]`` without ever checking it).
+    """
+    if to not in ("name", "postal", "fips", "fips_us"):
+        msg = f"to must be 'name', 'postal', 'fips', or 'fips_us'; got {to!r}"
+        raise ValueError(msg)
+    if isinstance(value, str):
+        return _to_state_one(value, to)
+    return [_to_state_one(v, to) for v in value]
+
+
+def _to_state_one(value: str, to: str) -> str:
+    """Convert one state identifier to the ``to`` form; see :func:`to_state`."""
+    token = value.strip()
+    if token[:3].upper() == "US:":  # prefixed FIPS, e.g. "US:55"
+        table, key = _name_by_fips, token[3:].strip().zfill(2)
+    elif token.isdigit():  # bare FIPS, e.g. "55"
+        table, key = _name_by_fips, token.zfill(2)
+    elif len(token) == 2 and token.isalpha():  # postal, e.g. "WI"
+        table, key = _name_by_postal, token.lower()
+    else:  # full name, matched case-insensitively
+        table, key = _name_by_lower, token.lower()
+    name = table.get(key)
+    if name is None:
+        raise ValueError(
+            f"{value!r} is not a recognized US state or the District of "
+            f'Columbia. Provide a full name ("Wisconsin"), a two-letter postal '
+            f'code ("WI"), or a two-digit ANSI/FIPS code ("55").'
+        )
+
+    # ``name`` is the canonical full name; render it in the requested format.
+    if to == "name":
+        return name
+    if to == "postal":
+        return state_codes[name].upper()
+    if to in ("fips", "fips_us"):
+        fips = fips_codes[name]
+        return fips if to == "fips" else f"US:{fips}"
+    raise ValueError(f"to must be 'name', 'postal', 'fips', or 'fips_us'; got {to!r}")
diff --git a/dataretrieval/ngwmn.py b/dataretrieval/ngwmn.py
new file mode 100644
index 00000000..879962c8
--- /dev/null
+++ b/dataretrieval/ngwmn.py
@@ -0,0 +1,422 @@
+"""National Ground-Water Monitoring Network (NGWMN) getters.
+
+The NGWMN exposes its data through a dedicated OGC API
+(``https://api.waterdata.usgs.gov/ngwmn/ogcapi``) with five collections:
+``sites``, ``waterLevelObs``, ``lithologyObs``, ``constructionObs``, and
+``providers``. Each getter below delegates to the shared OGC engine
+(:func:`~dataretrieval.ogc.engine.get_ogc_data`) with
+``base_url=NGWMN_OGC_API_URL``, so multi-value chunking, pagination,
+retry/resume, and result shaping all behave exactly as they do for the main
+Water Data getters.
+
+Unlike the main Water Data collections, NGWMN aggregates monitoring locations
+from many agencies, so ``monitoring_location_id`` values use other agency
+prefixes besides ``USGS-`` (e.g. ``MBMG-702934``, ``AKDNR-535134236016630``).
+
+See https://api.waterdata.usgs.gov/ngwmn/ogcapi for the API reference.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import Any
+
+import pandas as pd
+
+from dataretrieval.codes.states import to_state
+from dataretrieval.ogc.engine import BASE_URL, OgcDialect, _get_args, get_ogc_data
+from dataretrieval.utils import BaseMetadata
+
+# The National Ground-Water Monitoring Network exposes its own OGC API at a
+# separate, unversioned base.
+NGWMN_OGC_API_URL = f"{BASE_URL}/ngwmn/ogcapi"
+
+# --- state-filter shim -------------------------------------------------------
+# NGWMN's collections expose DIFFERENT state queryables: ``sites`` filters on
+# the full ``state_name`` (e.g. "Wisconsin"), while ``providers`` filters on the
+# two-letter postal ``state`` (uppercase, e.g. "WI"). The state-aware getters
+# take a single ``state`` parameter accepting any US-state encoding (full name,
+# postal code, or FIPS code); ``_resolve_state`` normalizes it (via
+# ``codes.states``) into the one queryable each collection wants.
+#
+# This shim exists only to smooth over that upstream asymmetry.
+# ``tests/ngwmn_test.py::test_state_queryables_still_diverge_upstream`` fails —
+# the signal to remove it — if the API ever unifies the two queryables.
+_STATE_QUERYABLE = {
+    # service -> (upstream queryable name, to_state output format it expects)
+    "sites": ("state_name", "name"),
+    "providers": ("state", "postal"),
+}
+
+
+def _resolve_state(local_vars: dict[str, Any], service: str) -> None:
+    """Swap the user-facing ``state`` entry for the collection's own queryable.
+
+    Pops ``state`` from ``local_vars`` and, unless it is missing or ``None``,
+    stores its :func:`to_state` normalization under the queryable name that
+    ``_STATE_QUERYABLE`` lists for ``service``. Edits the dict in place.
+    """
+    state = local_vars.pop("state", None)
+    if state is not None:
+        # Each collection wants a different queryable name and state encoding.
+        target, encoding = _STATE_QUERYABLE[service]
+        local_vars[target] = to_state(state, encoding)
+
+
+# The NGWMN OGC API exposes the feature id under the generic ``id`` column
+# (there is no service-specific id name as there is for the main collections).
+_NGWMN_OUTPUT_ID = "id"
+
+# NGWMN's request shape matches the generic OGC default (no CQL2-only or
+# date-only collections), but its result columns need their own coercion and
+# sort vocabulary: water-level observations are timestamped by ``sample_time``
+# (not the Water Data ``time``) and report depths/levels in feet.
+NGWMN_DIALECT = OgcDialect(
+    time_cols=frozenset({"sample_time"}),
+    numerical_cols=frozenset(
+        {
+            "water_depth_below_land_surface_ft",
+            "water_level_above_site_datum_ft",
+            "water_level_above_navd88_ft",
+        }
+    ),
+    sort_cols=("sample_time", "monitoring_location_id"),
+)
+
+
+def _get(service: str, local_vars: dict[str, Any]) -> tuple[pd.DataFrame, BaseMetadata]:
+    """Send one getter's arguments through the shared OGC engine.
+
+    Shared tail of every NGWMN getter: resolves the ``state`` shim, marshals
+    the arguments with ``_get_args``, and supplies the NGWMN base URL, output
+    id, and dialect here so they are configured in exactly one place.
+    """
+    _resolve_state(local_vars, service)
+    return get_ogc_data(
+        _get_args(local_vars),
+        service,
+        output_id=_NGWMN_OUTPUT_ID,
+        base_url=NGWMN_OGC_API_URL,
+        dialect=NGWMN_DIALECT,
+    )
+
+
+def get_sites(
+    monitoring_location_id: str | Iterable[str] | None = None,
+    agency_code: str | Iterable[str] | None = None,
+    monitoring_location_number: str | Iterable[str] | None = None,
+    altitude: str | Iterable[str] | None = None,
+    national_aquifer_code: str | Iterable[str] | None = None,
+    national_aquifer_description: str | Iterable[str] | None = None,
+    country_code: str | Iterable[str] | None = None,
+    country_name: str | Iterable[str] | None = None,
+    state: str | Iterable[str] | None = None,
+    county_name: str | Iterable[str] | None = None,
+    aquifer_name: str | Iterable[str] | None = None,
+    site_type: str | Iterable[str] | None = None,
+    aquifer_type_code: str | Iterable[str] | None = None,
+    qw_sys_name: str | Iterable[str] | None = None,
+    qw_sn_flag: str | Iterable[str] | None = None,
+    qw_baseline_flag: str | Iterable[str] | None = None,
+    qw_well_chars: str | Iterable[str] | None = None,
+    qw_well_type: str | Iterable[str] | None = None,
+    qw_well_purpose: str | Iterable[str] | None = None,
+    wl_sys_name: str | Iterable[str] | None = None,
+    wl_sn_flag: str | Iterable[str] | None = None,
+    wl_baseline_flag: str | Iterable[str] | None = None,
+    wl_well_chars: str | Iterable[str] | None = None,
+    wl_well_type: str | Iterable[str] | None = None,
+    wl_well_purpose: str | Iterable[str] | None = None,
+    properties: str | Iterable[str] | None = None,
+    skip_geometry: bool | None = None,
+    bbox: list[float] | None = None,
+    limit: int | None = None,
+    convert_type: bool = True,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """Get NGWMN monitoring-location (site) metadata.
+
+    Site records describe each NGWMN monitoring location — its identifier,
+    responsible agency, location, aquifer, and whether it participates in the
+    network's water-quality (``qw_*``) and water-level (``wl_*``) sub-networks.
+
+    Parameters
+    ----------
+    monitoring_location_id : str or iterable of str, optional
+        One or more agency-qualified site identifiers in ``AGENCY-ID`` form
+        (e.g. ``"USGS-423114090161101"``, ``"MBMG-702934"``).
+    agency_code : str or iterable of str, optional
+        Code of the agency that manages the site.
+    monitoring_location_number : str or iterable of str, optional
+        Agency-assigned site number.
+    altitude : str or iterable of str, optional
+        Land-surface altitude at the site.
+    national_aquifer_code, national_aquifer_description : str or iterable, optional
+        National aquifer code / description.
+    country_code, country_name : str or iterable, optional
+        Country filters.
+    state : str or iterable of str, optional
+        State filter (50 states plus DC): a full name (``"Wisconsin"``),
+        postal code (``"WI"``), or FIPS code (``"55"`` / ``"US:55"``). An
+        unrecognized value raises ``ValueError``.
+    county_name : str or iterable of str, optional
+        County name filter.
+    aquifer_name, site_type, aquifer_type_code : str or iterable, optional
+        Aquifer name, site type, and aquifer-type code.
+    qw_sys_name, qw_sn_flag, qw_baseline_flag : str or iterable, optional
+        Water-quality sub-network membership flags.
+    qw_well_chars, qw_well_type, qw_well_purpose : str or iterable, optional
+        Water-quality well characteristics, type, and purpose.
+    wl_sys_name, wl_sn_flag, wl_baseline_flag : str or iterable, optional
+        Water-level sub-network membership flags.
+    wl_well_chars, wl_well_type, wl_well_purpose : str or iterable, optional
+        Water-level well characteristics, type, and purpose.
+    properties : str or iterable of str, optional
+        Subset of columns to return. ``None`` (default) returns all columns.
+    skip_geometry : bool, optional
+        When ``True``, omit the geometry column. ``None`` (default) leaves the
+        server default (geometry included).
+    bbox : list of float, optional
+        Bounding box ``[minx, miny, maxx, maxy]`` (CRS 4326) to spatially
+        filter sites.
+    limit : int, optional
+        Per-page size; pagination still follows ``next`` links to completion.
+    convert_type : bool, optional
+        Whether to coerce column dtypes (default ``True``).
+
+    Returns
+    -------
+    pandas.DataFrame or geopandas.GeoDataFrame
+        Site metadata, one row per monitoring location.
+    BaseMetadata
+        Metadata object with the request URL and query time.
+
+    Examples
+    --------
+    .. code::
+
+        >>> # All NGWMN sites in Wisconsin
+        >>> # state accepts a full name, postal code ("WI"), or FIPS ("55")
+        >>> df, md = dataretrieval.ngwmn.get_sites(state="Wisconsin")
+
+        >>> # Specific sites, geometry omitted
+        >>> df, md = dataretrieval.ngwmn.get_sites(
+        ...     monitoring_location_id=["USGS-423114090161101", "MBMG-702934"],
+        ...     skip_geometry=True,
+        ... )
+    """
+    return _get("sites", locals())  # locals() is exactly the keyword arguments
+
+
+def get_water_level(
+    monitoring_location_id: str | Iterable[str] | None = None,
+    monitoring_location_obs_number: str | Iterable[str] | None = None,
+    sample_time: str | Iterable[str] | None = None,
+    data_provided_by: str | Iterable[str] | None = None,
+    water_depth_below_land_surface_ft: str | Iterable[str] | None = None,
+    water_level_above_site_datum_ft: str | Iterable[str] | None = None,
+    monitoring_location_vertical_datum: str | Iterable[str] | None = None,
+    water_level_above_navd88_ft: str | Iterable[str] | None = None,
+    datetime: str | Iterable[str] | None = None,
+    properties: str | Iterable[str] | None = None,
+    limit: int | None = None,
+    convert_type: bool = True,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """Get NGWMN water-level observations.
+
+    Parameters
+    ----------
+    monitoring_location_id : str or iterable of str, optional
+        One or more agency-qualified site identifiers (``AGENCY-ID`` form).
+    monitoring_location_obs_number : str or iterable of str, optional
+        Per-site observation number; use to subset a site's observations.
+    sample_time : str or iterable of str, optional
+        Exact sample-time value(s) to match. For a time *range*, use
+        ``datetime`` instead.
+    data_provided_by : str or iterable of str, optional
+        Source organization for the observation.
+    water_depth_below_land_surface_ft : str or iterable, optional
+        Depth-to-water value filter (feet below land surface).
+    water_level_above_site_datum_ft : str or iterable, optional
+        Water-level value filter (feet above the site datum).
+    monitoring_location_vertical_datum : str or iterable of str, optional
+        Vertical datum of the reported water level.
+    water_level_above_navd88_ft : str or iterable, optional
+        Water-level value filter (feet above NAVD 88).
+    datetime : str or iterable of str, optional
+        Temporal filter — a single instant or a two-element ``[start, end]``
+        range (ISO-8601 dates/datetimes); ``".."`` denotes an open end.
+    properties : str or iterable of str, optional
+        Subset of columns to return. ``None`` (default) returns all columns.
+    limit : int, optional
+        Per-page size; pagination still follows ``next`` links to completion.
+    convert_type : bool, optional
+        Whether to coerce column dtypes (default ``True``).
+
+    Returns
+    -------
+    pandas.DataFrame
+        Water-level observations, one row per measurement.
+    BaseMetadata
+        Metadata object with the request URL and query time.
+
+    Examples
+    --------
+    .. code::
+
+        >>> site = "USGS-272838082142201"
+        >>> df, md = dataretrieval.ngwmn.get_water_level(
+        ...     monitoring_location_id=site
+        ... )
+
+        >>> # Restrict to a date range
+        >>> df, md = dataretrieval.ngwmn.get_water_level(
+        ...     monitoring_location_id=site, datetime=["2022-01-01", "2024-01-01"]
+        ... )
+
+        >>> # Multiple sites across agencies
+        >>> df, md = dataretrieval.ngwmn.get_water_level(
+        ...     monitoring_location_id=["USGS-272838082142201", "MBMG-702934"]
+        ... )
+    """
+    return _get("waterLevelObs", locals())  # locals() == the keyword arguments
+
+
+def get_lithology(
+    monitoring_location_id: str | Iterable[str] | None = None,
+    monitoring_location_obs_number: str | Iterable[str] | None = None,
+    properties: str | Iterable[str] | None = None,
+    limit: int | None = None,
+    convert_type: bool = True,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """Get NGWMN lithology observations.
+
+    Lithology records describe the geologic materials logged at a monitoring
+    location, with depth intervals and controlled lithology concepts.
+
+    Parameters
+    ----------
+    monitoring_location_id : str or iterable of str, optional
+        One or more agency-qualified site identifiers (``AGENCY-ID`` form).
+    monitoring_location_obs_number : str or iterable of str, optional
+        Per-site observation number; use to subset a site's records.
+    properties : str or iterable of str, optional
+        Subset of columns to return. ``None`` (default) returns all columns.
+    limit : int, optional
+        Per-page size; pagination still follows ``next`` links to completion.
+    convert_type : bool, optional
+        Whether to coerce column dtypes (default ``True``).
+
+    Returns
+    -------
+    pandas.DataFrame
+        Lithology observations, one row per logged interval.
+    BaseMetadata
+        Metadata object with the request URL and query time.
+
+    Examples
+    --------
+    .. code::
+
+        >>> df, md = dataretrieval.ngwmn.get_lithology(
+        ...     monitoring_location_id="AKDNR-535134236016630"
+        ... )
+    """
+    return _get("lithologyObs", locals())  # locals() == the keyword arguments
+
+
+def get_well_construction(
+    monitoring_location_id: str | Iterable[str] | None = None,
+    monitoring_location_obs_number: str | Iterable[str] | None = None,
+    material: str | Iterable[str] | None = None,
+    properties: str | Iterable[str] | None = None,
+    limit: int | None = None,
+    convert_type: bool = True,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """Get NGWMN well-construction observations.
+
+    Construction records describe a well's physical build-out — casing,
+    screens, and similar elements — with depth intervals, materials, and
+    diameters.
+
+    Parameters
+    ----------
+    monitoring_location_id : str or iterable of str, optional
+        One or more agency-qualified site identifiers (``AGENCY-ID`` form).
+    monitoring_location_obs_number : str or iterable of str, optional
+        Per-site observation number; use to subset a site's records.
+    material : str or iterable of str, optional
+        Construction-material filter.
+    properties : str or iterable of str, optional
+        Subset of columns to return. ``None`` (default) returns all columns.
+    limit : int, optional
+        Per-page size; pagination still follows ``next`` links to completion.
+    convert_type : bool, optional
+        Whether to coerce column dtypes (default ``True``).
+
+    Returns
+    -------
+    pandas.DataFrame
+        Well-construction observations, one row per construction element.
+    BaseMetadata
+        Metadata object with the request URL and query time.
+
+    Examples
+    --------
+    .. code::
+
+        >>> df, md = dataretrieval.ngwmn.get_well_construction(
+        ...     monitoring_location_id="USGS-272838082142201"
+        ... )
+    """
+    return _get("constructionObs", locals())  # locals() == the keyword arguments
+
+
+def get_providers(
+    state: str | Iterable[str] | None = None,
+    agency_code: str | Iterable[str] | None = None,
+    organization_type: str | Iterable[str] | None = None,
+    properties: str | Iterable[str] | None = None,
+    limit: int | None = None,
+    convert_type: bool = True,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """Get NGWMN data-provider records.
+
+    Providers are the organizations that contribute data to the network.
+
+    Parameters
+    ----------
+    state : str or iterable of str, optional
+        State filter (50 states plus DC): a full name (``"Wisconsin"``),
+        postal code (``"WI"``), or FIPS code (``"55"`` / ``"US:55"``); an
+        unrecognized value raises ``ValueError``. Pass one state at a time — a
+        multi-value state filter returns no records for this collection.
+    agency_code : str or iterable of str, optional
+        Provider agency code.
+    organization_type : str or iterable of str, optional
+        Provider organization type, e.g. ``"NWIS"``.
+    properties : str or iterable of str, optional
+        Subset of columns to return. ``None`` (default) returns all columns.
+    limit : int, optional
+        Per-page size; pagination still follows ``next`` links to completion.
+    convert_type : bool, optional
+        Whether to coerce column dtypes (default ``True``).
+
+    Returns
+    -------
+    pandas.DataFrame
+        Provider records, one row per provider.
+    BaseMetadata
+        Metadata object with the request URL and query time.
+
+    Examples
+    --------
+    .. code::
+
+        >>> df, md = dataretrieval.ngwmn.get_providers(state="WI")
+
+        >>> # a full name (or FIPS code) works too
+        >>> df, md = dataretrieval.ngwmn.get_providers(
+        ...     organization_type="NWIS", state="Wisconsin"
+        ... )
+    """
+    return _get("providers", locals())  # locals() is exactly the keyword arguments
diff --git a/dataretrieval/ogc/__init__.py b/dataretrieval/ogc/__init__.py
new file mode 100644
index 00000000..6e259bb5
--- /dev/null
+++ b/dataretrieval/ogc/__init__.py
@@ -0,0 +1 @@
+"""Generic OGC API engine shared by the Water Data and NGWMN getters."""
diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/ogc/chunking.py
similarity index 97%
rename from dataretrieval/waterdata/chunking.py
rename to dataretrieval/ogc/chunking.py
index c0cb1cb3..5f41e8e2 100644
--- a/dataretrieval/waterdata/chunking.py
+++ b/dataretrieval/ogc/chunking.py
@@ -1,6 +1,6 @@
-"""Joint URL-byte chunking for the Water Data OGC getters.
+"""Joint URL-byte chunking for the OGC getters.
 
-A Water Data query has several chunkable axes: every multi-value list
+An OGC query has several chunkable axes: every multi-value list
 parameter (sites, parameter codes, …) plus the cql-text ``filter``,
 which splits along its top-level OR clauses. Any of them can fan the
 URL past the server's ~8 KB byte limit. ``ChunkPlan`` picks a fan-out
@@ -62,7 +62,7 @@
 import random
 from collections.abc import Awaitable, Callable, Iterator
 from contextlib import contextmanager, suppress
-from contextvars import ContextVar
+from contextvars import ContextVar, copy_context
 from dataclasses import dataclass
 from datetime import timedelta
 from typing import Any, ClassVar, cast
@@ -81,7 +81,7 @@
 )
 from dataretrieval.utils import HTTPX_DEFAULTS
 
-from . import _progress
+from . import progress as _progress
 from .filters import (
     _check_numeric_filter_pitfall,
     _is_chunkable,
@@ -91,7 +91,7 @@
 # Empirically the API replies HTTP 414 above ~8200 bytes of full URL —
 # matches nginx's default ``large_client_header_buffers`` of 8 KB. 8000
 # leaves ~200 bytes for request-line framing and proxy variance.
-_WATERDATA_URL_BYTE_LIMIT = 8000
+_OGC_URL_BYTE_LIMIT = 8000
 
 # Any list-shaped kwarg with >1 element is chunked (comma-joined per
 # sub-list in the URL); ~90 OGC params qualify, so we denylist the few
@@ -358,7 +358,7 @@ def get_active_client() -> httpx.AsyncClient | None:
     Return the chunker's currently-published client, or ``None``.
 
     Used by the paginated-loop helpers (e.g.
-    :func:`dataretrieval.waterdata.utils._client_for`) to reuse the
+    :func:`dataretrieval.ogc.engine._client_for`) to reuse the
     per-call connection pool.
 
     Returns
@@ -449,11 +449,12 @@ class ChunkInterrupted(DataRetrievalError):
     .. code-block:: python
 
         import time
-        from dataretrieval.waterdata import get_daily
-        from dataretrieval.waterdata.chunking import ChunkInterrupted
+        from dataretrieval import ChunkInterrupted
 
+        # ``getter`` is any chunked OGC getter — e.g.
+        # ``waterdata.get_daily`` or ``ngwmn.get_water_level``.
         try:
-            df, md = get_daily(monitoring_location_id=long_list_of_sites)
+            df, md = getter(monitoring_location_id=long_list_of_sites)
         except ChunkInterrupted as exc:
             while True:
                 time.sleep(exc.retry_after or 5 * 60)
@@ -1367,6 +1368,15 @@ def __init__(
         self.fetch = fetch
         self.retry_policy = retry_policy
         self.finalize = finalize
+        # Snapshot the ambient context at construction time — i.e. inside the
+        # caller's ``with`` blocks (base URL, dialect, row cap, progress
+        # reporter). :meth:`resume` runs every drive inside this snapshot, so
+        # a *later* ``exc.call.resume()`` — which fires after those ``with``
+        # blocks have exited and reset their ContextVars — still rebuilds
+        # sub-requests against the original API's base URL/dialect rather than
+        # the process defaults. ``build_request`` reads those ContextVars when
+        # it reconstructs each sub-request, so the snapshot must outlive them.
+        self._ctx = copy_context()
         # Completed (frame, response) pairs keyed by sub-args index; sparse
         # (gathered sub-requests complete out of order — see class docstring).
         # ``_run``'s ``track`` closure is the only writer, so ``dict`` insertion
@@ -1534,6 +1544,17 @@ def resume(self) -> tuple[pd.DataFrame, Any]:
             handle is on ``exc.call`` — wait for the underlying
             condition to clear and call ``exc.call.resume()`` again.
         """
+        # Drive inside the snapshot taken at construction (see ``__init__``).
+        # ``start_blocking_portal`` copies the *calling* context into its
+        # worker thread, and running here means that calling context is the
+        # snapshot — so the base URL / dialect / row cap / progress reporter
+        # active when the call was created reach the rebuilt sub-requests,
+        # even when this is a resume fired long after the original ``with``
+        # blocks exited.
+        return self._ctx.run(self._resume_in_context)
+
+    def _resume_in_context(self) -> tuple[pd.DataFrame, Any]:
+        """Body of :meth:`resume`, run inside the captured context."""
         concurrency = _read_concurrency_env()
         with start_blocking_portal() as portal:
             # ``portal.call`` returns ``Any`` because ``functools.partial``
@@ -1709,7 +1730,7 @@ def multi_value_chunked(
         measure each candidate plan.
     url_limit : int, optional
         Byte budget for the request (URL + body). When ``None``
-        (default), the module-level ``_WATERDATA_URL_BYTE_LIMIT`` is
+        (default), the module-level ``_OGC_URL_BYTE_LIMIT`` is
         resolved at call time so test patches via
         ``monkeypatch.setattr`` take effect.
 
@@ -1742,7 +1763,7 @@ def wrapper(
             *,
             finalize: _Finalize = _passthrough_result,
         ) -> tuple[pd.DataFrame, Any]:
-            limit = _WATERDATA_URL_BYTE_LIMIT if url_limit is None else url_limit
+            limit = _OGC_URL_BYTE_LIMIT if url_limit is None else url_limit
             plan = ChunkPlan(args, build_request, limit)
             retry_policy = RetryPolicy.from_env()
             # The concurrency cap is resolved inside ``resume()`` from
diff --git a/dataretrieval/ogc/engine.py b/dataretrieval/ogc/engine.py
new file mode 100644
index 00000000..5be6ed38
--- /dev/null
+++ b/dataretrieval/ogc/engine.py
@@ -0,0 +1,1937 @@
+"""Generic OGC API engine shared by the Water Data and NGWMN getters.
+
+This module holds the API-agnostic machinery for talking to an OGC API
+Features service: request construction (GET comma-joined or POST/CQL2),
+async pagination, response shaping, and the chunked fetch entry point
+:func:`get_ogc_data`. It is deliberately free of any Water-Data-specific
+constants so a sibling package (e.g. NGWMN) can drive it without importing
+``dataretrieval.waterdata``.
+
+API-specific behavior is supplied by the caller:
+
+* ``output_id`` — the user-facing column the wire ``id`` is renamed to,
+  passed explicitly (no service map lives here).
+* ``base_url`` — the OGC API base to target.
+* ``extra_id_cols`` — synthetic id columns to push to the end of a result.
+* ``dialect`` — an :class:`OgcDialect` describing which services need
+  POST/CQL2 and which use date-only (vs. full datetime) time arguments.
+"""
+
+from __future__ import annotations
+
+import copy
+import functools
+import json
+import logging
+import math
+import numbers
+import os
+import re
+from collections.abc import (
+    AsyncIterator,
+    Awaitable,
+    Callable,
+    Iterable,
+    Iterator,
+    Mapping,
+    Sequence,
+)
+from contextlib import asynccontextmanager, contextmanager
+from contextvars import ContextVar
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from typing import Any, TypeVar, cast
+from zoneinfo import ZoneInfo
+
+import httpx
+import pandas as pd
+from anyio.from_thread import start_blocking_portal
+
+from dataretrieval import __version__
+from dataretrieval.exceptions import DataRetrievalError, RateLimited, error_for_status
+from dataretrieval.ogc import chunking
+from dataretrieval.ogc import progress as _progress
+from dataretrieval.ogc.chunking import (
+    _QUOTA_HEADER,
+    _safe_elapsed,
+    get_active_client,
+)
+from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata, _get, _network_error
+
+try:
+    import geopandas as gpd
+
+    GEOPANDAS = True
+except ImportError:
+    GEOPANDAS = False
+
+# Set up logger for this module
+logger = logging.getLogger(__name__)
+
+# Whether geopandas is present is a static, environment-level fact, so warn once
+# here at import time rather than per query/chunk. That avoids the warning
+# repeating on every call and avoids it interleaving with the progress line's
+# carriage-return rewrites.
+if not GEOPANDAS:
+    logger.warning(
+        "Geopandas not installed. Geometries will be flattened into pandas DataFrames."
+    )
+
+# Host of the USGS Water Data API, the API version segment, and the versioned
+# OGC API root assembled from the two.
+# NOTE(review): presumably the default target when a caller supplies no
+# ``base_url`` — confirm against where the base-URL context variable is defined.
+BASE_URL = "https://api.waterdata.usgs.gov"
+OGC_API_VERSION = "v0"
+OGC_API_URL = f"{BASE_URL}/ogcapi/{OGC_API_VERSION}"
+
+
+@dataclass(frozen=True)
+class OgcDialect:
+    """Per-API quirks the generic request builder needs to know about.
+
+    Instances are immutable (``frozen=True``). Every field defaults to an
+    empty container, so ``OgcDialect()`` describes an API with no
+    special-cased collections or columns.
+
+    Attributes
+    ----------
+    cql2_services : frozenset[str]
+        Collections that don't accept comma-separated multi-value GET
+        parameters and so must be queried via POST with a CQL2 JSON body.
+    date_only_services : frozenset[str]
+        Collections whose time arguments are rendered date-only
+        (``YYYY-MM-DD``) rather than as a full UTC datetime. The
+        ``last_modified`` parameter is always rendered as a full datetime
+        regardless of this set.
+    time_cols : frozenset[str]
+        Result columns to coerce to datetime when ``convert_type`` is set.
+        Empty by default, so the generic engine carries no API-specific
+        column knowledge; each API supplies its own.
+    numerical_cols : frozenset[str]
+        Result columns to coerce to numeric when ``convert_type`` is set.
+    sort_cols : tuple[str, ...]
+        Columns to sort the combined result by, in priority order. Sorting
+        is applied only when the first (primary) column is present; any
+        later columns also present are added as secondary keys.
+    """
+
+    # Membership sets consulted by service/column name.
+    cql2_services: frozenset[str] = field(default_factory=frozenset)
+    date_only_services: frozenset[str] = field(default_factory=frozenset)
+    time_cols: frozenset[str] = field(default_factory=frozenset)
+    numerical_cols: frozenset[str] = field(default_factory=frozenset)
+    # A tuple rather than a set: the order of the sort keys is significant.
+    sort_cols: tuple[str, ...] = field(default_factory=tuple)
+
+
+# Default dialect: a plain OGC API with no CQL2-only collections and no
+# date-only collections (every time argument rendered as a full UTC datetime).
+_DEFAULT_DIALECT = OgcDialect()
+
+
+def _switch_arg_id(ls: dict[str, Any], id_name: str, service: str) -> dict[str, Any]:
+    """
+    Collapse a package-specific id argument onto the ``"id"`` key the API uses.
+
+    Two aliases are recognized: the name derived from the service
+    (hyphens turned into underscores, plus ``"_id"``) and the explicit
+    ``id_name``. When `ls` has no ``"id"`` entry yet, the first alias
+    present (service-derived first, then ``id_name``) supplies its value.
+    Both aliases are then removed from `ls` whether or not either was used;
+    if neither was present, ``"id"`` stays unset.
+
+    Parameters
+    ----------
+    ls : Dict[str, Any]
+        Argument dictionary to standardize. It is modified in place.
+    id_name : str
+        The explicit id key to look for.
+    service : str
+        The service name the derived id key is built from.
+
+    Returns
+    -------
+    Dict[str, Any]
+        The same dictionary object, with the aliases removed and ``"id"``
+        set when a value was available.
+
+    Examples
+    --------
+    For service "time-series-metadata" with ``id_name="time_series_id"``,
+    either "time_series_metadata_id" or "time_series_id" ends up under the
+    plain "id" key.
+    """
+    aliases = (f"{service.replace('-', '_')}_id", id_name)
+
+    # An explicit "id" always wins; otherwise take the first alias present.
+    if "id" not in ls:
+        for alias in aliases:
+            if alias in ls:
+                ls["id"] = ls[alias]
+                break
+
+    # The aliases never go on the wire, used or not.
+    for alias in aliases:
+        ls.pop(alias, None)
+
+    return ls
+
+
+def _switch_properties_id(
+    properties: list[str] | None, id_name: str, service: str
+) -> list[str]:
+    """
+    Turn user-facing property names into the wire ``properties`` list.
+
+    Hyphens in each name are normalized to underscores, then four names are
+    filtered out: the bare ``id``, ``geometry``, the service-specific id
+    column (`id_name`) and the id name derived from `service`. The feature
+    ``id`` always comes back and is renamed to the service-specific id
+    column in post-processing, and several collections (e.g. ``daily``,
+    ``continuous``) answer HTTP 400 when ``id`` is requested as a property.
+    ``geometry`` is governed by ``skip_geometry`` instead.
+
+    Parameters
+    ----------
+    properties : Optional[List[str]]
+        Requested property / column names, or None.
+    id_name : str
+        The service-specific id column name to drop (e.g. ``daily_id``).
+    service : str
+        The service name, used to derive a second id alias to drop.
+
+    Returns
+    -------
+    List[str]
+        The remaining names in their original order. Empty when
+        `properties` is empty or None, in which case the URL omits the
+        ``properties`` filter and :func:`_arrange_cols` shapes the result.
+
+    Examples
+    --------
+    For service "daily", ``properties=["daily_id", "value", "geometry"]``
+    yields ``["value"]``. The ``daily_id`` column still shows up in the
+    result because it is renamed from the always-returned feature ``id``.
+    """
+    if not properties:
+        return []
+
+    excluded = frozenset(
+        ("id", "geometry", id_name, service.replace("-", "_") + "_id")
+    )
+    kept: list[str] = []
+    for requested in properties:
+        wire_name = requested.replace("-", "_")
+        if wire_name not in excluded:
+            kept.append(wire_name)
+    return kept
+
+
+# ``strptime`` patterns tried in order by ``_parse_datetime``; the first one
+# that matches wins. Covers ``T``- and space-separated timestamps, with and
+# without fractional seconds, with an optional UTC offset on the ``T`` forms,
+# and a bare date.
+_DATETIME_FORMATS = (
+    "%Y-%m-%dT%H:%M:%S.%f%z",
+    "%Y-%m-%dT%H:%M:%S%z",
+    "%Y-%m-%dT%H:%M:%S.%f",
+    "%Y-%m-%dT%H:%M:%S",
+    "%Y-%m-%d %H:%M:%S.%f",
+    "%Y-%m-%d %H:%M:%S",
+    "%Y-%m-%d",
+)
+
+# Anchored to ``[Pp]\d`` so a normal word containing ``p`` (e.g. ``"Apr"``)
+# doesn't get mis-classified as an ISO 8601 duration; the optional ``T``
+# admits time-only forms like ``PT36H``.
+_DURATION_RE = re.compile(r"^[Pp]T?\d")
+
+# OGC API parameters that carry a date/datetime value (single string,
+# two-element range, or interval/duration string) rather than a multi-value
+# string list. Used by ``_construct_api_requests`` to keep them out of the
+# POST/CQL2 multi-value path and to route them through ``_format_api_dates``,
+# and by the default ``_get_args`` no-normalize set to bypass string-iterable
+# normalization.
+_DATE_RANGE_PARAMS = frozenset(
+    {"datetime", "last_modified", "begin", "begin_utc", "end", "end_utc", "time"}
+)
+
+
+def _parse_datetime(value: str) -> datetime | None:
+    """Parse one datetime string by trying each supported format in turn.
+
+    Parameters
+    ----------
+    value : str
+        The text to parse.
+
+    Returns
+    -------
+    datetime or None
+        The first successful parse against ``_DATETIME_FORMATS`` (tz-aware
+        iff the input carried a UTC offset or a trailing ``Z``), or ``None``
+        when no format matches.
+    """
+    # Spell a trailing ``Z`` as an explicit ``+00:00`` so the ``%z`` formats
+    # always see a numeric offset.
+    text = value.removesuffix("Z") + "+00:00" if value.endswith("Z") else value
+    for pattern in _DATETIME_FORMATS:
+        try:
+            parsed = datetime.strptime(text, pattern)
+        except ValueError:
+            # Wrong shape for this pattern; move on to the next one.
+            continue
+        else:
+            return parsed
+    return None
+
+
+def _format_one(dt: str | None, *, date: bool) -> str | None:
+    """Render one element of a date/datetime argument for the API.
+
+    Parameters
+    ----------
+    dt : str or None
+        A single date/datetime string, or a blank/NA placeholder.
+    date : bool
+        When True, emit only the date portion (``YYYY-MM-DD``).
+
+    Returns
+    -------
+    str or None
+        ``".."`` for a blank/NA element, the formatted value for a
+        parseable one, or ``None`` when the string matches no supported
+        format.
+    """
+    # ``pd.isna`` goes first so NA-like scalars are settled before any
+    # ``==`` comparison is attempted on them.
+    if pd.isna(dt) or dt == "" or dt is None:
+        return ".."
+
+    moment = _parse_datetime(dt)
+    if moment is None:
+        return None
+    if date:
+        return f"{moment:%Y-%m-%d}"
+
+    # A naive value is taken to be in the system local zone (backwards
+    # compatible). ``astimezone()`` with no argument resolves the offset
+    # for the value's own date, so DST is applied per value rather than
+    # from a single frozen "now" offset.
+    if moment.tzinfo is None:
+        moment = moment.astimezone()
+    in_utc = moment.astimezone(ZoneInfo("UTC"))
+    return in_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def _format_api_dates(
+    datetime_input: str | Sequence[str | None] | None, date: bool = False
+) -> str | None:
+    """
+    Render a date/datetime argument (single value or range) for the API.
+
+    Parameters
+    ----------
+    datetime_input : Union[str, List[Optional[str]], None]
+        One date/datetime string, or an iterable of one or two of them.
+        Accepted shapes include "%Y-%m-%d %H:%M:%S", ISO 8601 (with or
+        without ``Z``/numeric offset), and relative periods (e.g. "P7D" /
+        "PT36H"). A range endpoint may be ``None``/``NaN``/empty to denote a
+        half-bounded range.
+    date : bool, optional
+        If True, only the date portion ("YYYY-MM-DD") is emitted. If False
+        (default), values are emitted as UTC ISO 8601
+        ("YYYY-MM-DDTHH:MM:SSZ").
+
+    Returns
+    -------
+    Union[str, None]
+        - For a single value: the formatted string, or None when it cannot
+        be parsed.
+        - For two values: both formatted and joined with "/" (e.g.
+        "YYYY-MM-DD/YYYY-MM-DD").
+        - None when the input is None, empty, entirely blank/NA, or contains
+        an element that cannot be parsed.
+
+    Raises
+    ------
+    TypeError
+        If `datetime_input` is a mapping.
+    ValueError
+        If `datetime_input` contains more than two values.
+
+    Notes
+    -----
+    - A lone blank/NA value gives None. Inside a two-value range a blank/NA
+    endpoint becomes ``".."`` (an open bound, e.g. ``"2024-01-01/.."``).
+    - A single ISO 8601 duration ("P7D", "PT36H") or a single pre-formatted
+    interval containing ``"/"`` is returned unchanged.
+    - With `date` False, inputs carrying an explicit offset (``Z`` or
+    ``+HH:MM``) are converted from that offset to UTC; naive inputs are
+    interpreted in the local time zone for backwards compatibility.
+    """
+    if datetime_input is None:
+        return None
+
+    # Reject mappings outright: iterating one yields its keys, which would
+    # silently accept the wrong shape.
+    if isinstance(datetime_input, Mapping):
+        raise TypeError(
+            f"date input must be a string or sequence of strings, "
+            f"not {type(datetime_input).__name__}."
+        )
+
+    values: Sequence[str | None]
+    if isinstance(datetime_input, str):
+        values = [datetime_input]
+    elif isinstance(datetime_input, (list, tuple)):
+        values = datetime_input
+    else:
+        # Any other iterable (pandas.Series, numpy.ndarray, generator, ...)
+        # is materialized so ``len()`` and indexing work below.
+        values = list(datetime_input)
+
+    # Nothing usable (empty, or every element blank/NA) means no filter.
+    if all(pd.isna(item) or item == "" or item is None for item in values):
+        return None
+
+    if len(values) > 2:
+        raise ValueError("datetime_input should only include 1-2 values")
+
+    # A lone duration ("P7D", "PT36H") or ready-made interval ("a/b") goes
+    # through untouched.
+    if len(values) == 1:
+        only = values[0]
+        if isinstance(only, str) and (_DURATION_RE.match(only) or "/" in only):
+            return only
+
+    # Format each element in turn; a single element that fails to parse
+    # invalidates the whole value/range.
+    pieces: list[str] = []
+    for item in values:
+        rendered = _format_one(item, date=date)
+        if rendered is None:
+            return None
+        pieces.append(rendered)
+    return "/".join(pieces)
+
+
+def _cql2_param(args: dict[str, Any]) -> str:
+    """
+    Serialize query parameters as a compact CQL2 JSON document.
+
+    Each entry of `args` becomes an ``in`` predicate on the property named
+    by its key, and all predicates are combined under a single ``and``.
+
+    Parameters
+    ----------
+    args : Dict[str, Any]
+        Mapping of property name to the values to match.
+
+    Returns
+    -------
+    str
+        The CQL2 query as JSON with no indentation or whitespace.
+
+    Notes
+    -----
+    The tightest separators are used on purpose. The body counts against
+    the server's ~8 KB request-size limit and against
+    :func:`chunking._request_bytes` when chunks are planned, so compact
+    encoding lets more values fit in one POST than pretty-printing would —
+    roughly twice as many monitoring-location ids per sub-request, and so
+    about half as many chunks for large id lists.
+    """
+    predicates = [
+        {"op": "in", "args": [{"property": name}, wanted]}
+        for name, wanted in args.items()
+    ]
+    document = {"op": "and", "args": predicates}
+    return json.dumps(document, separators=(",", ":"))
+
+
+def _default_headers() -> dict[str, str]:
+    """
+    Build the HTTP headers sent with every API request.
+
+    Returns
+    -------
+    dict
+        'Accept-Encoding', 'Accept', 'User-Agent' (carrying the package
+        version) and 'lang'. When the environment variable 'API_USGS_PAT'
+        is set to a non-empty value, it is added as 'X-Api-Key'.
+    """
+    headers = {
+        "Accept-Encoding": "compress, gzip",
+        "Accept": "application/json",
+        "User-Agent": f"python-dataretrieval/{__version__}",
+        "lang": "en-US",
+    }
+    # Read at call time so a token exported after import is still picked up.
+    if token := os.getenv("API_USGS_PAT"):
+        headers["X-Api-Key"] = token
+    return headers
+
+
+def _check_ogc_requests(endpoint: str, req_type: str = "queryables") -> dict[str, Any]:
+    """
+    Fetch a collection's ``queryables`` or ``schema`` document as JSON.
+
+    Issues an HTTP GET against
+    ``<base url>/collections/<endpoint>/<req_type>``, where the base URL is
+    read from the active OGC base-URL context variable.
+
+    Parameters
+    ----------
+    endpoint : str
+        The OGC collection endpoint to query (e.g. the service/collection id).
+    req_type : str, optional
+        Which document to request: "queryables" (default) or "schema".
+
+    Returns
+    -------
+    dict
+        The decoded JSON response.
+
+    Raises
+    ------
+    ValueError
+        If req_type is not "queryables" or "schema".
+    DataRetrievalError
+        Raised by :func:`_raise_for_non_200` when the response carries an
+        error status — the same typed contract as the main data path, so
+        callers can use one ``except`` clause everywhere.
+    """
+    allowed = ("queryables", "schema")
+    if req_type not in allowed:
+        raise ValueError(f"req_type must be 'queryables' or 'schema', got {req_type!r}")
+
+    base = _ogc_base_url_var.get()
+    response = _get(
+        f"{base}/collections/{endpoint}/{req_type}",
+        headers=_default_headers(),
+        **HTTPX_DEFAULTS,
+    )
+    _raise_for_non_200(response)
+    # ``Response.json`` is typed ``Any``; these endpoints return a JSON
+    # object and callers index the result as a dict.
+    return cast("dict[str, Any]", response.json())
+
+
+def _error_body(resp: httpx.Response) -> str:
+    """
+    Build an informative error message from an HTTP response.
+
+    Parameters
+    ----------
+    resp : httpx.Response
+        The HTTP response object to extract the error message from.
+
+    Returns
+    -------
+    str
+        An error message string assembled per status code:
+
+        * **429** — predefined message describing the rate-limit and pointing
+          at the API-token path; the response body is not consulted.
+        * **403** — predefined message describing the most common cause
+          (query exceeding server limits); the response body is not
+          consulted.
+        * **other statuses** — attempts ``resp.json()``. When the body is a
+          JSON object, renders ``"<status>: <code>. <description>."`` from
+          the error envelope, with placeholders for missing keys. When the
+          body is not JSON (e.g. an HTML 502 from a gateway) or is JSON but
+          not an object (a list, string, number or ``null``), falls back to
+          ``"<status>: <reason>. <snippet>"`` with the first 200 characters
+          of ``resp.text``; an empty body degrades to
+          ``"<status>: <reason>."``.
+    """
+    status = resp.status_code
+    if status == 429:
+        return (
+            "429: Too many requests made. Please obtain an API token "
+            "or try again later."
+        )
+    if status == 403:
+        return (
+            "403: Query request denied. Possible reasons include "
+            "query exceeding server limits."
+        )
+
+    try:
+        payload = resp.json()
+    except ValueError:
+        payload = None
+
+    # Only a JSON object can carry the ``code``/``description`` envelope.
+    # Anything else would raise AttributeError on ``.get`` and mask the
+    # HTTP failure this function exists to describe.
+    if isinstance(payload, dict):
+        return (
+            f"{status}: {payload.get('code', 'Unknown type')}. "
+            f"{payload.get('description', 'No description provided')}."
+        )
+
+    snippet = (resp.text or "").strip()[:200]
+    reason = resp.reason_phrase or "Error"
+    if snippet:
+        return f"{status}: {reason}. {snippet}"
+    return f"{status}: {reason}."
+
+
+def _parse_retry_after(value: str | None) -> float | None:
+    """
+    Parse a USGS ``Retry-After`` header into seconds.
+
+    Parameters
+    ----------
+    value : str or None
+        The raw header value, or ``None`` if absent.
+
+    Returns
+    -------
+    float or None
+        Finite, non-negative delta-seconds (negative values are clamped to
+        zero). ``None`` when the header is absent, unparseable, or parses to
+        a non-finite number; ``ChunkedCall`` treats ``None`` as "fall back
+        to my own retry policy".
+
+    Notes
+    -----
+    USGS sends ``Retry-After`` as integer delta-seconds (empirically
+    verified — e.g. ``Retry-After: 2619``). The HTTP spec also allows
+    HTTP-date form, but USGS doesn't use it, so this function doesn't
+    bother parsing it.
+    """
+    if not value:
+        return None
+    try:
+        seconds = float(value.strip())
+    except ValueError:
+        return None
+    # ``float`` also accepts "inf", "nan" and overflowing literals such as
+    # "1e999". None of those is a usable delay (an infinite one could stall
+    # a retry forever), so treat them like any other unparseable header.
+    if not math.isfinite(seconds):
+        return None
+    return max(0.0, seconds)
+
+
+def _raise_for_non_200(resp: httpx.Response) -> None:
+    """
+    Raise a typed exception when the response carries an error status.
+
+    Despite the name, only statuses of 400 and above raise; any status
+    below 400 returns normally. The message comes from :func:`_error_body`
+    (USGS-API-aware: special-cases 429/403 and extracts
+    ``code``/``description`` from JSON error bodies) rather than from
+    ``Response.raise_for_status``, which raises ``HTTPStatusError`` with a
+    generic message.
+
+    Parameters
+    ----------
+    resp : httpx.Response
+        The HTTP response to inspect.
+
+    Raises
+    ------
+    DataRetrievalError
+        The typed subclass for the status (see
+        :func:`dataretrieval.exceptions.error_for_status` for the mapping),
+        built with the parsed ``Retry-After`` header attached. The
+        transient types (:class:`~dataretrieval.exceptions.TransientError`)
+        are distinguished so ``ChunkedCall`` can wrap them as a resumable
+        :class:`~dataretrieval.ogc.chunking.QuotaExhausted` /
+        :class:`~dataretrieval.ogc.chunking.ServiceInterrupted`; a fatal
+        :class:`~dataretrieval.exceptions.HTTPError` (not a
+        ``TransientError``) the chunker won't resume.
+    """
+    code = resp.status_code
+    if code >= 400:
+        wait_hint = _parse_retry_after(resp.headers.get("Retry-After"))
+        raise error_for_status(code, _error_body(resp), retry_after=wait_hint)
+
+
+def _paginated_failure_message(pages_collected: int, cause: BaseException) -> str:
+    """
+    Compose the user-facing message for a failure partway through paging.
+
+    The API exposes no resume cursor, so retrying the whole call is the
+    only recovery. The message therefore spells out the practical options,
+    with the first suggestion depending on whether the failure was a
+    rate-limit (429) or something else.
+
+    Parameters
+    ----------
+    pages_collected : int
+        Number of pages successfully fetched before the failure.
+    cause : BaseException
+        The underlying exception that interrupted pagination.
+
+    Returns
+    -------
+    str
+        A message suitable for the ``DataRetrievalError`` that the
+        paginated fetch paths raise from the original exception.
+    """
+    # Drop one trailing period so the sentence assembled below doesn't end
+    # up with a doubled ".".
+    detail = str(cause).removesuffix(".")
+    if not detail.strip():
+        # Some ``httpx`` exceptions (e.g. ``TimeoutException()`` with no
+        # args) stringify to empty; name the class so the message still
+        # says something.
+        detail = type(cause).__name__
+
+    action = (
+        "wait for the rate-limit window to reset and retry"
+        if isinstance(cause, RateLimited)
+        else "retry the request (possibly after a short backoff)"
+    )
+    return (
+        f"Paginated request failed after collecting {pages_collected} "
+        f"page(s): {detail}. To recover: {action}, reduce the "
+        f"request size (e.g. fewer locations, a shorter time range, or "
+        f"a smaller ``limit``), or obtain an API token."
+    )
+
+
+def _ogc_query_params(
+    params: dict[str, Any],
+    *,
+    properties: list[str] | None,
+    bbox: list[float] | None,
+    limit: int | None,
+    skip_geometry: bool | None,
+) -> dict[str, Any]:
+    """Write the shared OGC query knobs into ``params`` and return it.
+
+    This is the one place the ``skipGeometry``/``limit``/``bbox``/
+    ``properties`` URL parameters are produced, so the typed getters
+    (:func:`_construct_api_requests`) and the generalized CQL2 path
+    (:func:`_construct_cql_request`) build them identically. ``params`` is
+    mutated in place.
+
+    * ``skipGeometry`` is set only when ``skip_geometry`` is not ``None``;
+      leaving it unset lets the server default (geometry included) apply.
+      The typed getters always pass a bool.
+    * ``limit`` is always set: 50000 when ``limit`` is ``None`` or larger
+      than 50000, otherwise the given value.
+    * ``bbox`` is set, comma-joined, only when given and non-empty.
+    * ``properties`` is set, comma-joined, only when non-empty.
+    """
+    page_cap = 50000
+
+    if skip_geometry is not None:
+        params["skipGeometry"] = skip_geometry
+
+    if limit is None or limit > page_cap:
+        params["limit"] = page_cap
+    else:
+        params["limit"] = limit
+
+    # Test the length, not truthiness: ``if bbox:`` raises for a numpy array.
+    if bbox is not None and len(bbox) > 0:
+        params["bbox"] = ",".join(str(coord) for coord in bbox)
+
+    if properties:
+        params["properties"] = ",".join(properties)
+
+    return params
+
+
+def _construct_api_requests(
+    service: str,
+    properties: list[str] | None = None,
+    bbox: list[float] | None = None,
+    limit: int | None = None,
+    skip_geometry: bool = False,
+    **kwargs: Any,
+) -> httpx.Request:
+    """
+    Constructs an HTTP request object for the specified water data API service.
+
+    For most services, list parameters are comma-joined and sent as a single
+    GET request (e.g. ``parameter_code=["00060","00010"]`` becomes
+    ``parameter_code=00060,00010`` in the URL). For services the active dialect
+    flags as CQL2-only (``dialect.cql2_services``, e.g. the Water Data API's
+    ``monitoring-locations``), a POST request with CQL2 JSON is used instead.
+
+    Parameters
+    ----------
+    service : str
+        The name of the API service to query (e.g., "daily").
+    properties : Optional[List[str]], optional
+        List of property names to include in the request.
+    bbox : Optional[List[float]], optional
+        Bounding box coordinates as a list of floats.
+    limit : Optional[int], optional
+        Maximum number of results to return per request.
+    skip_geometry : bool, optional
+        Whether to exclude geometry from the response (default is False).
+    **kwargs
+        Additional query parameters, including date/time filters and other
+        API-specific options.
+
+    Returns
+    -------
+    httpx.Request
+        The constructed HTTP request object ready to be sent.
+
+    Notes
+    -----
+    - Date/time parameters are automatically formatted to ISO8601.
+    - The base URL and the dialect are read from context variables at call
+      time, not passed as arguments.
+    - A CQL2-only service is sent as POST only when at least one keyword
+      argument is a list/tuple with more than one element; with no such
+      argument the request is a GET carrying all parameters on the URL.
+    - A ``filter_lang`` keyword is sent as the ``filter-lang`` URL parameter.
+    """
+    service_url = f"{_ogc_base_url_var.get()}/collections/{service}/items"
+    dialect = _dialect_var.get()
+
+    # Format date/time parameters to ISO8601 first — both routing paths need it.
+    for key in _DATE_RANGE_PARAMS:
+        if key in kwargs:
+            kwargs[key] = _format_api_dates(
+                kwargs[key],
+                date=(service in dialect.date_only_services and key != "last_modified"),
+            )
+
+    if service in dialect.cql2_services:
+        # POST with CQL2 JSON: multi-value params go in the request body.
+        # The date-range loop above has already collapsed any _DATE_RANGE_PARAMS
+        # value to a string, so the list/tuple check below cannot match them.
+        post_params = {
+            k: v
+            for k, v in kwargs.items()
+            if isinstance(v, (list, tuple)) and len(v) > 1
+        }
+        # Everything else (scalars and lists of length 0 or 1) stays on the URL.
+        params = {k: v for k, v in kwargs.items() if k not in post_params}
+    else:
+        # GET with comma-separated values: join list/tuple values into one string.
+        # Skip empty lists/tuples so they're omitted rather than emitted as a
+        # filterless ``&param=`` (which the server reads as "match empty").
+        post_params = {}
+        params = {
+            k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v
+            for k, v in kwargs.items()
+            if not (isinstance(v, (list, tuple)) and len(v) == 0)
+        }
+
+    # Adds skipGeometry/limit/bbox/properties to ``params`` in place.
+    _ogc_query_params(
+        params,
+        properties=properties,
+        bbox=bbox,
+        limit=limit,
+        skip_geometry=skip_geometry,
+    )
+
+    # Translate CQL filter Python names to the hyphenated URL parameter that
+    # the OGC API expects. The Python kwarg is `filter_lang` because hyphens
+    # aren't valid in Python identifiers.
+    if "filter_lang" in params:
+        params["filter-lang"] = params.pop("filter_lang")
+
+    headers = _default_headers()
+
+    # An empty ``post_params`` (always the case for non-CQL2 services) means
+    # there is no body to send, so the request goes out as a plain GET.
+    if post_params:
+        headers["Content-Type"] = "application/query-cql-json"
+        return httpx.Request(
+            method="POST",
+            url=service_url,
+            headers=headers,
+            content=_cql2_param(post_params),
+            params=params,
+        )
+    return httpx.Request(
+        method="GET",
+        url=service_url,
+        headers=headers,
+        params=params,
+    )
+
+
+def _construct_cql_request(
+    service: str,
+    cql_body: str,
+    *,
+    properties: list[str] | None = None,
+    bbox: list[float] | None = None,
+    limit: int | None = None,
+    skip_geometry: bool | None = None,
+) -> httpx.Request:
+    """Build a POST/CQL2 request whose body is a caller-supplied document.
+
+    This is the counterpart of :func:`_construct_api_requests` for the
+    generalized :func:`~dataretrieval.waterdata.api.get_cql` path. The
+    caller hands over an already-serialized CQL2 JSON document (any
+    predicate the grammar allows) and it is sent as the body unchanged.
+    ``properties``/``bbox``/``limit``/``skip_geometry`` are placed on the
+    URL by the shared :func:`_ogc_query_params`, so a generalized query and
+    the equivalent typed getter end up with the same URL parameters.
+
+    Parameters
+    ----------
+    service : str
+        OGC collection name (e.g. ``"daily"``).
+    cql_body : str
+        Serialized CQL2 JSON document, sent as the POST body verbatim.
+    properties, bbox, limit, skip_geometry
+        See :func:`_ogc_query_params`. ``properties`` are wire-format
+        (``id``-translated) names.
+
+    Returns
+    -------
+    httpx.Request
+        A POST request with ``Content-Type: application/query-cql-json``.
+    """
+    items_url = f"{_ogc_base_url_var.get()}/collections/{service}/items"
+    query = _ogc_query_params(
+        {},
+        properties=properties,
+        bbox=bbox,
+        limit=limit,
+        skip_geometry=skip_geometry,
+    )
+    # Default headers plus the media type that marks the body as CQL2 JSON.
+    request_headers = {
+        **_default_headers(),
+        "Content-Type": "application/query-cql-json",
+    }
+    return httpx.Request(
+        method="POST",
+        url=items_url,
+        headers=request_headers,
+        content=cql_body,
+        params=query,
+    )
+
+
+def _next_req_url(
+    resp: httpx.Response, *, body: dict[str, Any] | None = None
+) -> str | None:
+    """
+    Extracts the URL for the next page of results from an HTTP response from a
+    water data endpoint.
+
+    Parameters
+    ----------
+    resp : httpx.Response
+        The HTTP response object containing JSON data and headers.
+    body : dict, optional
+        Pre-parsed JSON body for ``resp``. When provided, skips the
+        ``resp.json()`` call — useful when the caller has already
+        decoded the body for its own use (avoids a second parse pass).
+
+    Returns
+    -------
+    Optional[str]
+        The URL for the next page of results if available, otherwise None.
+
+    Raises
+    ------
+    RuntimeError
+        If the ``next`` link points at a different host than the one that
+        served ``resp``. The check is skipped when either host cannot be
+        determined.
+
+    Notes
+    -----
+    - Returns None when the response carries no features.
+    - Expects the response JSON to contain a "links" list with objects having
+    "rel" and "href" keys.
+    - Checks for the "next" relation in the "links" to determine the next URL.
+    - Only the first "next" link is considered; if it has no usable "href"
+    the function returns None without looking at later links.
+    """
+    if body is None:
+        body = resp.json()
+    # Stop paging when the response carries no features. Key off ``features``
+    # rather than ``numberReturned``: the main Water Data API reports
+    # ``numberReturned`` but the NGWMN OGC API omits it, so trusting it would
+    # refuse to follow a ``next`` link on a page that actually carries
+    # features (mirrors the same guard in :func:`_get_resp_data`).
+    if not (body.get("features") or []):
+        return None
+    for link in body.get("links", []):
+        if link.get("rel") != "next":
+            continue
+        href = link.get("href")
+        if not href:
+            return None
+        # Refuse to follow a next-page link to a different host —
+        # the request's headers/auth were minted for the original
+        # host and shouldn't leak to whatever a poisoned response
+        # body might supply. Guarded against mock-shaped ``resp.url``
+        # attributes (tests sometimes set strings or ``MagicMock``)
+        # by falling open when host extraction isn't reliable.
+        next_host: str | None
+        cur_host: str | None
+        try:
+            next_host = httpx.URL(href).host
+            resp_url = (
+                resp.url
+                if isinstance(resp.url, httpx.URL)
+                else httpx.URL(str(resp.url))
+            )
+            cur_host = resp_url.host
+        except (httpx.InvalidURL, TypeError):
+            # Host extraction failed: clear both so the comparison below
+            # is skipped (fail open).
+            next_host = cur_host = None
+        if next_host and cur_host and next_host != cur_host:
+            raise RuntimeError(
+                f"Refusing to follow cross-host next-page URL: "
+                f"{next_host} != {cur_host}"
+            )
+        # ``href`` comes from the JSON ``links`` array (typed ``Any``); the
+        # ``not href`` guard above already excluded empty/None, and it is a
+        # URL string (passed to ``httpx.URL`` above).
+        return cast("str", href)
+    return None
+
+
+def _empty_feature_frame(geopd: bool) -> pd.DataFrame:
+    """Empty result frame for a page that carries no features.
+
+    Returns an empty ``GeoDataFrame`` when ``geopd`` is true (geopandas in
+    use) so a downstream ``pd.concat([empty_page, geo_page])`` doesn't
+    downgrade the result to a plain ``DataFrame`` (stripping geometry/CRS);
+    otherwise an empty ``DataFrame``. The single home for this empty-page
+    contract, shared by the feature-frame builders that flatten GeoJSON pages.
+    """
+    return gpd.GeoDataFrame() if geopd else pd.DataFrame()
+
+
+def _attach_coordinates(df: pd.DataFrame, features: list[dict[str, Any]]) -> None:
+    """Attach a ``geometry`` column of raw coordinate lists to ``df`` (in
+    place) when any feature carries coordinates; features without geometry
+    get ``None``. Shared by the non-geopandas GeoJSON feature-frame builders.
+    """
+    geoms = [(f.get("geometry") or {}).get("coordinates") for f in features]
+    if any(g is not None for g in geoms):
+        df["geometry"] = geoms
+
+
+def _get_resp_data(
+    resp: httpx.Response,
+    geopd: bool,
+    *,
+    body: dict[str, Any] | None = None,
+) -> pd.DataFrame:
+    """
+    Extracts and normalizes data from an HTTP response containing GeoJSON features.
+
+    Parameters
+    ----------
+    resp : httpx.Response
+        The HTTP response object expected to contain a JSON body
+        with a "features" key.
+    geopd : bool
+        Indicates whether geopandas is installed and should be used to
+        handle geometries.
+    body : dict, optional
+        Pre-parsed JSON body for ``resp``. When provided, skips the
+        ``resp.json()`` call — useful when the caller has already
+        decoded the body for its own use (avoids a second parse pass).
+
+    Returns
+    -------
+    gpd.GeoDataFrame or pd.DataFrame
+        With ``geopd`` True, a ``GeoDataFrame`` (downgraded to a plain
+        ``DataFrame`` when every geometry is null). Otherwise a plain
+        ``DataFrame`` of the feature properties plus an ``id`` column
+        (always present, possibly all-None) and a ``geometry`` column
+        (coordinates list) when at least one feature includes geometry.
+        An empty frame of the matching type when there are no features.
+
+    Notes
+    -----
+    The non-geopandas branch builds the frame directly from each
+    feature's ``properties`` dict, plus the top-level ``id`` and
+    ``geometry.coordinates`` columns — the ``id`` column is always
+    added (so the downstream rename to the service-specific output id
+    works even on an all-None id), while the ``geometry`` column is
+    added only when at least one feature carries geometry. This skips
+    the GeoJSON envelope entirely, so newly-added Feature-level fields
+    (e.g. ``geometry.type`` after USGS migrated to full GeoJSON
+    geometry objects) can't leak into the result frame; no reactive
+    drop-list needs maintenance every time the upstream schema
+    grows.
+    """
+    if body is None:
+        body = resp.json()
+    # Key the empty-result short-circuit off ``features`` rather than
+    # ``numberReturned``: the main Water Data API reports ``numberReturned``,
+    # but the NGWMN OGC API omits it, so trusting it would discard pages that
+    # actually carry features. An absent/empty ``features`` is also the real
+    # schema-drift shape (a 200 with no features) — treat it as empty rather
+    # than crash with a ``KeyError`` downstream, which ``_paginate`` would
+    # mistake for a transient transport error. ``_empty_feature_frame``
+    # preserves the GeoDataFrame type on the short-circuit (see its docstring).
+    features = body.get("features") or []
+    if not features:
+        return _empty_feature_frame(geopd)
+
+    if not geopd:
+        df = pd.json_normalize([f.get("properties") or {} for f in features], sep="_")
+        # Always materialize the ``id`` column (may be all-None) so
+        # ``_arrange_cols``'s ``df.rename(columns={"id": output_id})``
+        # produces the documented service-specific output_id column
+        # (daily_id, channel_measurements_id, …) even if the upstream
+        # response carried no feature-level id.
+        df["id"] = [f.get("id") for f in features]
+        _attach_coordinates(df, features)
+        return df
+
+    # Organize json into geodataframe and make sure id column comes along.
+    # NGWMN observation collections (water levels, lithology, …) return
+    # features with no ``geometry`` key at all, which
+    # ``GeoDataFrame.from_features`` can't handle (it indexes
+    # ``feature["geometry"]`` directly). Default the key to ``None`` for only
+    # those features so the call is safe; the all-null check below then yields
+    # a plain DataFrame. Features that already carry geometry (the common
+    # sites case) are passed through without a per-feature dict copy.
+    df = gpd.GeoDataFrame.from_features(
+        [f if "geometry" in f else {**f, "geometry": None} for f in features]
+    )
+    # Mirror the non-geopandas branch's defensive ``f.get("id")`` so a feature
+    # missing a top-level ``id`` yields None rather than a KeyError.
+    df["id"] = [f.get("id") for f in features]
+    df = df[["id"] + [col for col in df.columns if col != "id"]]
+
+    # If no geometry present, then return pandas dataframe. A geodataframe
+    # is not needed.
+    if df["geometry"].isnull().all():
+        df = pd.DataFrame(df.drop(columns="geometry"))
+
+    return df
+
+
+@asynccontextmanager
+async def _client_for(
+    client: httpx.AsyncClient | None,
+) -> AsyncIterator[httpx.AsyncClient]:
+    """
+    Yield a usable async client, picking the best available source.
+
+    Resolution order:
+
+    1. ``client`` if the caller supplied one (borrowed; not closed
+       here — the caller owns its lifecycle).
+    2. The chunker's shared async client if we're inside a
+       :class:`~dataretrieval.ogc.chunking.ChunkedCall` run (per
+       :func:`chunking.get_active_client`). Borrowed; the chunker
+       closes it on exit.
+    3. A fresh ``httpx.AsyncClient(**HTTPX_DEFAULTS)`` opened here and
+       closed on context exit.
+
+    Parameters
+    ----------
+    client : httpx.AsyncClient or None
+        A caller-owned client to borrow, or ``None`` to defer to the
+        chunker's shared client or a temporary one.
+
+    Yields
+    ------
+    httpx.AsyncClient
+        The chosen client.
+    """
+    if client is not None:
+        yield client
+        return
+    shared = get_active_client()
+    if shared is not None:
+        yield shared
+        return
+    async with httpx.AsyncClient(**HTTPX_DEFAULTS) as new:
+        yield new
+
+
+def _aggregate_paginated_response(
+    initial: httpx.Response,
+    last: httpx.Response,
+    total_elapsed: timedelta,
+) -> httpx.Response:
+    """
+    Build a single response covering a paginated call.
+
+    Returns a shallow copy of ``initial`` with ``.headers`` set to the
+    LAST page's (so downstream sees current ``x-ratelimit-remaining``)
+    and ``.elapsed`` set to total wall-clock. The canonical
+    ``initial.url`` is preserved (it's the user's original query).
+    Both ``initial`` and ``last`` are left unmutated, mirroring the
+    convention of
+    :func:`dataretrieval.ogc.chunking._combine_chunk_responses`.
+
+    Parameters
+    ----------
+    initial : httpx.Response
+        First-page response (the canonical one for ``md.url``).
+    last : httpx.Response
+        Last-page response — supplies the headers to copy over.
+    total_elapsed : datetime.timedelta
+        Cumulative wall-clock across every page, including ``initial``.
+
+    Returns
+    -------
+    httpx.Response
+        A shallow copy of ``initial`` with ``.headers`` set to a fresh
+        ``httpx.Headers`` and ``.elapsed`` set to the cumulative
+        wall-clock. ``initial.headers`` / ``initial.elapsed`` are
+        never mutated, so callers holding a pre-pagination reference
+        still see the original first-page values.
+    """
+    final = copy.copy(initial)  # the rebinding below touches only this copy
+    final.headers = httpx.Headers(last.headers)  # new object, not last's own
+    final.elapsed = total_elapsed
+    return final
+
+
+_Cursor = TypeVar("_Cursor")  # page-cursor type linking _paginate's two callbacks
+
+# Optional cap on the total rows a single paginated call accumulates before it
+# stops following ``next`` links. ``None`` (the default the data getters use)
+# means "no cap — fetch the whole series". Set via :func:`_row_cap` so the deep
+# ``_paginate`` loop can honor it without threading the value through the
+# generic chunker; this mirrors the ``_progress`` ambient-reporter pattern.
+_row_cap_var: ContextVar[int | None] = ContextVar("ogc_row_cap", default=None)
+
+
+@contextmanager
+def _row_cap(max_rows: int | None) -> Iterator[None]:
+    """Cap the rows any :func:`_paginate` under this context will
+    accumulate (``None`` = uncapped); the prior cap is restored on exit.
+    Entered by :func:`get_ogc_data` with its ``max_rows`` argument."""
+    token = _row_cap_var.set(max_rows)
+    try:
+        yield
+    finally:
+        _row_cap_var.reset(token)
+
+
+# OGC base URL for the active request. ``get_ogc_data`` sets it per call so the
+# shared request builder (:func:`_construct_api_requests`) can target either the
+# main Water Data API or the NGWMN sub-API without threading the value through
+# the generic chunker; this mirrors the ``_row_cap`` ambient pattern. The
+# default is the main API, so every existing getter is unaffected.
+_ogc_base_url_var: ContextVar[str] = ContextVar("ogc_base_url", default=OGC_API_URL)
+
+
+@contextmanager
+def _ogc_base_url(base_url: str) -> Iterator[None]:
+    """Make ``base_url`` the ambient OGC base that
+    :func:`_construct_api_requests` targets inside the block; the previous
+    value is restored on exit. Entered by :func:`get_ogc_data` per call."""
+    token = _ogc_base_url_var.set(base_url)
+    try:
+        yield
+    finally:
+        _ogc_base_url_var.reset(token)
+
+
+# Per-call OGC dialect (which services need POST/CQL2, which use date-only time
+# args). ``get_ogc_data`` sets it so the shared request builder
+# (:func:`_construct_api_requests`) can adapt to the active API without
+# threading the value through the generic chunker; this mirrors the
+# ``_ogc_base_url`` ambient pattern. The default is a plain OGC API.
+_dialect_var: ContextVar[OgcDialect] = ContextVar(
+    "ogc_dialect", default=_DEFAULT_DIALECT
+)
+
+
+@contextmanager
+def _dialect(dialect: OgcDialect) -> Iterator[None]:
+    """Make ``dialect`` the active :class:`OgcDialect` that
+    :func:`_construct_api_requests` reads for CQL2-vs-GET routing and
+    date-only formatting inside the block; the prior one is restored on exit."""
+    token = _dialect_var.set(dialect)
+    try:
+        yield
+    finally:
+        _dialect_var.reset(token)
+
+
+async def _paginate(
+    initial_req: httpx.Request,
+    *,
+    parse_response: Callable[[httpx.Response], tuple[pd.DataFrame, _Cursor | None]],
+    follow_up: Callable[[_Cursor, httpx.AsyncClient], Awaitable[httpx.Response]],
+    client: httpx.AsyncClient | None = None,
+) -> tuple[pd.DataFrame, httpx.Response]:
+    """
+    Drive a paginated request to completion over an
+    :class:`httpx.AsyncClient`.
+
+    The common shape behind the paginated fetch paths (e.g.
+    :func:`_walk_pages`): send the initial request, then loop calling
+    ``follow_up`` until ``parse_response`` reports a ``None`` cursor,
+    accumulating frames and elapsed time. Any mid-pagination failure
+    raises ``DataRetrievalError`` wrapping the cause — the API exposes no
+    resume cursor, so the caller's only recovery is to retry the whole
+    call. Issuing HTTP asynchronously lets the multiple sub-requests of a
+    chunked call run concurrently under
+    :meth:`~dataretrieval.ogc.chunking.ChunkedCall._run`.
+
+    Parameters
+    ----------
+    initial_req : httpx.Request
+        First-page request to send.
+    parse_response : callable
+        ``resp -> (df, next_cursor_or_None)``. Returns the page's
+        DataFrame and the cursor (URL, token, …) used to drive
+        ``follow_up`` for the next page; ``None`` terminates the loop.
+    follow_up : callable
+        ``(cursor, client) -> Awaitable[httpx.Response]``. Builds and
+        sends the next-page request.
+    client : httpx.AsyncClient, optional
+        Caller-borrowed client. ``None`` (default) means use the
+        chunker's shared client (if inside a chunked call) or open
+        a temporary one.
+
+    Returns
+    -------
+    df : pandas.DataFrame
+        Concatenation of every fetched page's parsed frame, truncated to
+        the ambient :func:`_row_cap` when one is set.
+    response : httpx.Response
+        A shallow copy of the first-page response, with ``.headers``
+        rebuilt as a fresh ``httpx.Headers`` reflecting the last page and
+        ``.elapsed`` set to cumulative wall-clock. The first page's URL is
+        preserved and the original first-page response is not mutated.
+
+    Raises
+    ------
+    DataRetrievalError
+        On a non-200 initial response, the typed subclass for the status from
+        :func:`_raise_for_non_200` (a
+        :class:`~dataretrieval.exceptions.TransientError` for a retryable
+        429 / 5xx, otherwise a fatal :class:`~dataretrieval.exceptions.HTTPError`);
+        or, on an initial-page parse failure or any subsequent-page failure, a
+        base ``DataRetrievalError`` wrapping the cause (built by
+        :func:`_paginated_failure_message`, original exception on ``__cause__``).
+    httpx.HTTPError
+        Network-level failures on the *initial* request (e.g.
+        ``ConnectError``, ``TimeoutException``) propagate unmodified
+        so callers can branch on the specific type; equivalent
+        failures on subsequent pages are wrapped per above.
+    """
+    logger.debug("Requesting: %s", initial_req.url)
+    reporter = _progress.current()
+    async with _client_for(client) as sess:
+        resp = await sess.send(initial_req)
+        _raise_for_non_200(resp)
+        initial_response = resp
+        total_elapsed = _safe_elapsed(resp)
+
+        try:
+            df, cursor = parse_response(resp)
+        except Exception as e:  # noqa: BLE001
+            # Initial-page parse failures (malformed JSON, missing
+            # ``features``, schema drift) get the same wrapped-message
+            # treatment as follow-up failures so callers see a consistent
+            # diagnostic regardless of which page broke.
+            logger.warning("Initial response parse failed.")
+            raise DataRetrievalError(_paginated_failure_message(0, e)) from e
+        dfs = [df]
+        # Stop following ``next`` links once the optional row cap is reached
+        # (see :func:`_row_cap`); ``None`` means uncapped. The concatenation
+        # is sliced to the cap below so a final over-budget page can't exceed it.
+        cap = _row_cap_var.get()
+        nrows = len(df)
+        if reporter is not None:
+            reporter.set_rate_remaining(
+                resp.headers.get(_QUOTA_HEADER),
+                limit=resp.headers.get("x-ratelimit-limit"),
+            )
+            reporter.add_page(rows=len(df))
+        while cursor is not None and (cap is None or nrows < cap):
+            try:
+                resp = await follow_up(cursor, sess)
+                _raise_for_non_200(resp)
+                df, cursor = parse_response(resp)
+                dfs.append(df)
+                nrows += len(df)
+                total_elapsed += _safe_elapsed(resp)
+                if reporter is not None:
+                    reporter.set_rate_remaining(
+                        resp.headers.get(_QUOTA_HEADER),
+                        limit=resp.headers.get("x-ratelimit-limit"),
+                    )
+                    reporter.add_page(rows=len(df))
+            except Exception as e:  # noqa: BLE001
+                logger.warning(
+                    "Request failed at cursor %r. Data download interrupted.",
+                    cursor,
+                )
+                raise DataRetrievalError(_paginated_failure_message(len(dfs), e)) from e
+
+        # Aggregate headers / elapsed onto a COPY of the initial
+        # response so the user's caller never sees an in-place
+        # mutation of the response object they may have inspected
+        # mid-pagination via a hook or test fixture.
+        final_response = _aggregate_paginated_response(
+            initial_response, resp, total_elapsed
+        )
+        result = pd.concat(dfs, ignore_index=True)
+        if cap is not None:
+            result = result.head(cap)
+        return result, final_response
+
+
+def _ogc_parse_response(
+    resp: httpx.Response, *, geopd: bool
+) -> tuple[pd.DataFrame, str | None]:
+    """Parse one OGC API page: extract the DataFrame and the next-page URL.
+
+    The parse strategy :func:`_walk_pages` hands to :func:`_paginate`.
+    Decodes the JSON body once and shares it with both helpers. Coerces a
+    falsy cursor (e.g. empty string) to ``None`` so the paginate loop's
+    ``while cursor is not None`` terminates instead of spinning.
+    """
+    body = resp.json()
+    return (
+        _get_resp_data(resp, geopd=geopd, body=body),
+        _next_req_url(resp, body=body) or None,
+    )
+
+
+async def _walk_pages(
+    geopd: bool,
+    req: httpx.Request,
+    client: httpx.AsyncClient | None = None,
+) -> tuple[pd.DataFrame, httpx.Response]:
+    """
+    Iterate paginated OGC API responses asynchronously and aggregate
+    them into one DataFrame.
+
+    Thin wrapper that hands off to :func:`_paginate` with
+    OGC-specific strategies: pages are parsed via :func:`_get_resp_data`
+    (through :func:`_ogc_parse_response`) and the next-page cursor is the
+    URL from the response's ``links`` array (per :func:`_next_req_url`).
+
+    Parameters
+    ----------
+    geopd : bool
+        Whether geopandas is installed (drives geometry handling).
+    req : httpx.Request
+        The initial HTTP request to send.
+    client : httpx.AsyncClient, optional
+        Caller-borrowed client; ``None`` defers client management to
+        :func:`_paginate`.
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame containing the aggregated results from all pages.
+    httpx.Response
+        Aggregated response — initial-request URL (for query identity),
+        final page's headers (so downstream sees current rate-limit
+        state), and cumulative ``elapsed`` summed across pages.
+
+    Raises
+    ------
+    DataRetrievalError
+        See :func:`_paginate`.
+    httpx.HTTPError
+        See :func:`_paginate`.
+    """
+    method = req.method  # ``httpx.Request.method`` is already upper-cased.
+    headers = req.headers  # reused verbatim on every follow-up page
+    content = req.content if method == "POST" else None  # POST pages resend the body
+
+    async def follow_up(cursor: str, sess: httpx.AsyncClient) -> httpx.Response:
+        return await sess.request(method, cursor, headers=headers, content=content)
+
+    return await _paginate(
+        req,
+        parse_response=functools.partial(_ogc_parse_response, geopd=geopd),
+        follow_up=follow_up,
+        client=client,
+    )
+
+
+def _deal_with_empty(
+    return_list: pd.DataFrame, properties: list[str] | None, service: str
+) -> pd.DataFrame:
+    """
+    Handles empty DataFrame results by returning a DataFrame with appropriate columns.
+
+    If `return_list` is empty, determines the column names to use:
+        - If `properties` is not provided or contains only NaN values,
+          uses the ``properties`` keys from :func:`_check_ogc_requests`.
+        - Otherwise, uses the provided `properties` list as column names.
+
+    Parameters
+    ----------
+    return_list : pd.DataFrame
+        The DataFrame to check for emptiness.
+    properties : list of str or None
+        List of property names to use as columns, or None.
+    service : str
+        The service endpoint to query for schema properties if needed.
+
+    Returns
+    -------
+    pd.DataFrame
+        The original frame if not empty, otherwise a new empty plain
+        ``DataFrame`` with those columns (even if a GeoDataFrame came in).
+    """
+    if return_list.empty:
+        if not properties or all(pd.isna(properties)):
+            schema = _check_ogc_requests(endpoint=service, req_type="schema")
+            properties = list(schema.get("properties", {}).keys())
+        return pd.DataFrame(columns=properties)
+    return return_list
+
+
+def _arrange_cols(
+    df: pd.DataFrame,
+    properties: list[str] | None,
+    output_id: str,
+    extra_id_cols: frozenset[str] | set[str] = frozenset(),
+) -> pd.DataFrame:
+    """
+    Rearranges and renames columns in a DataFrame based on provided
+    properties and the service output id.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The input DataFrame whose columns are to be rearranged or renamed.
+    properties : list of str or None
+        Requested columns, in order. When given (and not all-NaN), the frame
+        is restricted to these, with ``"id"`` exposed as ``output_id``.
+    output_id : str
+        The name to which the 'id' column is renamed (always applied).
+    extra_id_cols : set or frozenset, optional
+        Synthetic, meaningless-to-user id columns to move to the end of the
+        result frame (keeping their relative order) when ``properties`` was
+        not specified. Defaults to an empty set (no reordering).
+
+    Returns
+    -------
+    pd.DataFrame or gpd.GeoDataFrame
+        The DataFrame with columns rearranged and/or renamed according
+        to the specified properties and output_id.
+    """
+
+    # Rename id column to output_id
+    df = df.rename(columns={"id": output_id})
+
+    if properties and not all(pd.isna(properties)):
+        # Don't alias the caller's list — we mutate below.
+        local_properties = list(properties)
+        if "geometry" in df.columns and "geometry" not in local_properties:
+            local_properties.append("geometry")
+        # 'id' is a valid service column, but expose it under the
+        # service-specific output_id name instead.
+        if "id" in local_properties:
+            local_properties[local_properties.index("id")] = output_id
+        df = df.loc[:, [col for col in local_properties if col in df.columns]]
+
+    # Move meaningless-to-user, extra id columns to the end
+    # of the dataframe, if they exist
+    extra_id_col = set(df.columns).intersection(extra_id_cols)
+
+    # When ``properties`` is unset (None/NaN) move those columns to the end,
+    # in frame order — iterating the set itself would make the column order
+    # vary between runs; requested properties keep the requested order.
+    if extra_id_col and (properties is None or all(pd.isna(properties))):
+        id_col_order = [col for col in df.columns if col not in extra_id_col] + [
+            col for col in df.columns if col in extra_id_col
+        ]
+        df = df.loc[:, id_col_order]
+
+    return df
+
+
+def _type_cols(df: pd.DataFrame, dialect: OgcDialect) -> pd.DataFrame:
+    """
+    Casts columns into appropriate types per the API ``dialect``.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The input DataFrame.
+    dialect : OgcDialect
+        Supplies ``time_cols`` / ``numerical_cols`` — which columns to
+        coerce to datetime/numeric. The engine itself holds no
+        API-specific column knowledge.
+
+    Returns
+    -------
+    pd.DataFrame
+        The same ``df`` object, mutated in place; values that fail to parse
+        become NaT/NaN (``errors="coerce"``).
+    """
+    cols = set(df.columns)
+    for col in cols.intersection(dialect.time_cols):
+        df[col] = pd.to_datetime(df[col], errors="coerce")
+
+    for col in cols.intersection(dialect.numerical_cols):
+        df[col] = pd.to_numeric(df[col], errors="coerce")
+
+    return df
+
+
+def _sort_rows(df: pd.DataFrame, dialect: OgcDialect) -> pd.DataFrame:
+    """
+    Sorts rows by the API ``dialect``'s ``sort_cols`` (in priority order).
+
+    Sorting is applied only when the primary (first) sort column is
+    present; any later sort columns also present become secondary keys.
+    This mirrors the historical Water Data behavior (sort by ``time``,
+    then ``monitoring_location_id``) while letting other APIs key off
+    their own columns (e.g. NGWMN's ``sample_time``).
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The input DataFrame.
+    dialect : OgcDialect
+        Supplies ``sort_cols``.
+
+    Returns
+    -------
+    pd.DataFrame
+        A sorted frame with a reset index, or ``df`` itself when the dialect
+        has no sort columns or the primary one is absent.
+    """
+    if not dialect.sort_cols or dialect.sort_cols[0] not in df.columns:
+        return df
+    present = [c for c in dialect.sort_cols if c in df.columns]
+    return df.sort_values(by=present, ignore_index=True)
+
+
+# Matches a lowercase letter or digit immediately followed by an uppercase
+# letter — the camelCase/PascalCase word boundary where a ``_`` is inserted.
+# A letter/digit boundary is intentionally NOT split (so ``navd88`` stays put).
+_CAMEL_BOUNDARY_RE = re.compile(r"([a-z0-9])([A-Z])")
+
+
+def _to_snake_case(name: str) -> str:
+    """Convert a camelCase/PascalCase column name to snake_case.
+
+    Inserts an underscore only at a lowercase-or-digit followed by an
+    uppercase boundary, then lowercases the whole string. Names that are
+    already snake_case or all-lowercase are returned unchanged; a run of
+    capitals is not split internally (``someXMLField`` -> ``some_xmlfield``).
+
+    Examples
+    --------
+    >>> _to_snake_case("waterLevelObs")
+    'water_level_obs'
+    >>> _to_snake_case("monitoring_location_id")
+    'monitoring_location_id'
+    >>> _to_snake_case("navd88")
+    'navd88'
+    """
+    return _CAMEL_BOUNDARY_RE.sub(r"\1_\2", name).lower()
+
+
+def _finalize_ogc(
+    frame: pd.DataFrame,
+    response: httpx.Response,
+    *,
+    properties: list[str] | None,
+    output_id: str,
+    convert_type: bool,
+    service: str,
+    max_rows: int | None = None,
+    extra_id_cols: frozenset[str] | set[str] = frozenset(),
+    dialect: OgcDialect = _DEFAULT_DIALECT,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """Shape a combined OGC result into the user-facing ``(df, md)``.
+
+    The single home for the OGC getters' result shaping: empties
+    normalized, column names normalized to snake_case, types coerced (when
+    ``convert_type``), the wire ``id`` renamed and columns ordered, rows
+    sorted, optionally truncated to ``max_rows``, and the response wrapped
+    as :class:`~dataretrieval.utils.BaseMetadata`.
+
+    Injected into the chunker as its ``finalize`` hook (see
+    :data:`~dataretrieval.ogc.chunking._Finalize`) so the
+    un-interrupted return *and* a resumed ``ChunkInterrupted.call.resume()``
+    produce the same post-processed ``(DataFrame, BaseMetadata)`` shape, not
+    the chunker's raw frame and bare ``httpx.Response``.
+
+    ``max_rows`` is applied here (last, after sorting, on the *combined* frame)
+    rather than only per-sub-request, so a chunked call's total is bounded
+    to exactly ``max_rows`` and a resumed call honors the cap too — the
+    per-``_paginate`` ``_row_cap`` is only an early-stop download bound.
+    """
+    frame = _deal_with_empty(frame, properties, service)
+    # Normalize to PEP-8 snake_case column names *first*, so the dialect's
+    # ``time_cols``/``numerical_cols``/``sort_cols`` (all snake_case) match
+    # regardless of whether the API returns snake_case (Water Data, where
+    # this is a no-op) or camelCase (a sibling OGC API). Doing it before
+    # type coercion is what makes ``convert_type`` reach a camelCase field.
+    renames = {
+        col: snake
+        for col in frame.columns
+        if isinstance(col, str) and (snake := _to_snake_case(col)) != col
+    }
+    if renames:
+        frame = frame.rename(columns=renames)
+    if convert_type:
+        frame = _type_cols(frame, dialect)
+    frame = _arrange_cols(frame, properties, output_id, extra_id_cols)
+    frame = _sort_rows(frame, dialect)
+    if max_rows is not None:
+        frame = frame.head(max_rows)
+    return frame, BaseMetadata(response)
+
+
+def get_ogc_data(
+    args: dict[str, Any],
+    service: str,
+    output_id: str,
+    *,
+    max_rows: int | None = None,
+    base_url: str = OGC_API_URL,
+    extra_id_cols: frozenset[str] | set[str] = frozenset(),
+    dialect: OgcDialect | None = None,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """
+    Retrieves OGC (Open Geospatial Consortium) data from a specified
+    endpoint and returns it as a pandas DataFrame with metadata.
+
+    This function prepares request arguments, constructs API requests,
+    handles pagination, processes the results, and formats output
+    according to the specified parameters.
+
+    Parameters
+    ----------
+    args : dict[str, Any]
+        Request arguments (copied, not mutated); ``convert_type`` is popped here.
+    service : str
+        The OGC API collection name (e.g., ``"daily"``,
+        ``"monitoring-locations"``, ``"continuous"``).
+    output_id : str
+        The user-facing id column the wire ``id`` is renamed to. Required —
+        the per-API service-to-id map lives in the caller, not here.
+    max_rows : int, optional
+        Stop paginating once this many rows have been collected and
+        truncate the result to exactly ``max_rows``. ``None`` (default)
+        fetches the full result. Intended for cheap previews of large,
+        un-chunked tables (e.g. :func:`get_reference_table`).
+    base_url : str, optional
+        OGC API base URL to target. Defaults to the main Water Data API.
+    extra_id_cols : set or frozenset, optional
+        Synthetic id columns to push to the end of a result frame (see
+        :func:`_arrange_cols`). Defaults to an empty set.
+    dialect : OgcDialect, optional
+        Per-API request quirks (CQL2-only services, date-only services).
+        Defaults to a plain OGC API with neither.
+
+    Returns
+    -------
+    pd.DataFrame or gpd.GeoDataFrame
+        A DataFrame containing the retrieved and processed OGC data.
+    BaseMetadata
+        A metadata object containing request information including URL and query time.
+
+    Raises
+    ------
+    ValueError
+        If ``max_rows`` is given but is not a positive integer (``bool`` and
+        ``float`` values are rejected); raised before any request is sent.
+    """
+    # Enforce a genuine positive integer: a float (even ``10.0``) or ``bool``
+    # would pass a bare ``< 1`` check and then crash deep in
+    # ``pd.DataFrame.head`` with an opaque ``TypeError`` after HTTP I/O has
+    # already fired. ``numbers.Integral`` (not ``int``) so numpy integers —
+    # e.g. ``max_rows`` derived from a numpy/pandas computation — are accepted;
+    # ``bool`` is an ``Integral`` subtype, so exclude it explicitly.
+    if max_rows is not None and (
+        not isinstance(max_rows, numbers.Integral)
+        or isinstance(max_rows, bool)
+        or max_rows < 1
+    ):
+        raise ValueError(f"max_rows must be a positive integer (got {max_rows!r}).")
+
+    if dialect is None:
+        dialect = _DEFAULT_DIALECT
+
+    args = args.copy()
+    args["service"] = service
+    args = _switch_arg_id(args, id_name=output_id, service=service)
+    # Capture `properties` before the id-switch so post-processing sees
+    # the user-facing names, not the wire-format ones.
+    properties = args.get("properties")
+    args["properties"] = _switch_properties_id(
+        properties, id_name=output_id, service=service
+    )
+    convert_type = args.pop("convert_type", False)
+    args = {k: v for k, v in args.items() if v is not None}
+
+    # Post-processing is injected into the chunker rather than applied here,
+    # so it runs on *every* exit: the normal return AND a later
+    # ``exc.call.resume()`` after a ChunkInterrupted (which never re-enters
+    # this function). ``_finalize_ogc`` is the single source of result shape;
+    # it also applies ``max_rows`` to the *combined* frame so the cap is the
+    # exact total even when the plan chunks or the call is resumed, while
+    # ``_row_cap`` below only early-stops each sub-request's pagination.
+    finalize = functools.partial(
+        _finalize_ogc,
+        properties=properties,
+        output_id=output_id,
+        convert_type=convert_type,
+        service=service,
+        max_rows=max_rows,
+        extra_id_cols=extra_id_cols,
+        dialect=dialect,
+    )
+    with _progress.progress_context(service=service), _row_cap(max_rows):
+        with _ogc_base_url(base_url), _dialect(dialect):
+            return _fetch_once(args, finalize=finalize)
+
+
+@chunking.multi_value_chunked(build_request=_construct_api_requests)
+async def _fetch_once(
+    args: dict[str, Any],
+) -> tuple[pd.DataFrame, httpx.Response]:
+    """Send one prepared-args OGC request; return ``(frame, response)``.
+
+    The body itself only builds a single request from ``args`` and awaits
+    ``_walk_pages`` on it; chunking is handled by the decorator.
+    ``@chunking.multi_value_chunked`` models every multi-value list
+    parameter and the cql-text filter as a chunkable axis, greedy-halves
+    the biggest chunk across all axes until each sub-request URL fits,
+    and iterates the cartesian product. With no chunkable inputs the
+    decorator passes args through unchanged. The decorator gathers every
+    sub-request over one shared :class:`httpx.AsyncClient` (concurrency
+    bounded by a semaphore, sized from ``API_USGS_CONCURRENT``) and
+    returns a *synchronous* wrapper, so ``get_ogc_data`` keeps calling
+    ``_fetch_once(args, finalize=...)`` synchronously.
+    """
+    req = _construct_api_requests(**args)
+    return await _walk_pages(geopd=GEOPANDAS, req=req)
+
+
+def _run_sync(
+    make_coro: Callable[[], Awaitable[tuple[pd.DataFrame, httpx.Response]]],
+    *,
+    service: str,
+) -> tuple[pd.DataFrame, httpx.Response]:
+    """Drive an async OGC fetch to completion from synchronous code.
+
+    Opens the service progress context and runs ``make_coro()`` through a
+    short-lived ``anyio`` blocking portal (a worker thread), so the
+    non-chunked getters work whether or not the caller is already inside an
+    event loop (Jupyter/async apps). The portal copies the calling context,
+    so the active progress reporter still reaches the sub-requests.
+
+    Shared by the non-chunked fetch paths; the chunked OGC getters drive
+    their own portal inside :meth:`chunking.ChunkedCall.resume`. An
+    ``httpx.TransportError`` is re-raised via ``_network_error``.
+    """
+    with _progress.progress_context(service=service):
+        with start_blocking_portal() as portal:
+            try:
+                return portal.call(make_coro)
+            except httpx.TransportError as exc:
+                # The initial-request connection failure ``_paginate`` lets
+                # through raw; mid-pagination failures are already typed.
+                # Report the base URL actually targeted (NGWMN/sibling APIs
+                # set their own via ``_ogc_base_url``), not a hardcoded host.
+                raise _network_error(_ogc_base_url_var.get(), exc) from exc
+
+
+# ``AGENCY-ID``: a hyphen-separated agency prefix and local id. The local id
+# may itself contain hyphens (``\S+`` after the first separator) — NGWMN
+# aggregates many non-USGS agencies whose local ids aren't bare digits, so
+# the prefix is hyphen/whitespace-free and the local id only whitespace-free.
+_MONITORING_LOCATION_ID_RE = re.compile(r"[^-\s]+-\S+")
+
+# Default set of iterable-shaped params that ``_get_args`` must NOT push
+# through ``_normalize_str_iterable`` (date-range params may carry
+# ``pd.NaT``/None or interval strings; ``bbox`` is ``list[float]``). Callers
+# with extra numeric params (e.g. the Water Data API's ``water_year``,
+# ``thresholds``) pass their own superset.
+_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"bbox"}
+
+
+def _normalize_str_iterable(
+    value: str | Iterable[str] | None,
+    param_name: str = "value",
+) -> str | list[str] | None:
+    """Validate that ``value`` is None, a string, or an iterable of strings.
+
+    ``None`` and a bare ``str`` are returned unchanged. Any other iterable
+    (``list``, ``tuple``, pandas ``Series``/``Index``, ``numpy.ndarray``,
+    generators) is materialized to a ``list`` so downstream code that
+    branches on ``isinstance(v, (list, tuple))`` keeps working. ``Mapping``
+    types are rejected because iterating a mapping yields keys, not values.
+
+    Parameters
+    ----------
+    value : None, str, or iterable of str
+    param_name : str, optional
+        Used in error messages. Defaults to ``"value"``.
+
+    Returns
+    -------
+    None, str, or list of str
+
+    Raises
+    ------
+    TypeError
+        If the input isn't ``None``, ``str``, or a non-``Mapping``
+        iterable; or if any iterable element isn't a string.
+    """
+    if value is None:
+        return None
+    if isinstance(value, str):
+        return value
+    if isinstance(value, Mapping) or not isinstance(value, Iterable):
+        raise TypeError(
+            f"{param_name} must be a string or iterable of strings, "
+            f"not {type(value).__name__} (got {value!r})."
+        )
+    values: list[str] = []
+    for v in value:
+        if not isinstance(v, str):
+            raise TypeError(
+                f"{param_name} elements must be strings, "
+                f"not {type(v).__name__} (got {v!r})."
+            )
+        values.append(v)
+    return values
+
+
+def _as_str_list(
+    value: str | Iterable[str] | None,
+    param_name: str = "value",
+) -> list[str] | None:
+    """Normalize ``value`` to ``list[str]`` (``None`` passes through).
+
+    Wraps a bare ``str`` in a one-element list so a later ``",".join(...)``
+    doesn't iterate it per character. Other iterables are materialized by
+    :func:`_normalize_str_iterable`, whose ``TypeError`` propagates unchanged.
+    """
+    normalized = _normalize_str_iterable(value, param_name)
+    if isinstance(normalized, str):
+        return [normalized]
+    return normalized
+
+
+def _check_monitoring_location_id(
+    monitoring_location_id: str | Iterable[str] | None,
+) -> str | list[str] | None:
+    """Validate and normalize a ``monitoring_location_id`` value.
+
+    Combines :func:`_normalize_str_iterable` with the AGENCY-ID format
+    check that is unique to ``monitoring_location_id`` (the OGC spec
+    requires a hyphen separator, e.g. ``USGS-01646500``).
+
+    Parameters
+    ----------
+    monitoring_location_id : None, str, or iterable of str
+        See :func:`_normalize_str_iterable`. Each string is additionally
+        required to match the AGENCY-ID hyphen-separated format.
+
+    Returns
+    -------
+    None, str, or list of str
+
+    Raises
+    ------
+    TypeError
+        If the input isn't ``None``, ``str``, or a non-``Mapping``
+        iterable; or if any iterable element isn't a string.
+    ValueError
+        If any identifier doesn't fully match the AGENCY-ID pattern (an agency
+        prefix, a hyphen, then a local id, e.g. ``USGS-01646500``).
+    """
+    try:
+        value = _normalize_str_iterable(
+            monitoring_location_id, "monitoring_location_id"
+        )
+    except TypeError as exc:
+        # Re-raise with the AGENCY-ID hint the generic helper doesn't carry.
+        raise TypeError(
+            f"{exc} Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'."
+        ) from None
+    if value is None:
+        return None
+    for item in (value,) if isinstance(value, str) else value:
+        _check_id_format(item)
+    return value
+
+
+def _check_id_format(value: str) -> None:
+    """Raise ``ValueError`` unless ``value`` fully matches ``AGENCY-ID``."""
+    if not _MONITORING_LOCATION_ID_RE.fullmatch(value):
+        raise ValueError(
+            f"Invalid monitoring_location_id: {value!r}. "
+            f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'."
+        )
+
+
+def _get_args(
+    local_vars: dict[str, Any],
+    exclude: set[str] | None = None,
+    *,
+    no_normalize: frozenset[str] | set[str] = _NO_NORMALIZE_PARAMS,
+) -> dict[str, Any]:
+    """
+    Build the API-request kwargs dict from a getter's ``locals()``.
+
+    Drops bookkeeping keys (``service``, ``output_id``, anything in
+    ``exclude``) and ``None``-valued kwargs, then normalizes the
+    remaining values:
+
+    - ``monitoring_location_id`` is normalized and validated against the
+      AGENCY-ID format (per :func:`_check_monitoring_location_id`).
+    - ``properties`` is materialized to ``list[str]`` (a bare string
+      gets wrapped in a single-element list so downstream
+      ``",".join(properties)`` doesn't iterate per character).
+    - A non-string iterable in ``no_normalize`` (numeric params
+      such as ``water_year``, ``bbox``, ``thresholds``) is materialized
+      to a ``list`` with its element types preserved (no string
+      normalization), so the GET comma-join and the chunker — which test
+      ``list``/``tuple`` — handle it instead of ``str()``-ing the whole
+      array.
+    - Any other ``Iterable[str]`` (i.e. not in ``no_normalize``)
+      is materialized to ``list[str]`` via
+      :func:`_normalize_str_iterable` so downstream code that branches
+      on ``isinstance(v, (list, tuple))`` works for ``pandas.Series``,
+      ``numpy.ndarray``, generators, etc.
+    - Scalars and strings pass through unchanged.
+
+    Parameters
+    ----------
+    local_vars : dict[str, Any]
+        Dictionary of local variables, typically from ``locals()``.
+    exclude : set[str], optional
+        Additional keys to exclude from the resulting dictionary.
+    no_normalize : set[str] or frozenset[str], optional
+        Iterable-shaped params whose element types must be preserved
+        (no string normalization). Defaults to the generic date-range +
+        ``bbox`` set; callers with extra numeric params pass a superset.
+
+    Returns
+    -------
+    dict[str, Any]
+        Filtered and normalized arguments for API requests.
+    """
+    to_exclude = {"service", "output_id"}
+    if exclude:
+        to_exclude.update(exclude)
+
+    args: dict[str, Any] = {}
+    for k, v in local_vars.items():
+        if k in to_exclude or v is None:
+            continue
+        if k == "monitoring_location_id":
+            args[k] = _check_monitoring_location_id(v)
+        elif k == "properties":
+            args[k] = _as_str_list(v, k)
+        elif k in no_normalize and isinstance(v, Iterable) and not isinstance(v, str):
+            # Numeric params (water_year, bbox, thresholds, …) keep their
+            # element types — no string-normalization — but a non-string
+            # iterable (numpy array, pandas Series, generator) is materialized
+            # to a list so the GET comma-join and the chunker, which test
+            # ``list``/``tuple``, handle it instead of str()-ing the whole
+            # array. ``.tolist()`` yields native int/float; ``list()`` covers
+            # generators and other iterables. Scalars/strings fall through.
+            args[k] = v.tolist() if hasattr(v, "tolist") else list(v)
+        elif isinstance(v, str) or not isinstance(v, Iterable):
+            args[k] = v
+        else:
+            args[k] = _normalize_str_iterable(v, k)
+    return args
diff --git a/dataretrieval/waterdata/filters.py b/dataretrieval/ogc/filters.py
similarity index 88%
rename from dataretrieval/waterdata/filters.py
rename to dataretrieval/ogc/filters.py
index 5e1c0a67..8ec55bf0 100644
--- a/dataretrieval/waterdata/filters.py
+++ b/dataretrieval/ogc/filters.py
@@ -1,4 +1,4 @@
-"""CQL ``filter`` support for the Water Data OGC getters.
+"""CQL ``filter`` support for the OGC getters.
 
 Public:
 
@@ -8,6 +8,8 @@
 planner: ``_split_top_level_or`` (clause partitioning),
 ``_is_chunkable`` (filter-language gate), and
 ``_check_numeric_filter_pitfall`` (the lexicographic-comparison guard).
+``_quote_cql_str`` escapes a single CQL-text string literal, shared by any
+getter that *builds* a CQL filter (e.g. ``waterdata.ratings``).
 
 Other CQL shapes (``AND``, ``NOT``, ``LIKE``, spatial/temporal
 predicates, function calls) are forwarded verbatim — only top-level
@@ -49,6 +51,18 @@
 _QUOTED_STR_RE = re.compile(r"'[^']*'")
 
 
+def _quote_cql_str(value: str) -> str:
+    """Escape the contents of a CQL2-text string literal (unquoted result).
+
+    CQL2 text escapes a ``'`` inside a string literal by doubling it, so
+    ``O'Brien`` becomes ``O''Brien``; the enclosing ``'…'`` is NOT added —
+    wrap the result at the call site. Defends against malformed filters /
+    injection on arbitrary user input. Shared by every getter that builds a
+    CQL-text literal (e.g. the STAC ``/search`` filter in ``waterdata.ratings``).
+    """
+    return value.replace("'", "''")
+
+
 def _split_top_level_or(expr: str) -> list[str]:
     """Split ``expr`` at each top-level ``OR``, respecting quotes and parens.
 
diff --git a/dataretrieval/waterdata/_progress.py b/dataretrieval/ogc/progress.py
similarity index 96%
rename from dataretrieval/waterdata/_progress.py
rename to dataretrieval/ogc/progress.py
index 0e4963cd..6177c30f 100644
--- a/dataretrieval/waterdata/_progress.py
+++ b/dataretrieval/ogc/progress.py
@@ -1,16 +1,13 @@
-"""A single self-updating status line for paginated / chunked Water Data queries.
+"""A single self-updating status line for paginated / chunked OGC queries.
 
-Water Data getters fan out two ways the caller can't see: large multi-value
+OGC getters fan out two ways the caller can't see: large multi-value
 requests are split into URL-length-safe *chunks* (``chunking`` module), and each
 request follows ``next`` links across an unknown number of *pages*
-(``utils._paginate``). This module surfaces that work as one line on stderr,
+(``engine._paginate``). This module surfaces that work as one line on stderr,
 rewritten in place as data arrives::
 
     Retrieving: daily · 6 pages · 2,881 rows · 995/1,000 requests remaining
 
-It replaces the per-page ``logger.info`` calls that previously narrated the same
-events one line at a time.
-
 The active reporter lives in a :class:`~contextvars.ContextVar` rather than being
 threaded through every signature: progress is a cross-cutting concern that the
 chunk orchestrator (outer, chunk counts) and the page-walking loop (inner,
@@ -47,7 +44,7 @@ def _group_int(value: str) -> str:
 # state. (It does not give concurrent queries sharing one stderr separate
 # lines — they would still interleave.)
 _active: contextvars.ContextVar[ProgressReporter | None] = contextvars.ContextVar(
-    "waterdata_progress", default=None
+    "ogc_progress", default=None
 )
 
 # Where to register for an API key. Surfaced once when a query runs without an
diff --git a/dataretrieval/waterdata/__init__.py b/dataretrieval/waterdata/__init__.py
index 9b5ca610..7d3fce45 100644
--- a/dataretrieval/waterdata/__init__.py
+++ b/dataretrieval/waterdata/__init__.py
@@ -9,6 +9,8 @@
 
 from __future__ import annotations
 
+from dataretrieval.ogc.filters import FILTER_LANG
+
 # Public API exports
 from .api import (
     get_channel,
@@ -30,7 +32,6 @@
     get_stats_por,
     get_time_series_metadata,
 )
-from .filters import FILTER_LANG
 from .nearest import get_nearest_continuous
 from .ratings import get_ratings
 from .types import (
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index 336aa09b..b47ea5d7 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -16,6 +16,7 @@
 import httpx
 import pandas as pd
 
+from dataretrieval.ogc.filters import FILTER_LANG
 from dataretrieval.utils import (
     HTTPX_DEFAULTS,
     BaseMetadata,
@@ -23,7 +24,7 @@
     _get,
     to_str,
 )
-from dataretrieval.waterdata.filters import FILTER_LANG
+from dataretrieval.waterdata import stats
 from dataretrieval.waterdata.types import (
     CODE_SERVICES,
     METADATA_COLLECTIONS,
@@ -45,8 +46,8 @@
     _run_sync,
     _switch_properties_id,
     _walk_pages,
+    _with_state,
     get_ogc_data,
-    get_stats_data,
 )
 
 # Set up logger for this module
@@ -200,7 +201,7 @@ def get_daily(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -212,6 +213,13 @@ def get_daily(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -252,7 +260,7 @@ def get_daily(
         >>> # multiple sub-requests so the URL stays under the server's byte
         >>> # limit. Combined output looks like a single query.
         >>> sites_df, _ = dataretrieval.waterdata.get_monitoring_locations(
-        ...     state_name="Ohio",
+        ...     state="Ohio",
         ...     site_type="Stream",
         ... )
         >>> df, md = dataretrieval.waterdata.get_daily(
@@ -408,7 +416,7 @@ def get_continuous(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -420,6 +428,13 @@ def get_continuous(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -433,7 +448,7 @@ def get_continuous(
         ... )
 
         >>> # Pull several disjoint time windows in one call via a CQL
-        >>> # ``filter``. See ``dataretrieval.waterdata.filters`` for the
+        >>> # ``filter``. See ``dataretrieval.ogc.filters`` for the
         >>> # full grammar, auto-chunking, and pitfalls.
         >>> df, md = dataretrieval.waterdata.get_continuous(
         ...     monitoring_location_id="USGS-02238500",
@@ -464,6 +479,7 @@ def get_monitoring_locations(
     district_code: str | Iterable[str] | None = None,
     country_code: str | Iterable[str] | None = None,
     country_name: str | Iterable[str] | None = None,
+    state: str | Iterable[str] | None = None,
     state_code: str | Iterable[str] | None = None,
     state_name: str | Iterable[str] | None = None,
     county_code: str | Iterable[str] | None = None,
@@ -545,6 +561,10 @@ def get_monitoring_locations(
         The code for the country in which the monitoring location is located.
     country_name : string or iterable of strings, optional
         The name of the country in which the monitoring location is located.
+    state : string or iterable of strings, optional
+        State/territory filter (the recommended parameter). Accepts a full name
+        (``"Wisconsin"``), a two-letter postal code (``"WI"``), or a two-digit
+        ANSI/FIPS code (``"55"``).
     state_code : string or iterable of strings, optional
         State code. A two-digit ANSI code (formerly FIPS code) as defined by
         the American National Standards Institute, to define States and
@@ -713,7 +733,7 @@ def get_monitoring_locations(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -725,6 +745,13 @@ def get_monitoring_locations(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -744,8 +771,9 @@ def get_monitoring_locations(
     """
     service = "monitoring-locations"
 
-    # Build argument dictionary, omitting None values
-    args = _get_args(locals())
+    # Build argument dictionary, omitting None values (resolving the unified
+    # `state` argument into the OGC `state_name` queryable).
+    args = _get_args(_with_state(locals(), to="name", into="state_name"))
 
     return get_ogc_data(args, service)
 
@@ -757,6 +785,7 @@ def get_time_series_metadata(
     properties: str | Iterable[str] | None = None,
     statistic_id: str | Iterable[str] | None = None,
     hydrologic_unit_code: str | Iterable[str] | None = None,
+    state: str | Iterable[str] | None = None,
     state_name: str | Iterable[str] | None = None,
     last_modified: str | Iterable[str] | None = None,
     begin: str | Iterable[str] | None = None,
@@ -823,6 +852,10 @@ def get_time_series_metadata(
         to the largest (regions). Each hydrologic unit is identified by a unique
         hydrologic unit code (HUC) consisting of two to eight digits based on the
         four levels of classification in the hydrologic unit system.
+    state : string or iterable of strings, optional
+        State/territory filter (the recommended parameter). Accepts a full name
+        (``"Wisconsin"``), a two-letter postal code (``"WI"``), or a two-digit
+        ANSI/FIPS code (``"55"``).
     state_name : string or iterable of strings, optional
         The name of the state or state equivalent in which the monitoring location
         is located.
@@ -937,7 +970,7 @@ def get_time_series_metadata(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -949,6 +982,13 @@ def get_time_series_metadata(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -968,8 +1008,9 @@ def get_time_series_metadata(
     """
     service = "time-series-metadata"
 
-    # Build argument dictionary, omitting None values
-    args = _get_args(locals())
+    # Build argument dictionary, omitting None values (resolving the unified
+    # `state` argument into the OGC `state_name` queryable).
+    args = _get_args(_with_state(locals(), to="name", into="state_name"))
 
     return get_ogc_data(args, service)
 
@@ -998,6 +1039,7 @@ def get_combined_metadata(
     district_code: str | Iterable[str] | None = None,
     country_code: str | Iterable[str] | None = None,
     country_name: str | Iterable[str] | None = None,
+    state: str | Iterable[str] | None = None,
     state_code: str | Iterable[str] | None = None,
     state_name: str | Iterable[str] | None = None,
     county_code: str | Iterable[str] | None = None,
@@ -1106,6 +1148,10 @@ def get_combined_metadata(
         interval (``"start/end"``, optionally half-bounded with ``..``),
         or an ISO 8601 duration (e.g. ``"P1M"``, ``"PT36H"``). See
         :func:`get_time_series_metadata` for the full grammar.
+    state : string or iterable of strings, optional
+        State/territory filter (the recommended parameter). Accepts a full
+        name (``"Wisconsin"``), a two-letter postal code (``"WI"``), or a
+        two-digit ANSI/FIPS code (``"55"``).
     state_name, county_name, hydrologic_unit_code, site_type, \
 site_type_code : string or iterable of strings, optional
         Common location-catalog filters carried over from the
@@ -1131,7 +1177,7 @@ def get_combined_metadata(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -1143,6 +1189,13 @@ def get_combined_metadata(
     md : :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object pertaining to the query.
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -1160,7 +1213,7 @@ def get_combined_metadata(
 
         >>> # Every series in a single county, useful for area-of-interest workflows
         >>> df, md = dataretrieval.waterdata.get_combined_metadata(
-        ...     state_name="Wisconsin", county_name="Dane County"
+        ...     state="Wisconsin", county_name="Dane County"
         ... )
 
         >>> # Inventory across multiple HUCs, restricted to streams and springs
@@ -1198,7 +1251,8 @@ def get_combined_metadata(
     """
     service = "combined-metadata"
 
-    args = _get_args(locals())
+    # Resolve the unified `state` argument into the OGC `state_name` queryable.
+    args = _get_args(_with_state(locals(), to="name", into="state_name"))
 
     return get_ogc_data(args, service)
 
@@ -1347,7 +1401,7 @@ def get_latest_continuous(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -1359,6 +1413,13 @@ def get_latest_continuous(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -1543,7 +1604,7 @@ def get_latest_daily(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -1555,6 +1616,13 @@ def get_latest_daily(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -1731,7 +1799,7 @@ def get_field_measurements(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -1743,6 +1811,13 @@ def get_field_measurements(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -1846,7 +1921,7 @@ def get_field_measurements_metadata(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -1858,6 +1933,13 @@ def get_field_measurements_metadata(
     md : :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object pertaining to the query.
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -1969,7 +2051,7 @@ def get_peaks(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -1981,6 +2063,13 @@ def get_peaks(
     md : :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object pertaining to the query.
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -2063,6 +2152,13 @@ def get_reference_table(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object including the URL request and query time.
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
@@ -2443,6 +2539,7 @@ def get_stats_por(
     approval_status: str | None = None,
     computation_type: str | Iterable[str] | None = None,
     country_code: str | Iterable[str] | None = None,
+    state: str | Iterable[str] | None = None,
     state_code: str | Iterable[str] | None = None,
     county_code: str | Iterable[str] | None = None,
     start_date: str | None = None,
@@ -2453,6 +2550,7 @@ def get_stats_por(
     site_type_code: str | Iterable[str] | None = None,
     site_type_name: str | Iterable[str] | None = None,
     parameter_code: str | Iterable[str] | None = None,
+    normal_type: str | None = None,
     expand_percentiles: bool = True,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Get day-of-year and month-of-year water data statistics from the
@@ -2478,6 +2576,10 @@ def get_stats_por(
         arithmetic_mean, maximum, median, minimum, percentile.
     country_code: string, optional
         Country query parameter. API defaults to "US".
+    state: string or iterable of strings, optional
+        State/territory filter (the recommended parameter). Accepts a full name
+        ("Wisconsin"), a two-letter postal code ("WI"), or a two-digit
+        ANSI/FIPS code ("55").
     state_code: string, optional
         State query parameter. Takes the format "US:XX", where XX is
         the two-digit state code. API defaults to "US:42" (Pennsylvania).
@@ -2514,6 +2616,10 @@ def get_stats_por(
         measured and the units of measure. A complete list of parameter codes
         and associated groupings can be found at
         https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
+    normal_type : string, optional
+        Filter the returned normals to a single period. If unspecified
+        (default), all matching data are returned. Available values:
+        "DOY" (day-of-year) and "MOY" (month-of-year).
     expand_percentiles : boolean
         Percentile data for a given day of year or month of year by default
         are returned from the service as lists of string values and percentile
@@ -2563,9 +2669,12 @@ def get_stats_por(
         ... )
     """
     # Build argument dictionary, omitting None values
-    params = _get_args(locals(), exclude={"expand_percentiles"})
+    params = _get_args(
+        _with_state(locals(), to="fips_us", into="state_code"),
+        exclude={"expand_percentiles"},
+    )
 
-    return get_stats_data(
+    return stats.get_data(
         args=params, service="observationNormals", expand_percentiles=expand_percentiles
     )
 
@@ -2574,6 +2683,7 @@ def get_stats_date_range(
     approval_status: str | None = None,
     computation_type: str | Iterable[str] | None = None,
     country_code: str | Iterable[str] | None = None,
+    state: str | Iterable[str] | None = None,
     state_code: str | Iterable[str] | None = None,
     county_code: str | Iterable[str] | None = None,
     start_date: str | None = None,
@@ -2584,6 +2694,7 @@ def get_stats_date_range(
     site_type_code: str | Iterable[str] | None = None,
     site_type_name: str | Iterable[str] | None = None,
     parameter_code: str | Iterable[str] | None = None,
+    interval_type: str | Iterable[str] | None = None,
     expand_percentiles: bool = True,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
     """Get monthly and annual water data statistics from the USGS Water Data API.
@@ -2608,6 +2719,10 @@ def get_stats_date_range(
         arithmetic_mean, maximum, median, minimum, percentile.
     country_code: string, optional
         Country query parameter. API defaults to "US".
+    state: string or iterable of strings, optional
+        State/territory filter (the recommended parameter). Accepts a full name
+        ("Wisconsin"), a two-letter postal code ("WI"), or a two-digit
+        ANSI/FIPS code ("55").
     state_code: string, optional
         State query parameter. Takes the format "US:XX", where XX is
         the two-digit state code. API defaults to "US:42" (Pennsylvania).
@@ -2649,6 +2764,10 @@ def get_stats_date_range(
         measured and the units of measure. A complete list of parameter codes
         and associated groupings can be found at
         https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
+    interval_type : string or iterable of strings, optional
+        Filter the returned intervals to one or more periods. If unspecified
+        (default), all matching data are returned. Available values:
+        "M" (month), "CY" (calendar year), and "WY" (water year).
     expand_percentiles : boolean
         Percentile data for a given day of year or month of year by default
         are returned from the service as lists of string values and percentile
@@ -2682,7 +2801,7 @@ def get_stats_date_range(
         >>> # Get monthly and yearly medians for streamflow at streams in Rhode Island
         >>> # from calendar year 2024.
         >>> df, md = dataretrieval.waterdata.get_stats_date_range(
-        ...     state_code="US:44",  # State code for Rhode Island
+        ...     state="RI",  # Rhode Island (postal code, name, or FIPS all work)
         ...     parameter_code="00060",
         ...     site_type_code="ST",
         ...     start_date="2024-01-01",
@@ -2699,9 +2818,12 @@ def get_stats_date_range(
         ... )
     """
     # Build argument dictionary, omitting None values
-    params = _get_args(locals(), exclude={"expand_percentiles"})
+    params = _get_args(
+        _with_state(locals(), to="fips_us", into="state_code"),
+        exclude={"expand_percentiles"},
+    )
 
-    return get_stats_data(
+    return stats.get_data(
         args=params,
         service="observationIntervals",
         expand_percentiles=expand_percentiles,
@@ -2865,7 +2987,7 @@ def get_channel(
     filter, filter_lang : optional
         Server-side CQL filter passed through as the OGC ``filter`` /
         ``filter-lang`` query parameters. See
-        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
+        :mod:`dataretrieval.ogc.filters` for syntax, auto-chunking,
         and the lexicographic-comparison pitfall.
     convert_type : boolean, optional
         If True, converts columns to appropriate types.
@@ -2877,6 +2999,13 @@ def get_channel(
     md: :obj:`dataretrieval.utils.BaseMetadata`
         A custom metadata object
 
+    Raises
+    ------
+    ChunkInterrupted
+        A transient failure (429 / 5xx / timeout) interrupted the request
+        after the built-in retries. Completed work is preserved; resume
+        with ``exc.call.resume()`` (see :doc:`/userguide/errors`).
+
     Examples
     --------
     .. code::
diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py
index 2ffe5089..de4ba0d4 100644
--- a/dataretrieval/waterdata/ratings.py
+++ b/dataretrieval/waterdata/ratings.py
@@ -18,6 +18,7 @@
 import pandas as pd
 
 from dataretrieval.exceptions import DataRetrievalError
+from dataretrieval.ogc.filters import _quote_cql_str
 from dataretrieval.rdb import extract_rdb_comment, read_rdb
 from dataretrieval.utils import HTTPX_DEFAULTS, _get
 
@@ -206,15 +207,6 @@ def _as_list(x: str | Iterable[str]) -> list[str]:
     return [x] if isinstance(x, str) else list(x)
 
 
-def _quote_cql_str(value: str) -> str:
-    """Escape a single-quoted CQL literal by doubling embedded quotes.
-
-    Defends against malformed filters / injection on arbitrary user input,
-    even though valid USGS monitoring-location IDs cannot contain a quote.
-    """
-    return value.replace("'", "''")
-
-
 def _build_filter(
     monitoring_location_id: str | list[str] | None,
     file_type: str | None,
diff --git a/dataretrieval/waterdata/stats.py b/dataretrieval/waterdata/stats.py
new file mode 100644
index 00000000..608f73ee
--- /dev/null
+++ b/dataretrieval/waterdata/stats.py
@@ -0,0 +1,293 @@
+"""USGS Water Data Statistics API client.
+
+Wraps ``https://api.waterdata.usgs.gov/statistics/v0`` — the daily-statistics
+service (period-of-record and date-range normals/intervals). This is a
+*separate*, non-OGC API: it has no chunkable multi-value axes, so it drives
+:func:`engine._paginate` directly through a blocking portal rather than going
+through ``multi_value_chunked``. The typed getters ``get_stats_por`` and
+``get_stats_date_range`` in :mod:`dataretrieval.waterdata.api` call
+:func:`get_data` here.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import httpx
+import pandas as pd
+
+from dataretrieval.ogc.engine import (
+    BASE_URL,
+    GEOPANDAS,
+    _attach_coordinates,
+    _default_headers,
+    _empty_feature_frame,
+    _paginate,
+    _run_sync,
+)
+from dataretrieval.utils import BaseMetadata
+
+# ``_handle_nesting``'s geopandas branch calls ``gpd.GeoDataFrame.from_features``
+# directly, so this module needs its own bound ``gpd`` name. Import it under the
+# same guard the engine uses; when geopandas is absent ``gpd`` is left unbound
+# (``GEOPANDAS`` is ``False``, so the stats path never touches it). The
+# empty-page short-circuit instead delegates to ``engine._empty_feature_frame``,
+# which resolves the engine's ``gpd`` — so an empty-page test patches
+# ``engine.gpd`` while the populated geopandas branch uses ``stats.gpd``.
+try:
+    import geopandas as gpd
+except ImportError:  # pragma: no cover - exercised only without geopandas
+    pass
+
+STATISTICS_API_VERSION = "v0"  # version path segment of the statistics API
+STATISTICS_API_URL = f"{BASE_URL}/statistics/{STATISTICS_API_VERSION}"  # service root
+
+
+def _handle_nesting(
+    body: dict[str, Any],
+    geopd: bool = False,
+) -> pd.DataFrame:
+    """
+    Flatten nested JSON from the stats service into a dataframe with
+    one row per monitoring location, parameter, and statistic.
+
+    Parameters
+    ----------
+    body : dict[str, Any] or None
+        Parsed JSON response body; ``None`` or no ``features`` yields an empty frame.
+    geopd : bool, optional
+        Whether ``geopandas`` is available — when ``True`` the returned
+        frame is a ``GeoDataFrame``; when ``False`` (default) a plain
+        ``pd.DataFrame`` is returned with geometry flattened.
+
+    Returns
+    -------
+    pd.DataFrame
+        Flattened statistical data: the per-feature frame left-joined to its values.
+
+    Notes
+    -----
+    The non-geopandas branch uses the same schema-aware extraction as
+    :func:`engine._get_resp_data`: it builds the per-feature outer frame
+    directly from each feature's ``properties`` (minus the nested
+    ``data`` field, which is unrolled separately below via the
+    ``record_path`` json_normalize), then adds ``geometry`` only when
+    present. Unlike :func:`engine._get_resp_data`, no top-level ``id``
+    column is added — stats features don't carry one, so this matches the
+    geopandas branch. Skipping the GeoJSON envelope keeps newly-added
+    fields like ``geometry.type`` from leaking into the result.
+    """
+    if body is None:
+        return _empty_feature_frame(geopd)
+
+    # An empty (or missing) features list — a real mid-pagination
+    # shape — would otherwise crash the downstream merge with
+    # ``KeyError: 'monitoring_location_id'`` because neither df nor
+    # dat would carry the merge key. ``_empty_feature_frame`` bails out
+    # with a geo-typed empty frame so a later ``pd.concat`` with non-empty
+    # geo pages doesn't downgrade to a plain DataFrame and strip geometry/CRS.
+    features = body.get("features") or []
+    if not features:
+        return _empty_feature_frame(geopd)
+
+    # The geopd-missing warning is emitted once at import (see engine module);
+    # doing it here would log per page.
+    if not geopd:
+        outer_props = [
+            {k: v for k, v in (f.get("properties") or {}).items() if k != "data"}
+            for f in features
+        ]
+        df = pd.json_normalize(outer_props, sep=".")
+        df.columns = df.columns.str.split(".").str[-1]
+        # Stats features don't carry a top-level ``id`` field — the
+        # geopandas branch (``GeoDataFrame.from_features``) doesn't
+        # surface one either, so the non-geopd branch stays
+        # consistent by NOT adding an id column.
+        _attach_coordinates(df, features)
+    else:
+        # Default a missing ``geometry`` key to ``None`` per feature so
+        # ``from_features`` (which indexes ``feature["geometry"]`` directly)
+        # can't ``KeyError`` on a stats feature that omits geometry — mirrors
+        # the guard in :func:`engine._get_resp_data`.
+        df = gpd.GeoDataFrame.from_features(
+            [f if "geometry" in f else {**f, "geometry": None} for f in features]
+        ).drop(columns=["data"], errors="ignore")
+
+    # Unnest json features, properties, data, and values while retaining necessary
+    # metadata to merge with main dataframe.
+    dat = pd.json_normalize(
+        body,
+        record_path=["features", "properties", "data", "values"],
+        meta=[
+            ["features", "properties", "monitoring_location_id"],
+            ["features", "properties", "data", "parameter_code"],
+            ["features", "properties", "data", "unit_of_measure"],
+            ["features", "properties", "data", "parent_time_series_id"],
+        ],
+        meta_prefix="",
+        errors="ignore",
+    )
+    dat.columns = dat.columns.str.split(".").str[-1]
+    # Left join: every per-feature row survives even without matching value rows.
+    return df.merge(dat, on="monitoring_location_id", how="left")
+
+
+def _expand_percentiles(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Explode list-valued percentile columns into one row per percentile.
+
+    Rows whose ``computation`` is ``"percentile"`` carry parallel lists in
+    ``values`` / ``percentiles``; each list element becomes its own row with
+    numeric ``value`` and ``percentile`` columns, and exploded ``'nan'``
+    values are dropped. When no percentile rows exist, a ``percentile``
+    column is added instead. In both cases minimum, median, and maximum
+    rows are assigned percentiles 0, 50, and 100.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        The dataframe returned from using one of the statistics services.
+        It is never modified in place.
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame containing the flattened percentile data, with
+        ``percentile`` placed directly after ``value``.
+    """
+    if len(df) == 0:
+        return df
+
+    if "percentile" in df["computation"].unique():
+        # Explode percentile lists into rows called "value" and "percentile"
+        percentiles = df.loc[df["computation"] == "percentile"]
+        percentiles_explode = percentiles[
+            ["computation_id", "values", "percentiles"]
+        ].explode(["values", "percentiles"], ignore_index=True)
+        percentiles_explode = percentiles_explode.loc[
+            percentiles_explode["values"] != "nan"
+        ]
+        percentiles_explode = percentiles_explode.assign(
+            value=pd.to_numeric(percentiles_explode["values"]),
+            percentile=pd.to_numeric(percentiles_explode["percentiles"]),
+        ).drop(columns=["values", "percentiles"])
+
+        # Merge exploded values back to other metadata/geometry
+        percentiles = percentiles.drop(
+            columns=["values", "percentiles", "value"], errors="ignore"
+        ).merge(percentiles_explode, on="computation_id", how="left")
+
+        # Concatenate back to original (``pd.concat`` builds a new frame)
+        dfs = pd.concat(
+            [df.loc[df["computation"] != "percentile"], percentiles]
+        ).drop(columns=["values", "percentiles"])
+    else:
+        # Copy so the caller's frame doesn't gain a ``percentile`` column.
+        dfs = df.copy()
+        dfs["percentile"] = pd.NA
+
+    # Give min, max, median a percentile value
+    dfs.loc[dfs["computation"] == "maximum", "percentile"] = 100
+    dfs.loc[dfs["computation"] == "minimum", "percentile"] = 0
+    dfs.loc[dfs["computation"] == "median", "percentile"] = 50
+
+    # Make sure numeric
+    dfs["percentile"] = pd.to_numeric(dfs["percentile"])
+
+    # Move percentile column
+    cols = dfs.columns.tolist()
+    cols.remove("percentile")
+    cols.insert(cols.index("value") + 1, "percentile")
+
+    return dfs[cols]
+
+
+def get_data(
+    args: dict[str, Any],
+    service: str,
+    expand_percentiles: bool,
+    client: httpx.AsyncClient | None = None,
+) -> tuple[pd.DataFrame, BaseMetadata]:
+    """
+    Retrieves statistical data from a specified endpoint and returns it
+    as a pandas DataFrame with metadata.
+
+    This function prepares request arguments, constructs API requests,
+    handles pagination, processes results, and formats output according
+    to the specified parameters.
+
+    The stats path doesn't go through ``multi_value_chunked`` (its query
+    shape has no chunkable list axes), so it drives :func:`engine._paginate`
+    directly through an ``anyio`` blocking portal. The portal runs the
+    pagination loop in a short-lived worker thread, so this works whether
+    or not the caller is already inside an event loop.
+
+    Parameters
+    ----------
+    args : dict[str, Any]
+        Request arguments for the statistics service; never mutated here.
+    service : str
+        The statistics service type (for example,
+        "observationNormals" or "observationIntervals").
+    expand_percentiles : bool
+        Determines whether the percentiles column is expanded so that
+        each percentile gets its own row in the returned dataframe. If
+        True and the user requests a computation_type other than
+        percentiles, a percentile column is still returned.
+    client : httpx.AsyncClient, optional
+        Caller-borrowed async client. ``None`` (default) opens a
+        temporary one inside the portal. Primarily a test seam.
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame containing the retrieved and processed statistical data.
+    BaseMetadata
+        A metadata object containing request information including URL and query time.
+
+    Raises
+    ------
+    DataRetrievalError
+        The typed subclass for an HTTP error response (see :func:`engine._paginate`);
+        or :class:`~dataretrieval.exceptions.NetworkError` if the initial request
+        can't reach the service (timeout / DNS), the ``httpx`` exception chained
+        on ``__cause__``.
+    """
+    # Build the first page once; follow-up pages reuse its method and headers.
+    url = f"{STATISTICS_API_URL}/{service}"
+    req = httpx.Request(
+        method="GET",
+        url=url,
+        headers=_default_headers(),
+        params=args,
+    )
+    method = req.method
+    headers = req.headers
+
+    def parse_response(resp: httpx.Response) -> tuple[pd.DataFrame, str | None]:
+        body = resp.json()
+        # A null body (which ``_handle_nesting`` maps to an empty frame) has no
+        # cursor; falsy cursors ("", 0) also coerce to None so _paginate stops.
+        cursor = (body or {}).get("next") or None
+        return _handle_nesting(body, geopd=GEOPANDAS), cursor
+
+    async def follow_up(cursor: str, sess: httpx.AsyncClient) -> httpx.Response:
+        # Build a fresh params dict per page so the caller's ``args``
+        # is never mutated.
+        return await sess.request(
+            method, url=url, params={**args, "next_token": cursor}, headers=headers
+        )
+
+    async def _run() -> tuple[pd.DataFrame, httpx.Response]:
+        return await _paginate(
+            req,
+            parse_response=parse_response,
+            follow_up=follow_up,
+            client=client,
+        )
+
+    df, response = _run_sync(_run, service=service)
+
+    if expand_percentiles:
+        df = _expand_percentiles(df)
+    return df, BaseMetadata(response)
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index e8c18094..4dc7afd3 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -1,1452 +1,180 @@
-from __future__ import annotations
-
-import copy
-import functools
-import json
-import logging
-import numbers
-import os
-import re
-from collections.abc import (
-    AsyncIterator,
-    Awaitable,
-    Callable,
-    Iterable,
-    Iterator,
-    Mapping,
-    Sequence,
-)
-from contextlib import asynccontextmanager, contextmanager
-from contextvars import ContextVar
-from datetime import datetime, timedelta
-from typing import Any, TypeVar, cast, get_args
-from zoneinfo import ZoneInfo
-
-import httpx
-import pandas as pd
-from anyio.from_thread import start_blocking_portal
-
-from dataretrieval import __version__
-from dataretrieval.exceptions import DataRetrievalError, RateLimited, error_for_status
-from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata, _get, _network_error
-from dataretrieval.waterdata import _progress, chunking
-from dataretrieval.waterdata.chunking import (
-    _QUOTA_HEADER,
-    _safe_elapsed,
-    get_active_client,
-)
-from dataretrieval.waterdata.types import (
-    PROFILE_LOOKUP,
-    PROFILES,
-    SERVICES,
-)
-
-try:
-    import geopandas as gpd
-
-    GEOPANDAS = True
-except ImportError:
-    GEOPANDAS = False
-
-# Set up logger for this module
-logger = logging.getLogger(__name__)
-
-# Whether geopandas is present is a static, environment-level fact, so warn once
-# here at import time rather than per query/chunk. That avoids the warning
-# repeating on every call and avoids it interleaving with the progress line's
-# carriage-return rewrites.
-if not GEOPANDAS:
-    logger.warning(
-        "Geopandas not installed. Geometries will be flattened into pandas DataFrames."
-    )
-
-BASE_URL = "https://api.waterdata.usgs.gov"
-OGC_API_VERSION = "v0"
-OGC_API_URL = f"{BASE_URL}/ogcapi/{OGC_API_VERSION}"
-SAMPLES_URL = f"{BASE_URL}/samples-data"
-STATISTICS_API_VERSION = "v0"
-STATISTICS_API_URL = f"{BASE_URL}/statistics/{STATISTICS_API_VERSION}"
-
-# Maps each OGC waterdata service to its user-facing ``id`` column (the name the
-# typed getters rename the wire ``id`` to, e.g. ``daily`` -> ``daily_id``).
-# ``get_cql`` validates its ``service`` argument against these keys and
-# uses the value as the ``output_id`` for result shaping. Keep in sync with the
-# ``types.WATERDATA_SERVICES`` Literal (same keys).
-_OUTPUT_ID_BY_SERVICE: dict[str, str] = {
-    "channel-measurements": "channel_measurements_id",
-    "combined-metadata": "combined_meta_id",
-    "continuous": "continuous_id",
-    "daily": "daily_id",
-    "field-measurements": "field_measurement_id",
-    "field-measurements-metadata": "field_series_id",
-    "latest-continuous": "latest_continuous_id",
-    "latest-daily": "latest_daily_id",
-    "monitoring-locations": "monitoring_location_id",
-    "peaks": "peak_id",
-    "time-series-metadata": "time_series_id",
-}
-
-# Every service's output id EXCEPT the two that are genuinely user-facing
-# (``monitoring_location_id`` and ``time_series_id``). The rest are synthetic
-# per-record ids that ``_arrange_cols`` moves to the end of a result frame.
-# Derived from ``_OUTPUT_ID_BY_SERVICE`` so adding a service can't silently
-# leave a stray id column at the front again.
-_EXTRA_ID_COLS = set(_OUTPUT_ID_BY_SERVICE.values()) - {
-    "monitoring_location_id",
-    "time_series_id",
-}
-
-
-def _switch_arg_id(ls: dict[str, Any], id_name: str, service: str) -> dict[str, Any]:
-    """
-    Switch argument id from its package-specific identifier to the standardized "id" key
-    that the API recognizes.
-
-    If `ls` does not already have an "id" key, sets it from either the
-    service-derived id key or the expected id column name. If neither key
-    exists, "id" is left unset. The original service-specific id keys are
-    removed regardless.
-
-    Parameters
-    ----------
-    ls : Dict[str, Any]
-        The dictionary containing identifier keys to be standardized.
-    id_name : str
-        The name of the specific identifier key to look for.
-    service : str
-        The service name.
-
-    Returns
-    -------
-    Dict[str, Any]
-        The modified dictionary with the "id" key set appropriately.
-
-    Examples
-    --------
-    For service "time-series-metadata", the function will look for either
-    "time_series_metadata_id" or "time_series_id" and change the key to simply
-    "id".
-    """
-
-    service_id = service.replace("-", "_") + "_id"
-
-    if "id" not in ls:
-        if service_id in ls:
-            ls["id"] = ls[service_id]
-        elif id_name in ls:
-            ls["id"] = ls[id_name]
-
-    # Remove the original keys regardless of whether they were used
-    ls.pop(service_id, None)
-    ls.pop(id_name, None)
-
-    return ls
-
-
-def _switch_properties_id(
-    properties: list[str] | None, id_name: str, service: str
-) -> list[str]:
-    """
-    Build the wire ``properties`` list, dropping every id alias and
-    ``geometry``.
-
-    The feature ``id`` is always returned and is renamed to the
-    service-specific id column (e.g. ``daily_id``) in post-processing, so
-    it must not be requested as a property: several collections (e.g.
-    ``daily``, ``continuous``) reject ``id`` in ``properties`` with an
-    HTTP 400. ``geometry`` is likewise excluded because it is controlled
-    by ``skip_geometry``. Any service-specific id name (``daily_id``,
-    ``monitoring_location_id``, …) and the bare ``id`` are dropped, and
-    remaining hyphens are normalized to underscores. Returns an empty
-    list when `properties` is empty or None — the URL then omits the
-    ``properties`` filter and the result is shaped by :func:`_arrange_cols`.
-
-    Parameters
-    ----------
-    properties : Optional[List[str]]
-        A list containing the properties or column names to be pulled from the
-        service, or None.
-    id_name : str
-        The service-specific id column name to drop (e.g. ``daily_id``).
-    service : str
-        The service name.
-
-    Returns
-    -------
-    List[str]
-        The wire ``properties`` with id aliases and ``geometry`` removed
-        and hyphens normalized.
-
-    Examples
-    --------
-    For service "daily" with ``properties=["daily_id", "value", "geometry"]``,
-    returns ``["value"]`` — ``daily_id`` and ``geometry`` are dropped, while
-    the ``daily_id`` column still appears in the result, renamed from the
-    always-returned feature ``id``.
-    """
-    if not properties:
-        return []
-    service_id = service.replace("-", "_") + "_id"
-    # The feature ``id`` always comes back (renamed to the service id
-    # downstream) and several collections reject it as a selectable
-    # property; ``geometry`` is controlled by ``skip_geometry``. Drop both,
-    # plus the service-specific id column (``id_name``) and the name derived
-    # straight from the service (``service_id``).
-    drop = {"id", "geometry", id_name, service_id}
-    normalized = (p.replace("-", "_") for p in properties)
-    return [p for p in normalized if p not in drop]
-
-
-_DATETIME_FORMATS = (
-    "%Y-%m-%dT%H:%M:%S.%f%z",
-    "%Y-%m-%dT%H:%M:%S%z",
-    "%Y-%m-%dT%H:%M:%S.%f",
-    "%Y-%m-%dT%H:%M:%S",
-    "%Y-%m-%d %H:%M:%S.%f",
-    "%Y-%m-%d %H:%M:%S",
-    "%Y-%m-%d",
-)
-
-# Anchored to ``[Pp]\d`` so a normal word containing ``p`` (e.g. ``"Apr"``)
-# doesn't get mis-classified as an ISO 8601 duration; the optional ``T``
-# admits time-only forms like ``PT36H``.
-_DURATION_RE = re.compile(r"^[Pp]T?\d")
-
-# OGC API parameters that carry a date/datetime value (single string,
-# two-element range, or interval/duration string) rather than a multi-value
-# string list. Used by ``_construct_api_requests`` to keep them out of the
-# POST/CQL2 multi-value path and to route them through ``_format_api_dates``,
-# and by ``_NO_NORMALIZE_PARAMS`` to bypass string-iterable normalization.
-_DATE_RANGE_PARAMS = frozenset(
-    {"datetime", "last_modified", "begin", "begin_utc", "end", "end_utc", "time"}
-)
-
-# Services that don't support comma-separated values for multi-value GET
-# parameters and require POST with CQL2 JSON instead.
-_CQL2_REQUIRED_SERVICES = frozenset({"monitoring-locations"})
-
-
-def _parse_datetime(value: str) -> datetime | None:
-    """Parse a single datetime string against the supported formats.
-
-    Returns a ``datetime`` (tz-aware iff the input carried a UTC offset),
-    or ``None`` if no format matched.
-    """
-    # ``datetime.strptime`` accepts a numeric offset like ``+00:00`` but not
-    # the ``Z`` shorthand, so normalize trailing ``Z`` first.
-    candidate = value[:-1] + "+00:00" if value.endswith("Z") else value
-    for fmt in _DATETIME_FORMATS:
-        try:
-            return datetime.strptime(candidate, fmt)
-        except ValueError:
-            continue
-    return None
-
-
-def _format_one(dt: str | None, *, date: bool) -> str | None:
-    """Format a single datetime element for inclusion in the API time arg."""
-    if pd.isna(dt) or dt == "" or dt is None:
-        return ".."
-    parsed = _parse_datetime(dt)
-    if parsed is None:
-        return None
-    if date:
-        return parsed.strftime("%Y-%m-%d")
-    # Naive inputs are interpreted in the system local zone (for backwards
-    # compatibility). Use ``.astimezone()`` rather than a fixed offset so each
-    # value is resolved against the DST rules for ITS OWN date — a frozen
-    # ``datetime.now()`` offset shifted off-season inputs by an hour.
-    aware = parsed if parsed.tzinfo is not None else parsed.astimezone()
-    return aware.astimezone(ZoneInfo("UTC")).strftime("%Y-%m-%dT%H:%M:%SZ")
-
-
-def _format_api_dates(
-    datetime_input: str | Sequence[str | None] | None, date: bool = False
-) -> str | None:
-    """
-    Formats date or datetime input(s) for use with an API.
-
-    Handles single values or ranges, and converting to ISO 8601 or date-only
-    formats as needed.
-
-    Parameters
-    ----------
-    datetime_input : Union[str, List[Optional[str]], None]
-        A single date/datetime string or a list of one or two date/datetime
-        strings. Accepts formats like "%Y-%m-%d %H:%M:%S", ISO 8601 (with or
-        without ``Z``/numeric offset), or relative periods (e.g., "P7D" /
-        "PT36H"). Range endpoints may be ``None``/``NaN``/empty to denote a
-        half-bounded range.
-    date : bool, optional
-        If True, uses only the date portion ("YYYY-MM-DD"). If False (default),
-        returns full datetime in UTC ISO 8601 format ("YYYY-MM-DDTHH:MM:SSZ").
-
-    Returns
-    -------
-    Union[str, None]
-        - If input is a single value, returns the formatted date/datetime string
-        or None if parsing fails.
-        - If input is a list of two values, returns a date/datetime range string
-        separated by "/" (e.g., "YYYY-MM-DD/YYYY-MM-DD" or
-        "YYYY-MM-DDTHH:MM:SSZ/YYYY-MM-DDTHH:MM:SSZ").
-        - Returns None if input is empty, all NA, or cannot be parsed.
-
-    Raises
-    ------
-    ValueError
-        If `datetime_input` contains more than two values.
-
-    Notes
-    -----
-    - A single blank/NA value returns None. In a two-value range, a blank/NA
-    endpoint is rendered as ``".."`` to denote an open bound (e.g.
-    ``"2024-01-01/.."``); the range is only None when *every* element is
-    blank/NA or any non-NA element fails to parse.
-    - Supports ISO 8601 durations such as "P7D" and "PT36H" and pre-formatted
-    intervals containing ``"/"``; both are passed through unchanged.
-    - Converts datetimes to UTC and formats as ISO 8601 with 'Z' suffix when
-    `date` is False. Inputs with an explicit offset (``Z`` or ``+HH:MM``) are
-    converted from that offset to UTC; naive inputs are interpreted in the
-    local time zone for backwards compatibility.
-    """
-    if datetime_input is None:
-        return None
-
-    # Convert single string to list for uniform processing
-    if isinstance(datetime_input, str):
-        datetime_input = [datetime_input]
-    elif isinstance(datetime_input, Mapping):
-        # `list(mapping)` returns keys, which silently accepts the wrong shape.
-        raise TypeError(
-            f"date input must be a string or sequence of strings, "
-            f"not {type(datetime_input).__name__}."
-        )
-    elif not isinstance(datetime_input, (list, tuple)):
-        # Materialize any other iterable (pandas.Series, numpy.ndarray,
-        # generator, ...) so the len()/subscript operations below work.
-        datetime_input = list(datetime_input)
-
-    # Check for null or all NA and return None
-    if all(pd.isna(dt) or dt == "" or dt is None for dt in datetime_input):
-        return None
-
-    if len(datetime_input) > 2:
-        raise ValueError("datetime_input should only include 1-2 values")
-
-    # Pass through duration ("P7D", "PT36H") and pre-formatted interval ("a/b")
-    # strings untouched.
-    if len(datetime_input) == 1 and isinstance(datetime_input[0], str):
-        single = datetime_input[0]
-        if _DURATION_RE.match(single) or "/" in single:
-            return single
-
-    # element invalidates the range.
-    formatted: list[str] = []
-    for dt in datetime_input:
-        one = _format_one(dt, date=date)
-        if one is None:
-            return None
-        formatted.append(one)
-    return "/".join(formatted)
-
-
-def _cql2_param(args: dict[str, Any]) -> str:
-    """
-    Convert query parameters to CQL2 JSON format for POST requests.
-
-    Parameters
-    ----------
-    args : Dict[str, Any]
-        Dictionary of query parameters to convert to CQL2 format.
-
-    Returns
-    -------
-    str
-        Compact JSON string representation of the CQL2 query.
-
-    Notes
-    -----
-    Serialized with the tightest separators (no indentation or
-    whitespace). The body counts against the server's ~8 KB request-size
-    limit and against :func:`chunking._request_bytes` when planning
-    chunks, so every saved byte fits more values per POST: compact
-    encoding roughly halves the per-value cost versus pretty-printing,
-    which roughly doubles how many monitoring-location ids fit in one
-    sub-request and so halves the chunk count for large id lists.
-    """
-    filters = []
-    for key, values in args.items():
-        filters.append({"op": "in", "args": [{"property": key}, values]})
-
-    query = {"op": "and", "args": filters}
-
-    return json.dumps(query, separators=(",", ":"))
-
-
-def _default_headers() -> dict[str, str]:
-    """
-    Generate default HTTP headers for API requests.
-
-    Returns
-    -------
-    dict
-        A dictionary containing default headers including 'Accept-Encoding',
-        'Accept', 'User-Agent', and 'lang'. If the environment variable
-        'API_USGS_PAT' is set, its value is included as the 'X-Api-Key' header.
-    """
-    headers = {
-        "Accept-Encoding": "compress, gzip",
-        "Accept": "application/json",
-        "User-Agent": f"python-dataretrieval/{__version__}",
-        "lang": "en-US",
-    }
-    token = os.getenv("API_USGS_PAT")
-    if token:
-        headers["X-Api-Key"] = token
-    return headers
-
-
-def _check_ogc_requests(
-    endpoint: str = "daily", req_type: str = "queryables"
-) -> dict[str, Any]:
-    """
-    Sends an HTTP GET request to the specified OGC endpoint and request type,
-    returning the JSON response.
-
-    Parameters
-    ----------
-    endpoint : str, optional
-        The OGC collection endpoint to query (default is "daily").
-    req_type : str, optional
-        The type of request to make. Must be either "queryables" or "schema"
-        (default is "queryables").
-
-    Returns
-    -------
-    dict
-        The JSON response from the OGC endpoint.
-
-    Raises
-    ------
-    ValueError
-        If req_type is not "queryables" or "schema".
-    DataRetrievalError
-        From :func:`_raise_for_non_200` on any non-200 (the typed subclass for
-        the status) — same typed contract as the main data path so callers can
-        use one ``except`` clause everywhere.
-    """
-    if req_type not in ("queryables", "schema"):
-        raise ValueError(f"req_type must be 'queryables' or 'schema', got {req_type!r}")
-    url = f"{OGC_API_URL}/collections/{endpoint}/{req_type}"
-    resp = _get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
-    _raise_for_non_200(resp)
-    # ``Response.json`` is typed ``Any``; the OGC queryables/schema endpoints
-    # return a JSON object, and callers index it as a dict.
-    return cast("dict[str, Any]", resp.json())
-
-
-def _error_body(resp: httpx.Response) -> str:
-    """
-    Build an informative error message from an HTTP response.
-
-    Parameters
-    ----------
-    resp : httpx.Response
-        The HTTP response object to extract the error message from.
-
-    Returns
-    -------
-    str
-        An error message string assembled per status code:
-
-        * **429** — predefined message describing the rate-limit and pointing
-          at the API-token path; the response body is not consulted.
-        * **403** — predefined message describing the most common cause
-          (query exceeding server limits); the response body is not
-          consulted.
-        * **other statuses** — attempts ``resp.json()`` and renders
-          ``"<status>: <code>. <description>."`` from the JSON error
-          envelope. If the body is not JSON (e.g. an HTML 502 from a
-          gateway), falls back to ``"<status>: <reason>. <snippet>"`` with
-          the first 200 characters of ``resp.text``; an empty body
-          degrades to ``"<status>: <reason>."``.
-    """
-    status = resp.status_code
-    if status == 429:
-        return (
-            "429: Too many requests made. Please obtain an API token "
-            "or try again later."
-        )
-    elif status == 403:
-        return (
-            "403: Query request denied. Possible reasons include "
-            "query exceeding server limits."
-        )
-    try:
-        j_txt = resp.json()
-    except ValueError:
-        snippet = (resp.text or "").strip()[:200]
-        reason = resp.reason_phrase or "Error"
-        if snippet:
-            return f"{status}: {reason}. {snippet}"
-        return f"{status}: {reason}."
-    return (
-        f"{status}: {j_txt.get('code', 'Unknown type')}. "
-        f"{j_txt.get('description', 'No description provided')}."
-    )
-
-
-def _parse_retry_after(value: str | None) -> float | None:
-    """
-    Parse a USGS ``Retry-After`` header into seconds.
-
-    Parameters
-    ----------
-    value : str or None
-        The raw header value, or ``None`` if absent.
-
-    Returns
-    -------
-    float or None
-        Non-negative delta-seconds, clamped at zero. ``None`` when the
-        header is absent or unparseable; ``ChunkedCall`` treats
-        ``None`` as "fall back to my own retry policy".
-
-    Notes
-    -----
-    USGS sends ``Retry-After`` as integer delta-seconds (empirically
-    verified — e.g. ``Retry-After: 2619``). The HTTP spec also allows
-    HTTP-date form, but USGS doesn't use it, so this function doesn't
-    bother parsing it.
-    """
-    if not value:
-        return None
-    try:
-        return max(0.0, float(value.strip()))
-    except ValueError:
-        return None
-
-
-def _raise_for_non_200(resp: httpx.Response) -> None:
-    """
-    Raise a typed exception for any non-200 response.
-
-    Routes through :func:`_error_body` (USGS-API-aware: handles
-    429/403 specially, extracts ``code``/``description`` from JSON
-    error bodies) rather than ``Response.raise_for_status``, which
-    raises ``HTTPStatusError`` with a generic message.
-
-    Parameters
-    ----------
-    resp : httpx.Response
-        The HTTP response to inspect.
-
-    Raises
-    ------
-    DataRetrievalError
-        The typed subclass for the status (see
-        :func:`dataretrieval.exceptions.error_for_status` for the mapping). The
-        transient types (:class:`~dataretrieval.exceptions.TransientError`) are
-        distinguished so ``ChunkedCall`` can wrap them as a resumable
-        :class:`~dataretrieval.waterdata.chunking.QuotaExhausted` /
-        :class:`~dataretrieval.waterdata.chunking.ServiceInterrupted`; a fatal
-        :class:`~dataretrieval.exceptions.HTTPError` (not a ``TransientError``)
-        the chunker won't resume.
-    """
-    status = resp.status_code
-    if status < 400:
-        return
-    raise error_for_status(
-        status,
-        _error_body(resp),
-        retry_after=_parse_retry_after(resp.headers.get("Retry-After")),
-    )
-
-
-def _paginated_failure_message(pages_collected: int, cause: BaseException) -> str:
-    """
-    Build a user-facing message for a mid-pagination failure.
-
-    The API exposes no resume cursor, so the caller's only recovery is
-    to retry the whole call — the message lists the practical knobs,
-    tailored to whether the failure was rate-limit (429) or something
-    else.
-
-    Parameters
-    ----------
-    pages_collected : int
-        Number of pages successfully fetched before the failure.
-    cause : BaseException
-        The underlying exception that interrupted pagination.
-
-    Returns
-    -------
-    str
-        A message suitable for the ``DataRetrievalError`` that
-        ``_walk_pages`` and ``get_stats_data`` raise from the
-        original exception.
-    """
-    cause_str = str(cause).removesuffix(".")
-    # Some ``httpx`` exceptions (e.g. ``TimeoutException()`` with no args)
-    # stringify to empty; fall back to the class name so the
-    # returned message is always informative.
-    if not cause_str.strip():
-        cause_str = type(cause).__name__
-    if isinstance(cause, RateLimited):
-        action = "wait for the rate-limit window to reset and retry"
-    else:
-        action = "retry the request (possibly after a short backoff)"
-    return (
-        f"Paginated request failed after collecting {pages_collected} "
-        f"page(s): {cause_str}. To recover: {action}, reduce the "
-        f"request size (e.g. fewer locations, a shorter time range, or "
-        f"a smaller ``limit``), or obtain an API token."
-    )
-
-
-def _ogc_query_params(
-    params: dict[str, Any],
-    *,
-    properties: list[str] | None,
-    bbox: list[float] | None,
-    limit: int | None,
-    skip_geometry: bool | None,
-) -> dict[str, Any]:
-    """Add the shared OGC query knobs to ``params`` (mutated in place).
-
-    Factors out the ``skipGeometry``/``limit``/``bbox``/``properties`` block
-    common to every OGC request so the typed getters
-    (:func:`_construct_api_requests`) and the generalized CQL2 path
-    (:func:`_construct_cql_request`) build identical URL parameters.
-
-    ``skip_geometry=None`` leaves ``skipGeometry`` unset (the server defaults to
-    including geometry); the typed getters always pass a bool, so their behavior
-    is unchanged.
-    """
-    if skip_geometry is not None:
-        params["skipGeometry"] = skip_geometry
-    params["limit"] = 50000 if limit is None or limit > 50000 else limit
-    # `len()` instead of truthiness: a numpy ndarray would raise on `if bbox:`.
-    if bbox is not None and len(bbox) > 0:
-        params["bbox"] = ",".join(map(str, bbox))
-    if properties:
-        params["properties"] = ",".join(properties)
-    return params
-
-
-def _construct_api_requests(
-    service: str,
-    properties: list[str] | None = None,
-    bbox: list[float] | None = None,
-    limit: int | None = None,
-    skip_geometry: bool = False,
-    **kwargs: Any,
-) -> httpx.Request:
-    """
-    Constructs an HTTP request object for the specified water data API service.
-
-    For most services, list parameters are comma-joined and sent as a single
-    GET request (e.g. ``parameter_code=["00060","00010"]`` becomes
-    ``parameter_code=00060,00010`` in the URL). For services that do not
-    support comma-separated values (currently only ``monitoring-locations``),
-    a POST request with CQL2 JSON is used instead.
-
-    Parameters
-    ----------
-    service : str
-        The name of the API service to query (e.g., "daily").
-    properties : Optional[List[str]], optional
-        List of property names to include in the request.
-    bbox : Optional[List[float]], optional
-        Bounding box coordinates as a list of floats.
-    limit : Optional[int], optional
-        Maximum number of results to return per request.
-    skip_geometry : bool, optional
-        Whether to exclude geometry from the response (default is False).
-    **kwargs
-        Additional query parameters, including date/time filters and other
-        API-specific options.
-
-    Returns
-    -------
-    httpx.Request
-        The constructed HTTP request object ready to be sent.
-
-    Notes
-    -----
-    - Date/time parameters are automatically formatted to ISO8601.
-    """
-    service_url = f"{OGC_API_URL}/collections/{service}/items"
-
-    # Format date/time parameters to ISO8601 first — both routing paths need it.
-    for key in _DATE_RANGE_PARAMS:
-        if key in kwargs:
-            kwargs[key] = _format_api_dates(
-                kwargs[key],
-                date=(service == "daily" and key != "last_modified"),
-            )
-
-    if service in _CQL2_REQUIRED_SERVICES:
-        # POST with CQL2 JSON: multi-value params go in the request body.
-        # The date-range loop above has already collapsed any _DATE_RANGE_PARAMS
-        # value to a string, so the list/tuple check below cannot match them.
-        post_params = {
-            k: v
-            for k, v in kwargs.items()
-            if isinstance(v, (list, tuple)) and len(v) > 1
-        }
-        params = {k: v for k, v in kwargs.items() if k not in post_params}
-    else:
-        # GET with comma-separated values: join list/tuple values into one string.
-        # Skip empty lists/tuples so they're omitted rather than emitted as a
-        # filterless ``&param=`` (which the server reads as "match empty").
-        post_params = {}
-        params = {
-            k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v
-            for k, v in kwargs.items()
-            if not (isinstance(v, (list, tuple)) and len(v) == 0)
-        }
-
-    _ogc_query_params(
-        params,
-        properties=properties,
-        bbox=bbox,
-        limit=limit,
-        skip_geometry=skip_geometry,
-    )
-
-    # Translate CQL filter Python names to the hyphenated URL parameter that
-    # the OGC API expects. The Python kwarg is `filter_lang` because hyphens
-    # aren't valid in Python identifiers.
-    if "filter_lang" in params:
-        params["filter-lang"] = params.pop("filter_lang")
-
-    headers = _default_headers()
-
-    if post_params:
-        headers["Content-Type"] = "application/query-cql-json"
-        return httpx.Request(
-            method="POST",
-            url=service_url,
-            headers=headers,
-            content=_cql2_param(post_params),
-            params=params,
-        )
-    return httpx.Request(
-        method="GET",
-        url=service_url,
-        headers=headers,
-        params=params,
-    )
-
-
-def _construct_cql_request(
-    service: str,
-    cql_body: str,
-    *,
-    properties: list[str] | None = None,
-    bbox: list[float] | None = None,
-    limit: int | None = None,
-    skip_geometry: bool | None = None,
-) -> httpx.Request:
-    """Build a POST/CQL2 request from a verbatim CQL2 body.
-
-    The OGC-API counterpart to :func:`_construct_api_requests` for the
-    generalized :func:`~dataretrieval.waterdata.api.get_cql` path: the
-    caller supplies an already-serialized CQL2 JSON document (any predicate the
-    grammar allows), sent unchanged as the request body, while
-    ``properties``/``bbox``/``limit``/``skip_geometry`` go on the URL via the
-    shared :func:`_ogc_query_params` — so a generalized query and an equivalent
-    typed getter produce the same URL parameters.
-
-    Parameters
-    ----------
-    service : str
-        OGC collection name (e.g. ``"daily"``).
-    cql_body : str
-        Serialized CQL2 JSON document, sent as the POST body verbatim.
-    properties, bbox, limit, skip_geometry
-        See :func:`_ogc_query_params`. ``properties`` are wire-format
-        (``id``-translated) names.
-
-    Returns
-    -------
-    httpx.Request
-        A POST request with ``Content-Type: application/query-cql-json``.
-    """
-    service_url = f"{OGC_API_URL}/collections/{service}/items"
-    params = _ogc_query_params(
-        {},
-        properties=properties,
-        bbox=bbox,
-        limit=limit,
-        skip_geometry=skip_geometry,
-    )
-    headers = _default_headers()
-    headers["Content-Type"] = "application/query-cql-json"
-    return httpx.Request(
-        method="POST",
-        url=service_url,
-        headers=headers,
-        content=cql_body,
-        params=params,
-    )
-
-
-def _next_req_url(
-    resp: httpx.Response, *, body: dict[str, Any] | None = None
-) -> str | None:
-    """
-    Extracts the URL for the next page of results from an HTTP response from a
-    water data endpoint.
-
-    Parameters
-    ----------
-    resp : httpx.Response
-        The HTTP response object containing JSON data and headers.
-    body : dict, optional
-        Pre-parsed JSON body for ``resp``. When provided, skips the
-        ``resp.json()`` call — useful when the caller has already
-        decoded the body for its own use (avoids a second parse pass).
-
-    Returns
-    -------
-    Optional[str]
-        The URL for the next page of results if available, otherwise None.
-
-    Notes
-    -----
-    - Returns None when the response carries no features.
-    - Expects the response JSON to contain a "links" list with objects having
-    "rel" and "href" keys.
-    - Checks for the "next" relation in the "links" to determine the next URL.
-    """
-    if body is None:
-        body = resp.json()
-    if not body.get("numberReturned"):
-        return None
-    for link in body.get("links", []):
-        if link.get("rel") != "next":
-            continue
-        href = link.get("href")
-        if not href:
-            return None
-        # Refuse to follow a next-page link to a different host —
-        # the request's headers/auth were minted for the original
-        # host and shouldn't leak to whatever a poisoned response
-        # body might supply. Guarded against mock-shaped ``resp.url``
-        # attributes (tests sometimes set strings or ``MagicMock``)
-        # by falling open when host extraction isn't reliable.
-        next_host: str | None
-        cur_host: str | None
-        try:
-            next_host = httpx.URL(href).host
-            resp_url = (
-                resp.url
-                if isinstance(resp.url, httpx.URL)
-                else httpx.URL(str(resp.url))
-            )
-            cur_host = resp_url.host
-        except (httpx.InvalidURL, TypeError):
-            next_host = cur_host = None
-        if next_host and cur_host and next_host != cur_host:
-            raise RuntimeError(
-                f"Refusing to follow cross-host next-page URL: "
-                f"{next_host} != {cur_host}"
-            )
-        # ``href`` comes from the JSON ``links`` array (typed ``Any``); the
-        # ``not href`` guard above already excluded empty/None, and it is a
-        # URL string (passed to ``httpx.URL`` above).
-        return cast("str", href)
-    return None
-
-
-def _get_resp_data(
-    resp: httpx.Response,
-    geopd: bool,
-    *,
-    body: dict[str, Any] | None = None,
-) -> pd.DataFrame:
-    """
-    Extracts and normalizes data from an HTTP response containing GeoJSON features.
-
-    Parameters
-    ----------
-    resp : httpx.Response
-        The HTTP response object expected to contain a JSON body
-        with a "features" key.
-    geopd : bool
-        Indicates whether geopandas is installed and should be used to
-        handle geometries.
-    body : dict, optional
-        Pre-parsed JSON body for ``resp``. When provided, skips the
-        ``resp.json()`` call — useful when the caller has already
-        decoded the body for its own use (avoids a second parse pass).
-
-    Returns
-    -------
-    gpd.GeoDataFrame or pd.DataFrame
-        A ``GeoDataFrame`` when ``geopd`` is True; otherwise a plain
-        ``DataFrame`` carrying the feature properties plus an ``id``
-        column (always present, possibly all-None) and a ``geometry``
-        column (coordinates list) when at least one feature includes
-        geometry. Returns an empty ``DataFrame`` when no features are
-        returned.
-
-    Notes
-    -----
-    The non-geopandas branch builds the frame directly from each
-    feature's ``properties`` dict, plus the top-level ``id`` and
-    ``geometry.coordinates`` columns — the ``id`` column is always
-    added (so the downstream rename to the service-specific output id
-    works even on an all-None id), while the ``geometry`` column is
-    added only when at least one feature carries geometry. This skips
-    the GeoJSON envelope entirely, so
-    newly-added Feature-level fields (e.g. ``geometry.type`` after
-    USGS migrated to full GeoJSON geometry objects) can't leak into
-    the result frame; no reactive drop-list needs maintenance every
-    time the upstream schema grows.
-    """
-    if body is None:
-        body = resp.json()
-    if not body.get("numberReturned"):
-        # Preserve the GeoDataFrame type on empty short-circuit so a
-        # downstream ``pd.concat([empty_page, geo_page])`` doesn't
-        # downgrade the geopd-installed user's result to a plain
-        # DataFrame (stripping geometry/CRS).
-        return gpd.GeoDataFrame() if geopd else pd.DataFrame()
-
-    # Defensive: a 200 with ``numberReturned > 0`` but missing
-    # ``features`` is a real schema-drift shape (mirrors the guard in
-    # ``_handle_stats_nesting``). Treat as empty rather than crash with
-    # ``KeyError`` — the wrapped failure would otherwise look like a
-    # transient transport error to ``_paginate``'s exception handler.
-    features = body.get("features") or []
-    if not features:
-        return gpd.GeoDataFrame() if geopd else pd.DataFrame()
-
-    if not geopd:
-        df = pd.json_normalize([f.get("properties") or {} for f in features], sep="_")
-        # Always materialize the ``id`` column (may be all-None) so
-        # ``_arrange_cols``'s ``df.rename(columns={"id": output_id})``
-        # produces the documented service-specific output_id column
-        # (daily_id, channel_measurements_id, …) even if the upstream
-        # response carried no feature-level id.
-        df["id"] = [f.get("id") for f in features]
-        geoms = [(f.get("geometry") or {}).get("coordinates") for f in features]
-        if any(g is not None for g in geoms):
-            df["geometry"] = geoms
-        return df
-
-    # Organize json into geodataframe and make sure id column comes along.
-    df = gpd.GeoDataFrame.from_features(features)
-    # Mirror the non-geopandas branch's defensive ``f.get("id")`` so a feature
-    # missing a top-level ``id`` yields None rather than a KeyError.
-    df["id"] = [f.get("id") for f in features]
-    df = df[["id"] + [col for col in df.columns if col != "id"]]
-
-    # If no geometry present, then return pandas dataframe. A geodataframe
-    # is not needed.
-    if df["geometry"].isnull().all():
-        df = pd.DataFrame(df.drop(columns="geometry"))
-
-    return df
-
-
-@asynccontextmanager
-async def _client_for(
-    client: httpx.AsyncClient | None,
-) -> AsyncIterator[httpx.AsyncClient]:
-    """
-    Yield a usable async client, picking the best available source.
-
-    Resolution order:
-
-    1. ``client`` if the caller supplied one (borrowed; not closed
-       here — the caller owns its lifecycle).
-    2. The chunker's shared async client if we're inside a
-       :class:`~dataretrieval.waterdata.chunking.ChunkedCall` run (per
-       :func:`chunking.get_active_client`). Borrowed; the chunker
-       closes it on exit.
-    3. A fresh short-lived ``httpx.AsyncClient`` opened here and closed
-       on context exit.
-
-    Parameters
-    ----------
-    client : httpx.AsyncClient or None
-        A caller-owned client to borrow, or ``None`` to defer to the
-        chunker's shared client or a temporary one.
-
-    Yields
-    ------
-    httpx.AsyncClient
-        The chosen client.
-    """
-    if client is not None:
-        yield client
-        return
-    shared = get_active_client()
-    if shared is not None:
-        yield shared
-        return
-    async with httpx.AsyncClient(**HTTPX_DEFAULTS) as new:
-        yield new
-
-
-def _aggregate_paginated_response(
-    initial: httpx.Response,
-    last: httpx.Response,
-    total_elapsed: timedelta,
-) -> httpx.Response:
-    """
-    Build a single response covering a paginated call.
-
-    Returns a shallow copy of ``initial`` with ``.headers`` set to the
-    LAST page's (so downstream sees current ``x-ratelimit-remaining``)
-    and ``.elapsed`` set to total wall-clock. The canonical
-    ``initial.url`` is preserved (it's the user's original query).
-    Both ``initial`` and ``last`` are left unmutated, mirroring the
-    convention of
-    :func:`dataretrieval.waterdata.chunking._combine_chunk_responses`.
-
-    Parameters
-    ----------
-    initial : httpx.Response
-        First-page response (the canonical one for ``md.url``).
-    last : httpx.Response
-        Last-page response — supplies the headers to copy over.
-    total_elapsed : datetime.timedelta
-        Cumulative wall-clock across every page, including ``initial``.
-
-    Returns
-    -------
-    httpx.Response
-        A shallow copy of ``initial`` with ``.headers`` set to a fresh
-        ``httpx.Headers`` and ``.elapsed`` set to the cumulative
-        wall-clock. ``initial.headers`` / ``initial.elapsed`` are
-        never mutated, so callers holding a pre-pagination reference
-        still see the original first-page values.
-    """
-    final = copy.copy(initial)
-    final.headers = httpx.Headers(last.headers)
-    final.elapsed = total_elapsed
-    return final
-
-
-_Cursor = TypeVar("_Cursor")
-
-# Optional cap on the total rows a single paginated call accumulates before it
-# stops following ``next`` links. ``None`` (the default the data getters use)
-# means "no cap — fetch the whole series". Set via :func:`_row_cap` so the deep
-# ``_paginate`` loop can honor it without threading the value through the
-# generic chunker; this mirrors the ``_progress`` ambient-reporter pattern.
-_row_cap_var: ContextVar[int | None] = ContextVar("waterdata_row_cap", default=None)
-
-
-@contextmanager
-def _row_cap(max_rows: int | None) -> Iterator[None]:
-    """Cap the rows any :func:`_paginate` under this context will
-    accumulate (``None`` = uncapped). Used by :func:`get_reference_table`
-    to preview large tables without downloading every page."""
-    token = _row_cap_var.set(max_rows)
-    try:
-        yield
-    finally:
-        _row_cap_var.reset(token)
-
-
-async def _paginate(
-    initial_req: httpx.Request,
-    *,
-    parse_response: Callable[[httpx.Response], tuple[pd.DataFrame, _Cursor | None]],
-    follow_up: Callable[[_Cursor, httpx.AsyncClient], Awaitable[httpx.Response]],
-    client: httpx.AsyncClient | None = None,
-) -> tuple[pd.DataFrame, httpx.Response]:
-    """
-    Drive a paginated request to completion over an
-    :class:`httpx.AsyncClient`.
-
-    The common shape behind :func:`_walk_pages` and
-    :func:`get_stats_data`: send the initial request, then loop calling
-    ``follow_up`` until ``parse_response`` reports a ``None`` cursor,
-    accumulating frames and elapsed time. Any mid-pagination failure
-    raises ``DataRetrievalError`` wrapping the cause — the API exposes no
-    resume cursor, so the caller's only recovery is to retry the whole
-    call. Issuing HTTP asynchronously lets the multiple sub-requests of a
-    chunked call run concurrently under
-    :meth:`~dataretrieval.waterdata.chunking.ChunkedCall._run`.
-
-    Parameters
-    ----------
-    initial_req : httpx.Request
-        First-page request to send.
-    parse_response : callable
-        ``resp -> (df, next_cursor_or_None)``. Returns the page's
-        DataFrame and the cursor (URL, token, …) used to drive
-        ``follow_up`` for the next page; ``None`` terminates the loop.
-    follow_up : callable
-        ``(cursor, client) -> Awaitable[httpx.Response]``. Builds and
-        sends the next-page request.
-    client : httpx.AsyncClient, optional
-        Caller-borrowed client. ``None`` (default) means use the
-        chunker's shared client (if inside a chunked call) or open
-        a temporary one.
-
-    Returns
-    -------
-    df : pandas.DataFrame
-        Concatenation of every page's parsed frame.
-    response : httpx.Response
-        A shallow copy of the first-page response, with ``.headers``
-        rebuilt as a fresh ``httpx.Headers`` reflecting the last page and
-        ``.elapsed`` set to cumulative wall-clock. The canonical URL is
-        preserved from the first page. The original first-page response
-        is not mutated.
-
-    Raises
-    ------
-    DataRetrievalError
-        On a non-200 initial response, the typed subclass for the status from
-        :func:`_raise_for_non_200` (a
-        :class:`~dataretrieval.exceptions.TransientError` for a retryable
-        429 / 5xx, otherwise a fatal :class:`~dataretrieval.exceptions.HTTPError`);
-        or, on an initial-page parse failure or any subsequent-page failure, a
-        base ``DataRetrievalError`` wrapping the cause (built by
-        :func:`_paginated_failure_message`, original exception on ``__cause__``).
-    httpx.HTTPError
-        Network-level failures on the *initial* request (e.g.
-        ``ConnectError``, ``TimeoutException``) propagate unmodified
-        so callers can branch on the specific type; equivalent
-        failures on subsequent pages are wrapped per above.
-    """
-    logger.debug("Requesting: %s", initial_req.url)
-    reporter = _progress.current()
-    async with _client_for(client) as sess:
-        resp = await sess.send(initial_req)
-        _raise_for_non_200(resp)
-        initial_response = resp
-        total_elapsed = _safe_elapsed(resp)
-
-        try:
-            df, cursor = parse_response(resp)
-        except Exception as e:  # noqa: BLE001
-            # Initial-page parse failures (malformed JSON, missing
-            # ``features``, schema drift) get the same wrapped-message
-            # treatment as follow-up failures so callers see a consistent
-            # diagnostic regardless of which page broke.
-            logger.warning("Initial response parse failed.")
-            raise DataRetrievalError(_paginated_failure_message(0, e)) from e
-        dfs = [df]
-        # Stop following ``next`` links once the optional row cap is reached
-        # (see :func:`_row_cap`); ``None`` means uncapped. The concatenation
-        # is sliced to the cap below so a final over-budget page can't exceed it.
-        cap = _row_cap_var.get()
-        nrows = len(df)
-        if reporter is not None:
-            reporter.set_rate_remaining(
-                resp.headers.get(_QUOTA_HEADER),
-                limit=resp.headers.get("x-ratelimit-limit"),
-            )
-            reporter.add_page(rows=len(df))
-        while cursor is not None and (cap is None or nrows < cap):
-            try:
-                resp = await follow_up(cursor, sess)
-                _raise_for_non_200(resp)
-                df, cursor = parse_response(resp)
-                dfs.append(df)
-                nrows += len(df)
-                total_elapsed += _safe_elapsed(resp)
-                if reporter is not None:
-                    reporter.set_rate_remaining(
-                        resp.headers.get(_QUOTA_HEADER),
-                        limit=resp.headers.get("x-ratelimit-limit"),
-                    )
-                    reporter.add_page(rows=len(df))
-            except Exception as e:  # noqa: BLE001
-                logger.warning(
-                    "Request failed at cursor %r. Data download interrupted.",
-                    cursor,
-                )
-                raise DataRetrievalError(_paginated_failure_message(len(dfs), e)) from e
-
-        # Aggregate headers / elapsed onto a COPY of the initial
-        # response so the user's caller never sees an in-place
-        # mutation of the response object they may have inspected
-        # mid-pagination via a hook or test fixture.
-        final_response = _aggregate_paginated_response(
-            initial_response, resp, total_elapsed
-        )
-        result = pd.concat(dfs, ignore_index=True)
-        if cap is not None:
-            result = result.head(cap)
-        return result, final_response
-
-
-def _ogc_parse_response(
-    resp: httpx.Response, *, geopd: bool
-) -> tuple[pd.DataFrame, str | None]:
-    """Parse one OGC API page: extract the DataFrame and the next-page URL.
-
-    The parse strategy :func:`_walk_pages` hands to
-    :func:`_paginate`. Coerces falsy cursors (empty href, etc.) to
-    ``None`` so the paginate loop's ``while cursor is not None``
-    terminates instead of spinning on a meaningless value.
-    """
-    body = resp.json()
-    return (
-        _get_resp_data(resp, geopd=geopd, body=body),
-        _next_req_url(resp, body=body) or None,
-    )
-
-
-async def _walk_pages(
-    geopd: bool,
-    req: httpx.Request,
-    client: httpx.AsyncClient | None = None,
-) -> tuple[pd.DataFrame, httpx.Response]:
-    """
-    Iterate paginated OGC API responses asynchronously and aggregate
-    them into one DataFrame.
-
-    Thin wrapper that hands off to :func:`_paginate` with
-    OGC-specific strategies: pages are parsed via :func:`_get_resp_data`
-    (through :func:`_ogc_parse_response`) and the next-page cursor is the
-    URL from the response's ``links`` array (per :func:`_next_req_url`).
-
-    Parameters
-    ----------
-    geopd : bool
-        Whether geopandas is installed (drives geometry handling).
-    req : httpx.Request
-        The initial HTTP request to send.
-    client : httpx.AsyncClient, optional
-        Caller-borrowed client; ``None`` defers client management to
-        :func:`_paginate`.
-
-    Returns
-    -------
-    pd.DataFrame
-        A DataFrame containing the aggregated results from all pages.
-    httpx.Response
-        Aggregated response — initial-request URL (for query identity),
-        final page's headers (so downstream sees current rate-limit
-        state), and cumulative ``elapsed`` summed across pages.
-
-    Raises
-    ------
-    DataRetrievalError
-        See :func:`_paginate`.
-    httpx.HTTPError
-        See :func:`_paginate`.
-    """
-    method = req.method  # ``httpx.Request.method`` is already upper-cased.
-    headers = req.headers
-    content = req.content if method == "POST" else None
-
-    async def follow_up(cursor: str, sess: httpx.AsyncClient) -> httpx.Response:
-        return await sess.request(method, cursor, headers=headers, content=content)
-
-    return await _paginate(
-        req,
-        parse_response=functools.partial(_ogc_parse_response, geopd=geopd),
-        follow_up=follow_up,
-        client=client,
-    )
-
-
-def _deal_with_empty(
-    return_list: pd.DataFrame, properties: list[str] | None, service: str
-) -> pd.DataFrame:
-    """
-    Handles empty DataFrame results by returning a DataFrame with appropriate columns.
-
-    If `return_list` is empty, determines the column names to use:
-        - If `properties` is not provided or contains only NaN values,
-          retrieves schema properties from the specified service.
-        - Otherwise, uses the provided `properties` list as column names.
-
-    Parameters
-    ----------
-    return_list : pd.DataFrame
-        The DataFrame to check for emptiness.
-    properties : Optional[List[str]]
-        List of property names to use as columns, or None.
-    service : str
-        The service endpoint to query for schema properties if needed.
-
-    Returns
-    -------
-    pd.DataFrame
-        The original DataFrame if not empty, otherwise an empty
-        DataFrame with the appropriate columns.
-    """
-    if return_list.empty:
-        if not properties or all(pd.isna(properties)):
-            schema = _check_ogc_requests(endpoint=service, req_type="schema")
-            properties = list(schema.get("properties", {}).keys())
-        return pd.DataFrame(columns=properties)
-    return return_list
+"""Water Data API layer over the generic OGC engine.
+
+The API-agnostic OGC machinery (request construction, pagination, response
+shaping, the chunked ``get_ogc_data`` entry point) lives in
+:mod:`dataretrieval.ogc.engine`. This module is the Water-Data-specific layer
+on top of it: it supplies the service-to-id map, the CQL2/date-only dialect,
+profile validation, and a thin ``get_ogc_data`` wrapper that injects the
+Water Data defaults. (The statistics path lives in its own
+:mod:`dataretrieval.waterdata.stats` module.) Every engine symbol the Water Data
+getters (``api.py``, ``ratings.py``, ``nearest.py``) and the test suite import
+from here is re-exported below.
+"""
 
+from __future__ import annotations
 
-def _arrange_cols(
-    df: pd.DataFrame, properties: list[str] | None, output_id: str
-) -> pd.DataFrame:
-    """
-    Rearranges and renames columns in a DataFrame based on provided
-    properties and the service output id.
-
-    Parameters
-    ----------
-    df : pd.DataFrame
-        The input DataFrame whose columns are to be rearranged or renamed.
-    properties : Optional[List[str]]
-        A list of column names to possibly rename. If None or contains
-        only NaN, the function renames 'id' to output_id.
-    output_id : str
-        The name to which the 'id' column should be renamed if applicable.
-
-    Returns
-    -------
-    pd.DataFrame or gpd.GeoDataFrame
-        The DataFrame with columns rearranged and/or renamed according
-        to the specified properties and output_id.
-    """
-
-    # Rename id column to output_id
-    df = df.rename(columns={"id": output_id})
-
-    if properties and not all(pd.isna(properties)):
-        # Don't alias the caller's list — we mutate below.
-        local_properties = list(properties)
-        if "geometry" in df.columns and "geometry" not in local_properties:
-            local_properties.append("geometry")
-        # 'id' is a valid service column, but expose it under the
-        # service-specific output_id name instead.
-        if "id" in local_properties:
-            local_properties[local_properties.index("id")] = output_id
-        df = df.loc[:, [col for col in local_properties if col in df.columns]]
-
-    # Move meaningless-to-user, extra id columns to the end
-    # of the dataframe, if they exist
-    extra_id_col = set(df.columns).intersection(_EXTRA_ID_COLS)
-
-    # If the arbitrary id column is returned (either due to properties
-    # being none or NaN), then move it to the end of the dataframe, but
-    # if part of properties, keep in requested order
-    if extra_id_col and (properties is None or all(pd.isna(properties))):
-        id_col_order = [col for col in df.columns if col not in extra_id_col] + list(
-            extra_id_col
-        )
-        df = df.loc[:, id_col_order]
-
-    return df
-
-
-def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Casts columns into appropriate types.
-
-    Parameters
-    ----------
-    df : pd.DataFrame
-        The input DataFrame containing water data.
+from typing import Any, get_args
 
-    Returns
-    -------
-    pd.DataFrame
-        The DataFrame with columns cast to appropriate types.
+import httpx
+import pandas as pd
 
-    """
-    cols = set(df.columns)
-    numerical_cols = [
-        "altitude",
-        "altitude_accuracy",
-        "contributing_drainage_area",
-        "drainage_area",
-        "hole_constructed_depth",
-        "value",
-        "well_constructed_depth",
-    ]
-    time_cols = [
-        "begin",
-        "begin_utc",
-        "construction_date",
-        "end",
-        "end_utc",
-        "last_modified",
-        "time",
-    ]
+from dataretrieval.codes.states import to_state
+from dataretrieval.ogc import engine
+from dataretrieval.ogc.engine import (
+    _DATE_RANGE_PARAMS,
+    _DURATION_RE,
+    BASE_URL,
+    GEOPANDAS,
+    OGC_API_URL,
+    OgcDialect,
+    _arrange_cols,
+    _as_str_list,
+    _check_id_format,
+    _check_monitoring_location_id,
+    _check_ogc_requests,
+    _construct_api_requests,
+    _construct_cql_request,
+    _deal_with_empty,
+    _default_headers,
+    _error_body,
+    _format_api_dates,
+    _get_resp_data,
+    _next_req_url,
+    _normalize_str_iterable,
+    _paginate,
+    _paginated_failure_message,
+    _parse_retry_after,
+    _raise_for_non_200,
+    _row_cap,
+    _run_sync,
+    _switch_properties_id,
+    _to_snake_case,
+    _walk_pages,
+)
+from dataretrieval.ogc.engine import (
+    _get_args as _engine_get_args,
+)
+from dataretrieval.utils import BaseMetadata
+from dataretrieval.waterdata.types import (
+    PROFILE_LOOKUP,
+    PROFILES,
+    SERVICES,
+)
 
-    for col in cols.intersection(time_cols):
-        df[col] = pd.to_datetime(df[col], errors="coerce")
+SAMPLES_URL = f"{BASE_URL}/samples-data"
 
-    for col in cols.intersection(numerical_cols):
-        df[col] = pd.to_numeric(df[col], errors="coerce")
+# Maps each OGC waterdata service to its user-facing ``id`` column (the name the
+# typed getters rename the wire ``id`` to, e.g. ``daily`` -> ``daily_id``).
+# ``get_cql`` validates its ``service`` argument against these keys and
+# uses the value as the ``output_id`` for result shaping. Keep in sync with the
+# ``types.WATERDATA_SERVICES`` Literal (same keys).
+_OUTPUT_ID_BY_SERVICE: dict[str, str] = {
+    "channel-measurements": "channel_measurements_id",
+    "combined-metadata": "combined_meta_id",
+    "continuous": "continuous_id",
+    "daily": "daily_id",
+    "field-measurements": "field_measurement_id",
+    "field-measurements-metadata": "field_series_id",
+    "latest-continuous": "latest_continuous_id",
+    "latest-daily": "latest_daily_id",
+    "monitoring-locations": "monitoring_location_id",
+    "peaks": "peak_id",
+    "time-series-metadata": "time_series_id",
+}
 
-    return df
+# Every service's output id EXCEPT the two that are genuinely user-facing
+# (``monitoring_location_id`` and ``time_series_id``). The rest are synthetic
+# per-record ids that ``_arrange_cols`` moves to the end of a result frame.
+# Derived from ``_OUTPUT_ID_BY_SERVICE`` so adding a service can't silently
+# leave a stray id column at the front again.
+_EXTRA_ID_COLS = frozenset(
+    set(_OUTPUT_ID_BY_SERVICE.values()) - {"monitoring_location_id", "time_series_id"}
+)
 
+# The Water Data API dialect: ``monitoring-locations`` doesn't accept
+# comma-separated multi-value GET params (so it must POST CQL2 JSON),
+# ``daily`` renders its time arguments date-only (``YYYY-MM-DD``), and the
+# ``time_cols``/``numerical_cols``/``sort_cols`` are the Water-Data column
+# vocabulary used to coerce datetime/numeric columns and to sort results.
+WATERDATA_DIALECT = OgcDialect(
+    cql2_services=frozenset({"monitoring-locations"}),
+    date_only_services=frozenset({"daily"}),
+    time_cols=frozenset(
+        {
+            "begin",
+            "begin_utc",
+            "construction_date",
+            "end",
+            "end_utc",
+            "last_modified",
+            "time",
+        }
+    ),
+    numerical_cols=frozenset(
+        {
+            "altitude",
+            "altitude_accuracy",
+            "contributing_drainage_area",
+            "drainage_area",
+            "hole_constructed_depth",
+            "value",
+            "well_constructed_depth",
+        }
+    ),
+    sort_cols=("time", "monitoring_location_id"),
+)
 
-def _sort_rows(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Sorts rows by 'time' and 'monitoring_location_id' columns if they
-    exist.
+# Iterable-shaped params that ``_get_args`` must NOT push through
+# ``_normalize_str_iterable`` (scalar non-string knobs are caught by runtime
+# type, so only iterables with special handling need to be named here):
+#   - date-range params may contain ``pd.NaT``/None or interval strings
+#   - ``bbox``/``boundingBox`` are ``list[float]``, sometimes ``numpy.ndarray``
+#   - ``get_peaks``'s int-valued filters (``water_year`` etc.) are ``list[int]``
+#   - ``get_combined_metadata``'s ``thresholds`` is ``list[float]``
+_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {
+    "bbox",
+    "boundingBox",
+    "water_year",
+    "year",
+    "month",
+    "day",
+    "peak_since",
+    "thresholds",
+}
 
-    Parameters
-    ----------
-    df : pd.DataFrame
-        The input DataFrame containing water data.
 
-    Returns
-    -------
-    pd.DataFrame
-        The DataFrame with rows ordered by time and site.
+def _get_args(
+    local_vars: dict[str, Any], exclude: set[str] | None = None
+) -> dict[str, Any]:
+    """Water-Data wrapper over :func:`engine._get_args`.
 
+    Forwards ``local_vars`` and ``exclude`` unchanged and passes
+    ``_NO_NORMALIZE_PARAMS`` as ``no_normalize``, so the date-range, bounding
+    box and numeric list params named there keep their element types. See
+    :func:`engine._get_args` for the full normalization contract.
     """
-    if "time" in df.columns and "monitoring_location_id" in df.columns:
-        df = df.sort_values(by=["time", "monitoring_location_id"], ignore_index=True)
-    elif "time" in df.columns:
-        df = df.sort_values(by="time", ignore_index=True)
-
-    return df
-
-
-def _finalize_ogc(
-    frame: pd.DataFrame,
-    response: httpx.Response,
-    *,
-    properties: list[str] | None,
-    output_id: str,
-    convert_type: bool,
-    service: str,
-    max_rows: int | None = None,
-) -> tuple[pd.DataFrame, BaseMetadata]:
-    """Shape a combined OGC result into the user-facing ``(df, md)``.
+    return _engine_get_args(local_vars, exclude, no_normalize=_NO_NORMALIZE_PARAMS)
 
-    The single home for the OGC getters' result shaping: empties
-    normalized, types coerced (when ``convert_type``), the wire ``id``
-    renamed and columns ordered, rows sorted, optionally truncated to
-    ``max_rows``, and the response wrapped as
-    :class:`~dataretrieval.utils.BaseMetadata`.
 
-    Injected into the chunker as its ``finalize`` hook (see
-    :data:`~dataretrieval.waterdata.chunking._Finalize`) so the
-    un-interrupted return *and* a resumed ``ChunkInterrupted.call.resume()``
-    produce the same shape — closing the gap where resume used to hand back
-    the chunker's raw frame and bare ``httpx.Response``.
+def _with_state(local_vars: dict[str, Any], *, to: str, into: str) -> dict[str, Any]:
+    """Translate the unified ``state`` argument into the endpoint's native
+    state queryable; ``local_vars`` is mutated in place and returned.
+
-    ``max_rows`` is applied here (after dedup/sort, on the *combined* frame)
-    rather than only per-sub-request, so a chunked call's total is bounded
-    to exactly ``max_rows`` and a resumed call honors the cap too — the
-    per-``_paginate`` ``_row_cap`` is only an early-stop download bound.
+    ``state`` is the format-flexible parameter (full name / postal / FIPS). It
+    is popped from ``local_vars`` and, unless ``None``, normalized via
+    :func:`~dataretrieval.codes.states.to_state` to the ``to`` representation
+    and stored under ``into`` (the queryable this endpoint filters on). It is
+    additive sugar over the native ``state_code`` / ``state_name`` parameters,
+    which still accept the API's raw values (e.g. non-US FIPS); passing it
+    with a non-``None`` ``state_code`` or ``state_name`` raises ``ValueError``.
     """
-    frame = _deal_with_empty(frame, properties, service)
-    if convert_type:
-        frame = _type_cols(frame)
-    frame = _arrange_cols(frame, properties, output_id)
-    frame = _sort_rows(frame)
-    if max_rows is not None:
-        frame = frame.head(max_rows)
-    return frame, BaseMetadata(response)
+    state = local_vars.pop("state", None)
+    if state is None:
+        return local_vars
+    if any(local_vars.get(p) is not None for p in ("state_code", "state_name")):
+        raise ValueError("Pass `state`, or state_code/state_name, but not both.")
+    local_vars[into] = to_state(state, to)
+    return local_vars
 
 
 def get_ogc_data(
@@ -1455,21 +183,20 @@ def get_ogc_data(
     output_id: str | None = None,
     max_rows: int | None = None,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
-    """
-    Retrieves OGC (Open Geospatial Consortium) data from a specified
-    endpoint and returns it as a pandas DataFrame with metadata.
+    """Water-Data wrapper over :func:`engine.get_ogc_data`.
 
-    This function prepares request arguments, constructs API requests,
-    handles pagination, processes the results, and formats output
-    according to the specified parameters.
+    Defaults ``output_id`` from the Water Data service map when not given,
+    and supplies the Water Data extra-id columns and dialect, so the typed
+    getters in ``api.py`` call this unchanged. (Sibling OGC APIs such as
+    NGWMN call ``engine.get_ogc_data`` directly with their own base URL and
+    dialect rather than going through this Water Data wrapper.)
 
     Parameters
     ----------
     args : Dict[str, Any]
         Dictionary of request arguments for the OGC service.
     service : str
-        The OGC API collection name (e.g., ``"daily"``,
-        ``"monitoring-locations"``, ``"continuous"``).
+        The OGC API collection name (e.g., ``"daily"``).
     output_id : str, optional
         The user-facing id column the wire ``id`` is renamed to. Defaults
         to ``_OUTPUT_ID_BY_SERVICE[service]``; pass it explicitly only for
@@ -1477,8 +204,7 @@ def get_ogc_data(
     max_rows : int, optional
         Stop paginating once this many rows have been collected and
         truncate the result to exactly ``max_rows``. ``None`` (default)
-        fetches the full result. Intended for cheap previews of large,
-        un-chunked tables (e.g. :func:`get_reference_table`).
+        fetches the full result.
 
     Returns
     -------
@@ -1486,360 +212,48 @@ def get_ogc_data(
         A DataFrame containing the retrieved and processed OGC data.
     BaseMetadata
         A metadata object containing request information including URL and query time.
-
-    Notes
-    -----
-    - The function does not mutate the input `args` dictionary.
-    - Handles optional arguments such as `convert_type`.
-    - Applies column cleanup and reordering based on service and properties.
     """
-    # Enforce a genuine positive integer: a float (even ``10.0``) or ``bool``
-    # would pass a bare ``< 1`` check and then crash deep in
-    # ``pd.DataFrame.head`` with an opaque ``TypeError`` after HTTP I/O has
-    # already fired. ``numbers.Integral`` (not ``int``) so numpy integers —
-    # e.g. ``max_rows`` derived from a numpy/pandas computation — are accepted;
-    # ``bool`` is an ``Integral`` subtype, so exclude it explicitly.
-    if max_rows is not None and (
-        not isinstance(max_rows, numbers.Integral)
-        or isinstance(max_rows, bool)
-        or max_rows < 1
-    ):
-        raise ValueError(f"max_rows must be a positive integer (got {max_rows!r}).")
-
-    # Each service renames its wire ``id`` to a service-specific column; that
-    # name is derived from ``service`` via the canonical map so the getters
-    # don't each repeat it. Callers for collections outside the map (e.g.
-    # get_reference_table's metadata collections) pass output_id explicitly.
     if output_id is None:
         output_id = _OUTPUT_ID_BY_SERVICE[service]
-
-    args = args.copy()
-    args["service"] = service
-    args = _switch_arg_id(args, id_name=output_id, service=service)
-    # Capture `properties` before the id-switch so post-processing sees
-    # the user-facing names, not the wire-format ones.
-    properties = args.get("properties")
-    args["properties"] = _switch_properties_id(
-        properties, id_name=output_id, service=service
-    )
-    convert_type = args.pop("convert_type", False)
-    args = {k: v for k, v in args.items() if v is not None}
-
-    # Post-processing is injected into the chunker rather than applied here,
-    # so it runs on *every* exit: the normal return AND a later
-    # ``exc.call.resume()`` after a ChunkInterrupted (which never re-enters
-    # this function). ``_finalize_ogc`` is the single source of result shape;
-    # it also applies ``max_rows`` to the *combined* frame so the cap is the
-    # exact total even when the plan chunks or the call is resumed, while
-    # ``_row_cap`` below only early-stops each sub-request's pagination.
-    finalize = functools.partial(
-        _finalize_ogc,
-        properties=properties,
-        output_id=output_id,
-        convert_type=convert_type,
-        service=service,
+    return engine.get_ogc_data(
+        args,
+        service,
+        output_id,
         max_rows=max_rows,
+        base_url=OGC_API_URL,
+        extra_id_cols=_EXTRA_ID_COLS,
+        dialect=WATERDATA_DIALECT,
     )
-    with _progress.progress_context(service=service), _row_cap(max_rows):
-        return _fetch_once(args, finalize=finalize)
-
-
-@chunking.multi_value_chunked(build_request=_construct_api_requests)
-async def _fetch_once(
-    args: dict[str, Any],
-) -> tuple[pd.DataFrame, httpx.Response]:
-    """Send one prepared-args OGC request asynchronously; return the
-    frame + response.
-
-    ``@chunking.multi_value_chunked`` models every multi-value list
-    parameter and the cql-text filter as a chunkable axis, greedy-halves
-    the biggest chunk across all axes until each sub-request URL fits,
-    and iterates the cartesian product. With no chunkable inputs the
-    decorator passes args through unchanged. The decorator gathers every
-    sub-request over one shared :class:`httpx.AsyncClient` (concurrency
-    bounded by a semaphore, sized from ``API_USGS_CONCURRENT``)
-    and returns a *synchronous* wrapper, so ``get_ogc_data`` keeps calling
-    ``_fetch_once(args, finalize=...)`` synchronously. The return shape is
-    ``(frame, response)``.
-    """
-    req = _construct_api_requests(**args)
-    return await _walk_pages(geopd=GEOPANDAS, req=req)
-
-
-def _handle_stats_nesting(
-    body: dict[str, Any],
-    geopd: bool = False,
-) -> pd.DataFrame:
-    """
-    Takes nested json from stats service and flattens into a dataframe with
-    one row per monitoring location, parameter, and statistic.
-
-    Parameters
-    ----------
-    body : Dict[str, Any]
-        The JSON response body from the statistics service containing nested data.
-    geopd : bool, optional
-        Whether ``geopandas`` is available — when ``True`` the returned
-        frame is a ``GeoDataFrame``; when ``False`` (default) a plain
-        ``pd.DataFrame`` is returned with geometry flattened.
-
-    Returns
-    -------
-    pd.DataFrame
-        A DataFrame containing the flattened statistical data.
-
-    Notes
-    -----
-    The non-geopandas branch uses the same schema-aware extraction as
-    :func:`_get_resp_data`: it builds the per-feature outer frame
-    directly from each feature's ``properties`` (minus the nested
-    ``data`` field, which is unrolled separately below via the
-    ``record_path`` json_normalize), then adds ``geometry`` only when
-    present. Unlike :func:`_get_resp_data`, no top-level ``id`` column
-    is added — stats features don't carry one, so this matches the
-    geopandas branch. Skipping the GeoJSON envelope keeps newly-added
-    fields like ``geometry.type`` from leaking into the result.
-    """
-    if body is None:
-        return gpd.GeoDataFrame() if geopd else pd.DataFrame()
-
-    # An empty (or missing) features list — a real mid-pagination
-    # shape — would otherwise crash the downstream merge with
-    # ``KeyError: 'monitoring_location_id'`` because neither df nor
-    # dat would carry the merge key. Bail out with an empty frame —
-    # ``GeoDataFrame`` when geopd is available so the eventual
-    # ``pd.concat`` with non-empty geo pages doesn't downgrade to a
-    # plain DataFrame and strip geometry/CRS.
-    features = body.get("features") or []
-    if not features:
-        return gpd.GeoDataFrame() if geopd else pd.DataFrame()
-
-    # The geopd-missing warning is emitted once at import (see top of module);
-    # doing it here would log per page.
-    if not geopd:
-        outer_props = [
-            {k: v for k, v in (f.get("properties") or {}).items() if k != "data"}
-            for f in features
-        ]
-        df = pd.json_normalize(outer_props, sep=".")
-        df.columns = df.columns.str.split(".").str[-1]
-        # Stats features don't carry a top-level ``id`` field — the
-        # geopandas branch (``GeoDataFrame.from_features``) doesn't
-        # surface one either, so the non-geopd branch stays
-        # consistent by NOT adding an id column.
-        geoms = [(f.get("geometry") or {}).get("coordinates") for f in features]
-        if any(g is not None for g in geoms):
-            df["geometry"] = geoms
-    else:
-        df = gpd.GeoDataFrame.from_features(features).drop(
-            columns=["data"], errors="ignore"
-        )
-
-    # Unnest json features, properties, data, and values while retaining necessary
-    # metadata to merge with main dataframe.
-    dat = pd.json_normalize(
-        body,
-        record_path=["features", "properties", "data", "values"],
-        meta=[
-            ["features", "properties", "monitoring_location_id"],
-            ["features", "properties", "data", "parameter_code"],
-            ["features", "properties", "data", "unit_of_measure"],
-            ["features", "properties", "data", "parent_time_series_id"],
-        ],
-        meta_prefix="",
-        errors="ignore",
-    )
-    dat.columns = dat.columns.str.split(".").str[-1]
-
-    return df.merge(dat, on="monitoring_location_id", how="left")
-
-
-def _expand_percentiles(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Takes percentile value and thresholds columns containing lists
-    of values and turns each list element into its own row in the
-    original dataframe. Exploded ``'nan'`` values are dropped. If
-    no percentile data exist, it adds a percentile column and
-    populates it with the percentile assigned to min, max, and
-    median.
-
-    Parameters
-    ----------
-    df : pd.DataFrame
-        The dataframe returned from using one of the statistics services.
-
-    Returns
-    -------
-    pd.DataFrame
-        A DataFrame containing the flattened percentile data.
-    """
-    if len(df) > 0:
-        if "percentile" in df["computation"].unique():
-            # Explode percentile lists into rows called "value" and "percentile"
-            percentiles = df.loc[df["computation"] == "percentile"]
-            percentiles_explode = percentiles[
-                ["computation_id", "values", "percentiles"]
-            ].explode(["values", "percentiles"], ignore_index=True)
-            percentiles_explode = percentiles_explode.loc[
-                percentiles_explode["values"] != "nan"
-            ]
-            percentiles_explode["value"] = pd.to_numeric(percentiles_explode["values"])
-            percentiles_explode["percentile"] = pd.to_numeric(
-                percentiles_explode["percentiles"]
-            )
-            percentiles_explode = percentiles_explode.drop(
-                columns=["values", "percentiles"]
-            )
-
-            # Merge exploded values back to other metadata/geometry
-            percentiles = percentiles.drop(
-                columns=["values", "percentiles", "value"], errors="ignore"
-            ).merge(percentiles_explode, on="computation_id", how="left")
-
-            # Concatenate back to original
-            dfs = pd.concat(
-                [df.loc[df["computation"] != "percentile"], percentiles]
-            ).drop(columns=["values", "percentiles"])
-        else:
-            dfs = df
-            dfs["percentile"] = pd.NA
 
-        # Give min, max, median a percentile value
-        dfs.loc[dfs["computation"] == "maximum", "percentile"] = 100
-        dfs.loc[dfs["computation"] == "minimum", "percentile"] = 0
-        dfs.loc[dfs["computation"] == "median", "percentile"] = 50
 
-        # Make sure numeric
-        dfs["percentile"] = pd.to_numeric(dfs["percentile"])
-
-        # Move percentile column
-        cols = dfs.columns.tolist()
-        cols.remove("percentile")
-        col_index = cols.index("value") + 1
-        cols.insert(col_index, "percentile")
-
-        return dfs[cols]
-
-    else:
-        return df
-
-
-def _run_sync(
-    make_coro: Callable[[], Awaitable[tuple[pd.DataFrame, httpx.Response]]],
+def _finalize_ogc(
+    frame: pd.DataFrame,
+    response: httpx.Response,
     *,
+    properties: list[str] | None,
+    output_id: str,
+    convert_type: bool,
     service: str,
-) -> tuple[pd.DataFrame, httpx.Response]:
-    """Drive an async OGC fetch to completion from synchronous code.
-
-    Opens the service progress context and runs ``make_coro()`` through a
-    short-lived ``anyio`` blocking portal (a worker thread), so the
-    non-chunked getters work whether or not the caller is already inside an
-    event loop (Jupyter/async apps). The portal copies the calling context,
-    so the active progress reporter still reaches the sub-requests.
-
-    Shared by the non-chunked fetch paths (:func:`get_stats_data`,
-    :func:`get_cql`); the chunked OGC getters drive their own portal
-    inside :meth:`chunking.ChunkedCall.resume`.
-    """
-    with _progress.progress_context(service=service):
-        with start_blocking_portal() as portal:
-            try:
-                return portal.call(make_coro)
-            except httpx.TransportError as exc:
-                # The initial-request connection failure ``_paginate`` lets
-                # through raw; mid-pagination failures are already typed.
-                raise _network_error(OGC_API_URL, exc) from exc
-
-
-def get_stats_data(
-    args: dict[str, Any],
-    service: str,
-    expand_percentiles: bool,
-    client: httpx.AsyncClient | None = None,
+    max_rows: int | None = None,
 ) -> tuple[pd.DataFrame, BaseMetadata]:
-    """
-    Retrieves statistical data from a specified endpoint and returns it
-    as a pandas DataFrame with metadata.
-
-    This function prepares request arguments, constructs API requests,
-    handles pagination, processes results, and formats output according
-    to the specified parameters.
-
-    The stats path doesn't go through ``multi_value_chunked`` (its query
-    shape has no chunkable list axes), so it drives :func:`_paginate`
-    directly through an ``anyio`` blocking portal. The portal runs the
-    pagination loop in a short-lived worker thread, so this works whether
-    or not the caller is already inside an event loop.
-
-    Parameters
-    ----------
-    args : Dict[str, Any]
-        Dictionary of request arguments for the statistics service.
-    service : str
-        The statistics service type (for example,
-        "observationNormals" or "observationIntervals").
-    expand_percentiles : bool
-        Determines whether the percentiles column is expanded so that
-        each percentile gets its own row in the returned dataframe. If
-        True and the user requests a computation_type other than
-        percentiles, a percentile column is still returned.
-    client : httpx.AsyncClient, optional
-        Caller-borrowed async client. ``None`` (default) opens a
-        temporary one inside the portal. Primarily a test seam.
-
-    Returns
-    -------
-    pd.DataFrame
-        A DataFrame containing the retrieved and processed statistical data.
-    BaseMetadata
-        A metadata object containing request information including URL and query time.
+    """Water-Data wrapper over :func:`engine._finalize_ogc`.
 
-    Raises
-    ------
-    DataRetrievalError
-        The typed subclass for an HTTP error response (see :func:`_paginate`);
-        or :class:`~dataretrieval.exceptions.NetworkError` if the initial request
-        can't reach the service (timeout / DNS), the ``httpx`` exception chained
-        on ``__cause__``.
+    Forwards its arguments unchanged and injects ``_EXTRA_ID_COLS`` and
+    ``WATERDATA_DIALECT`` so a direct call (e.g. from ``get_cql``) orders
+    synthetic id columns and coerces and sorts result columns like the typed
+    getters. See :func:`engine._finalize_ogc` for the result-shaping contract.
     """
-
-    url = f"{STATISTICS_API_URL}/{service}"
-    req = httpx.Request(
-        method="GET",
-        url=url,
-        headers=_default_headers(),
-        params=args,
+    return engine._finalize_ogc(
+        frame,
+        response,
+        properties=properties,
+        output_id=output_id,
+        convert_type=convert_type,
+        service=service,
+        max_rows=max_rows,
+        extra_id_cols=_EXTRA_ID_COLS,
+        dialect=WATERDATA_DIALECT,
     )
-    method = req.method
-    headers = req.headers
-
-    def parse_response(resp: httpx.Response) -> tuple[pd.DataFrame, str | None]:
-        body = resp.json()
-        # Coerce falsy cursors ("", 0) to None so _paginate terminates.
-        # USGS uses "next": null at end-of-stream, but defensive coerce
-        # protects against any "" sentinel a future schema might use.
-        return _handle_stats_nesting(body, geopd=GEOPANDAS), body.get("next") or None
-
-    async def follow_up(cursor: str, sess: httpx.AsyncClient) -> httpx.Response:
-        # Build a fresh params dict per page so the caller's ``args``
-        # is never mutated.
-        return await sess.request(
-            method, url=url, params={**args, "next_token": cursor}, headers=headers
-        )
-
-    async def _run() -> tuple[pd.DataFrame, httpx.Response]:
-        return await _paginate(
-            req,
-            parse_response=parse_response,
-            follow_up=follow_up,
-            client=client,
-        )
-
-    df, response = _run_sync(_run, service=service)
-
-    if expand_percentiles:
-        df = _expand_percentiles(df)
-    return df, BaseMetadata(response)
 
 
 def _check_profiles(
@@ -1871,212 +285,42 @@ def _check_profiles(
         )
 
 
-_MONITORING_LOCATION_ID_RE = re.compile(r"[^-\s]+-[^-\s]+")
-
-
-# Iterable-shaped params that ``_get_args`` must NOT push through
-# ``_normalize_str_iterable`` (scalar non-string knobs are caught by runtime
-# type, so only iterables with special handling need to be named here):
-#   - date-range params may contain ``pd.NaT``/None or interval strings
-#   - ``bbox``/``boundingBox`` are ``list[float]``, sometimes ``numpy.ndarray``
-#   - ``get_peaks``'s int-valued filters (``water_year`` etc.) are ``list[int]``
-#   - ``get_combined_metadata``'s ``thresholds`` is ``list[float]``
-_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {
-    "bbox",
-    "boundingBox",
-    "water_year",
-    "year",
-    "month",
-    "day",
-    "peak_since",
-    "thresholds",
-}
-
-
-def _normalize_str_iterable(
-    value: str | Iterable[str] | None,
-    param_name: str = "value",
-) -> str | list[str] | None:
-    """Validate that ``value`` is None, a string, or an iterable of strings.
-
-    Non-string iterables (``list``, ``tuple``, ``pandas.Series``,
-    ``pandas.Index``, ``numpy.ndarray``, generators) are materialized to a
-    ``list`` so downstream code that branches on ``isinstance(v, (list,
-    tuple))`` keeps working. ``Mapping`` types are rejected because
-    iterating a mapping yields keys, not values.
-
-    Parameters
-    ----------
-    value : None, str, or iterable of str
-    param_name : str, optional
-        Used in error messages. Defaults to ``"value"``.
-
-    Returns
-    -------
-    None, str, or list of str
-
-    Raises
-    ------
-    TypeError
-        If the input isn't ``None``, ``str``, or a non-``Mapping``
-        iterable; or if any iterable element isn't a string.
-    """
-    if value is None:
-        return None
-    if isinstance(value, str):
-        return value
-    if isinstance(value, Mapping) or not isinstance(value, Iterable):
-        raise TypeError(
-            f"{param_name} must be a string or iterable of strings, "
-            f"not {type(value).__name__} (got {value!r})."
-        )
-    values: list[str] = []
-    for v in value:
-        if not isinstance(v, str):
-            raise TypeError(
-                f"{param_name} elements must be strings, "
-                f"not {type(v).__name__} (got {v!r})."
-            )
-        values.append(v)
-    return values
-
-
-def _as_str_list(
-    value: str | Iterable[str] | None,
-    param_name: str = "value",
-) -> list[str] | None:
-    """Normalize ``value`` to ``list[str]`` (``None`` passes through).
-
-    Wraps a bare ``str`` in a single-element list — so a later
-    ``",".join(...)`` doesn't iterate it character-by-character — and
-    materializes any other iterable via :func:`_normalize_str_iterable`.
-    """
-    normalized = _normalize_str_iterable(value, param_name)
-    if isinstance(normalized, str):
-        return [normalized]
-    return normalized
-
-
-def _check_monitoring_location_id(
-    monitoring_location_id: str | Iterable[str] | None,
-) -> str | list[str] | None:
-    """Validate and normalize a ``monitoring_location_id`` value.
-
-    Combines :func:`_normalize_str_iterable` with the AGENCY-ID format
-    check that is unique to ``monitoring_location_id`` (the OGC spec
-    requires a hyphen separator, e.g. ``USGS-01646500``).
-
-    Parameters
-    ----------
-    monitoring_location_id : None, str, or iterable of str
-        See :func:`_normalize_str_iterable`. Each string is additionally
-        required to match the AGENCY-ID hyphen-separated format.
-
-    Returns
-    -------
-    None, str, or list of str
-
-    Raises
-    ------
-    TypeError
-        If the input isn't ``None``, ``str``, or a non-``Mapping``
-        iterable; or if any iterable element isn't a string.
-    ValueError
-        If any identifier doesn't contain a hyphen separator
-        (per the OGC API spec: AGENCY-ID format, e.g. ``USGS-01646500``).
-    """
-    try:
-        value = _normalize_str_iterable(
-            monitoring_location_id, "monitoring_location_id"
-        )
-    except TypeError as exc:
-        # Re-raise with the AGENCY-ID hint the generic helper doesn't carry.
-        raise TypeError(
-            f"{exc} Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'."
-        ) from None
-    if value is None:
-        return None
-    for item in (value,) if isinstance(value, str) else value:
-        _check_id_format(item)
-    return value
-
-
-def _check_id_format(value: str) -> None:
-    """Raise ``ValueError`` if ``value`` is not in ``AGENCY-ID`` format."""
-    if not _MONITORING_LOCATION_ID_RE.fullmatch(value):
-        raise ValueError(
-            f"Invalid monitoring_location_id: {value!r}. "
-            f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'."
-        )
-
-
-def _get_args(
-    local_vars: dict[str, Any], exclude: set[str] | None = None
-) -> dict[str, Any]:
-    """
-    Build the API-request kwargs dict from a getter's ``locals()``.
-
-    Drops bookkeeping keys (``service``, ``output_id``, anything in
-    ``exclude``) and ``None``-valued kwargs, then normalizes the
-    remaining values:
-
-    - ``monitoring_location_id`` is validated against the AGENCY-ID
-      format (per :func:`_check_monitoring_location_id`).
-    - ``properties`` is materialized to ``list[str]`` (a bare string
-      gets wrapped in a single-element list so downstream
-      ``",".join(properties)`` doesn't iterate per character).
-    - A non-string iterable in ``_NO_NORMALIZE_PARAMS`` (numeric params
-      such as ``water_year``, ``bbox``, ``thresholds``) is materialized
-      to a ``list`` with its element types preserved (no string
-      normalization), so the GET comma-join and the chunker — which test
-      ``list``/``tuple`` — handle it instead of ``str()``-ing the whole
-      array.
-    - Any other ``Iterable[str]`` (i.e. not in ``_NO_NORMALIZE_PARAMS``)
-      is materialized to ``list[str]`` via
-      :func:`_normalize_str_iterable` so downstream code that branches
-      on ``isinstance(v, (list, tuple))`` works for ``pandas.Series``,
-      ``numpy.ndarray``, generators, etc.
-    - Scalars and strings pass through unchanged.
-
-    Parameters
-    ----------
-    local_vars : dict[str, Any]
-        Dictionary of local variables, typically from ``locals()``.
-    exclude : set[str], optional
-        Additional keys to exclude from the resulting dictionary.
-
-    Returns
-    -------
-    dict[str, Any]
-        Filtered and normalized arguments for API requests.
-    """
-    to_exclude = {"service", "output_id"}
-    if exclude:
-        to_exclude.update(exclude)
-
-    args: dict[str, Any] = {}
-    for k, v in local_vars.items():
-        if k in to_exclude or v is None:
-            continue
-        if k == "monitoring_location_id":
-            args[k] = _check_monitoring_location_id(v)
-        elif k == "properties":
-            args[k] = _as_str_list(v, k)
-        elif (
-            k in _NO_NORMALIZE_PARAMS
-            and isinstance(v, Iterable)
-            and not isinstance(v, str)
-        ):
-            # Numeric params (water_year, bbox, thresholds, …) keep their
-            # element types — no string-normalization — but a non-string
-            # iterable (numpy array, pandas Series, generator) is materialized
-            # to a list so the GET comma-join and the chunker, which test
-            # ``list``/``tuple``, handle it instead of str()-ing the whole
-            # array. ``.tolist()`` yields native int/float; ``list()`` covers
-            # generators and other iterables. Scalars/strings fall through.
-            args[k] = v.tolist() if hasattr(v, "tolist") else list(v)
-        elif isinstance(v, str) or not isinstance(v, Iterable):
-            args[k] = v
-        else:
-            args[k] = _normalize_str_iterable(v, k)
-    return args
+__all__ = [
+    "BASE_URL",
+    "GEOPANDAS",
+    "OGC_API_URL",
+    "SAMPLES_URL",
+    "WATERDATA_DIALECT",
+    "_DATE_RANGE_PARAMS",
+    "_DURATION_RE",
+    "_EXTRA_ID_COLS",
+    "_NO_NORMALIZE_PARAMS",
+    "_OUTPUT_ID_BY_SERVICE",
+    "_arrange_cols",
+    "_as_str_list",
+    "_check_id_format",
+    "_check_monitoring_location_id",
+    "_check_ogc_requests",
+    "_check_profiles",
+    "_construct_api_requests",
+    "_construct_cql_request",
+    "_deal_with_empty",
+    "_default_headers",
+    "_error_body",
+    "_finalize_ogc",
+    "_format_api_dates",
+    "_get_args",
+    "_get_resp_data",
+    "_next_req_url",
+    "_normalize_str_iterable",
+    "_paginate",
+    "_paginated_failure_message",
+    "_parse_retry_after",
+    "_raise_for_non_200",
+    "_row_cap",
+    "_run_sync",
+    "_switch_properties_id",
+    "_to_snake_case",
+    "_walk_pages",
+    "get_ogc_data",
+]
diff --git a/demos/USGS_NGWMN_Examples.ipynb b/demos/USGS_NGWMN_Examples.ipynb
new file mode 100644
index 00000000..98e6e227
--- /dev/null
+++ b/demos/USGS_NGWMN_Examples.ipynb
@@ -0,0 +1,199 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0",
+   "metadata": {},
+   "source": [
+    "# National Ground-Water Monitoring Network (NGWMN)\n",
+    "\n",
+    "The [National Ground-Water Monitoring Network](https://cida.usgs.gov/ngwmn/) (NGWMN)\n",
+    "brings groundwater data from many state, federal, and local agencies into a single\n",
+    "location. USGS exposes it through a dedicated OGC API\n",
+    "(`https://api.waterdata.usgs.gov/ngwmn/ogcapi`), which `dataretrieval` wraps in the\n",
+    "`dataretrieval.ngwmn` module — a sibling of `dataretrieval.waterdata` built on the\n",
+    "same shared OGC engine, so chunking, pagination, and result shaping behave the same.\n",
+    "\n",
+    "There are five getters:\n",
+    "\n",
+    "| Function | Description |\n",
+    "| --- | --- |\n",
+    "| `get_sites` | Monitoring-location (well) metadata |\n",
+    "| `get_water_level` | Water-level observations |\n",
+    "| `get_lithology` | Lithology (geologic material) logs |\n",
+    "| `get_well_construction` | Well-construction records |\n",
+    "| `get_providers` | Contributing data providers |\n",
+    "\n",
+    "Unlike the main Water Data collections, NGWMN aggregates locations from many\n",
+    "agencies, so `monitoring_location_id` values use agency prefixes besides `USGS-`\n",
+    "(e.g. `MBMG-702934`, `AKDNR-535134236016630`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dataretrieval import ngwmn"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2",
+   "metadata": {},
+   "source": [
+    "## Providers\n",
+    "\n",
+    "List the organizations contributing data, optionally filtered by state."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "providers, md = ngwmn.get_providers(state=\"WI\")\n",
+    "print(f\"{len(providers)} providers in WI\")\n",
+    "providers.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4",
+   "metadata": {},
+   "source": [
+    "## Sites\n",
+    "\n",
+    "`get_sites` returns well metadata. Sites carry geometry by default, so the result is a\n",
+    "`GeoDataFrame`; pass `skip_geometry=True` to drop it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sites, md = ngwmn.get_sites(state=\"Wisconsin\")\n",
+    "print(f\"{len(sites)} NGWMN sites in Wisconsin\")\n",
+    "sites[[\"monitoring_location_id\", \"monitoring_location_name\", \"national_aquifer_description\"]].head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6",
+   "metadata": {},
+   "source": [
+    "## Water levels\n",
+    "\n",
+    "`get_water_level` returns the observations for one or more sites. A two-element\n",
+    "`datetime=[start, end]` restricts the record to a time window; a list of\n",
+    "`monitoring_location_id`s fans out across sites and is unioned."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "site = \"USGS-272838082142201\"\n",
+    "wl, md = ngwmn.get_water_level(monitoring_location_id=site)\n",
+    "print(f\"{len(wl)} water-level observations at {site}\")\n",
+    "\n",
+    "wl[\"sample_time\"] = pd.to_datetime(wl[\"sample_time\"], errors=\"coerce\", utc=True)\n",
+    "wl = wl.dropna(subset=[\"sample_time\"]).sort_values(\"sample_time\")\n",
+    "depth = pd.to_numeric(wl[\"water_depth_below_land_surface_ft\"], errors=\"coerce\")\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(9, 4))\n",
+    "ax.plot(wl[\"sample_time\"], depth, lw=0.8)\n",
+    "ax.invert_yaxis()  # depth increases downward\n",
+    "ax.set(xlabel=\"Date\", ylabel=\"Depth to water (ft below land surface)\",\n",
+    "       title=f\"NGWMN water levels \\u2014 {site}\")\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8",
+   "metadata": {},
+   "source": [
+    "Restrict to a date range, or query several sites at once (they fan out and\n",
+    "union):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "windowed, md = ngwmn.get_water_level(\n",
+    "    monitoring_location_id=site, datetime=[\"2022-01-01\", \"2024-01-01\"]\n",
+    ")\n",
+    "print(f\"{len(windowed)} observations in 2022\\u20132024\")\n",
+    "\n",
+    "multi, md = ngwmn.get_water_level(\n",
+    "    monitoring_location_id=[\"USGS-272838082142201\", \"USGS-404159100494601\"]\n",
+    ")\n",
+    "print(f\"{multi['monitoring_location_id'].nunique()} sites, {len(multi)} observations\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10",
+   "metadata": {},
+   "source": [
+    "## Well construction and lithology\n",
+    "\n",
+    "Construction records describe a well's physical build-out; lithology logs describe the\n",
+    "geologic materials with depth."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "construction, md = ngwmn.get_well_construction(monitoring_location_id=site)\n",
+    "construction[[\"monitoring_location_obs_number\", \"type\", \"material\", \"depth_from\", \"depth_to\"]].head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lithology, md = ngwmn.get_lithology(monitoring_location_id=\"AKDNR-535134236016630\")\n",
+    "lithology[[\"lithology_depth_from\", \"lithology_depth_to\", \"lithology_description\"]].head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/demos/USGS_WaterData_ContinuousData_Examples.ipynb b/demos/USGS_WaterData_ContinuousData_Examples.ipynb
index d843f336..8b2bafc5 100644
--- a/demos/USGS_WaterData_ContinuousData_Examples.ipynb
+++ b/demos/USGS_WaterData_ContinuousData_Examples.ipynb
@@ -125,7 +125,7 @@
    "source": [
     "import time\n",
     "\n",
-    "from dataretrieval.waterdata.chunking import ChunkInterrupted\n",
+    "from dataretrieval import ChunkInterrupted\n",
     "\n",
     "try:\n",
     "    sensor_data, _ = waterdata.get_continuous(\n",
@@ -236,7 +236,7 @@
     "## More help\n",
     "\n",
     "- Documentation: <https://doi-usgs.github.io/dataretrieval-python/>\n",
-    "- Chunking and resume internals: `dataretrieval.waterdata.chunking`\n",
+    "- Chunking and resume internals: `dataretrieval.ogc.chunking`\n",
     "- Issues / questions: <https://github.com/DOI-USGS/dataretrieval-python/issues>\n",
     "- Equivalent R article: [Continuous Data](https://doi-usgs.github.io/dataRetrieval/articles/continuous_pr.html)"
    ]
diff --git a/docs/source/examples/USGS_NGWMN_Examples.nblink b/docs/source/examples/USGS_NGWMN_Examples.nblink
new file mode 100644
index 00000000..1a5e6127
--- /dev/null
+++ b/docs/source/examples/USGS_NGWMN_Examples.nblink
@@ -0,0 +1,3 @@
+{
+    "path": "../../../demos/USGS_NGWMN_Examples.ipynb"
+}
diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst
index 91d7bd1f..e7c2deb8 100644
--- a/docs/source/examples/index.rst
+++ b/docs/source/examples/index.rst
@@ -18,7 +18,7 @@ covers a basic introduction to module functions and usage.
 USGS Water Data API vignettes
 -----------------------------
 These notebooks are Python ports of the new USGS Water Data API vignettes from
-the R `dataRetrieval`_ package. Each introduces a family of ``waterdata``
+the R `dataRetrieval`_ package. Each introduces a family of Water Data API
 functions and is executed against the live USGS Water Data API.
 
 .. _dataRetrieval: https://doi-usgs.github.io/dataRetrieval/
@@ -31,6 +31,7 @@ functions and is executed against the live USGS Water Data API.
     USGS_WaterData_DailyStatistics_Examples
     USGS_WaterData_ContinuousData_Examples
     USGS_WaterData_ReferenceLists_Examples
+    USGS_NGWMN_Examples
 
 Simple uses of the ``dataretrieval`` package
 --------------------------------------------
diff --git a/docs/source/reference/exceptions.rst b/docs/source/reference/exceptions.rst
index 1d8de47e..4514ac43 100644
--- a/docs/source/reference/exceptions.rst
+++ b/docs/source/reference/exceptions.rst
@@ -6,3 +6,22 @@ dataretrieval.exceptions
 .. automodule:: dataretrieval.exceptions
     :members:
     :show-inheritance:
+
+Resumable chunk interruptions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These are raised when a transparently-chunked request is interrupted
+mid-stream; the completed work is preserved and ``exc.call.resume()`` continues
+it. They are defined in ``dataretrieval.ogc.chunking`` (they carry pandas/httpx
+state) but are importable from the top level, e.g.
+``from dataretrieval import ChunkInterrupted``.
+
+.. autoclass:: dataretrieval.ChunkInterrupted
+    :members:
+    :show-inheritance:
+
+.. autoclass:: dataretrieval.QuotaExhausted
+    :show-inheritance:
+
+.. autoclass:: dataretrieval.ServiceInterrupted
+    :show-inheritance:
diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst
index 43def275..48947ff8 100644
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@@ -9,6 +9,7 @@ API reference
 
     exceptions
     nadp
+    ngwmn
     nldi
     nwis
     streamstats
diff --git a/docs/source/reference/ngwmn.rst b/docs/source/reference/ngwmn.rst
new file mode 100644
index 00000000..90668f3b
--- /dev/null
+++ b/docs/source/reference/ngwmn.rst
@@ -0,0 +1,8 @@
+.. _ngwmn:
+
+dataretrieval.ngwmn
+-------------------
+
+.. automodule:: dataretrieval.ngwmn
+    :members:
+    :special-members:
diff --git a/docs/source/userguide/errors.rst b/docs/source/userguide/errors.rst
index cd81f546..e2dc3ef1 100644
--- a/docs/source/userguide/errors.rst
+++ b/docs/source/userguide/errors.rst
@@ -82,8 +82,8 @@ condition clears -- only the unfinished sub-requests are re-issued.
 .. code-block:: python
 
     import time
+    from dataretrieval import ChunkInterrupted
     from dataretrieval.waterdata import get_daily
-    from dataretrieval.waterdata.chunking import ChunkInterrupted
 
     try:
         df, md = get_daily(monitoring_location_id=long_list_of_sites)
diff --git a/tests/ngwmn_test.py b/tests/ngwmn_test.py
new file mode 100644
index 00000000..cd20daaa
--- /dev/null
+++ b/tests/ngwmn_test.py
@@ -0,0 +1,161 @@
+"""Live tests for the NGWMN OGC getters (``dataretrieval.ngwmn``).
+
+These hit the live NGWMN OGC API (``api.waterdata.usgs.gov/ngwmn/ogcapi``),
+mirroring the integration-test style of ``waterdata_test.py``. The
+``flaky`` marker only reruns transient failures (429/5xx responses, connection
+errors, timeouts), so a real behavior change still fails on the first run.
+"""
+
+import sys
+
+import pytest
+from pandas import DataFrame
+
+if sys.version_info < (3, 10):
+    pytest.skip("Skip entire module on Python < 3.10", allow_module_level=True)
+
+from dataretrieval import ngwmn
+from dataretrieval.utils import BaseMetadata
+
+pytestmark = pytest.mark.flaky(
+    reruns=2,
+    reruns_delay=5,
+    only_rerun=[
+        r"(?:RateLimited|RuntimeError):\s*(?:429|5\d\d):",
+        r"Connect(ion)?Error",
+        r"ReadTimeout|ConnectTimeout|Timeout",
+    ],
+)
+
+# A site with water-level, construction, and lithology records (per the R
+# dataRetrieval NGWMN examples), plus a non-USGS-agency id to exercise the
+# multi-agency identifier format NGWMN uses.
+_SITE = "USGS-272838082142201"
+_LITH_SITE = "AKDNR-535134236016630"
+
+
+def test_get_sites():
+    df, md = ngwmn.get_sites(state="Wisconsin", limit=10)
+    assert isinstance(df, DataFrame)
+    assert isinstance(md, BaseMetadata)
+    assert len(df) > 0
+    assert "monitoring_location_id" in df.columns
+    # All returned sites are in the requested state.
+    assert df["state_name"].dropna().eq("Wisconsin").all()
+    # Sites carry geometry by default.
+    assert "geometry" in df.columns
+    assert "ngwmn/ogcapi/collections/sites" in str(md.url)
+
+
+def test_get_sites_skip_geometry():
+    df, _ = ngwmn.get_sites(monitoring_location_id=_SITE, skip_geometry=True)
+    assert isinstance(df, DataFrame)
+    assert "geometry" not in df.columns
+
+
+def test_get_water_level():
+    df, md = ngwmn.get_water_level(monitoring_location_id=_SITE)
+    assert isinstance(df, DataFrame)
+    assert len(df) > 0
+    assert "sample_time" in df.columns
+    assert (df["monitoring_location_id"] == _SITE).all()
+
+
+def test_get_water_level_datetime_subsets():
+    full, _ = ngwmn.get_water_level(monitoring_location_id=_SITE)
+    windowed, _ = ngwmn.get_water_level(
+        monitoring_location_id=_SITE, datetime=["2022-01-01", "2024-01-01"]
+    )
+    # A bounded window returns a strict subset of the full record.
+    assert 0 < len(windowed) < len(full)
+
+
+def test_get_providers():
+    df, md = ngwmn.get_providers(state="WI")
+    assert isinstance(df, DataFrame)
+    assert len(df) > 0
+    assert {"agency_code", "organization_type", "state"}.issubset(df.columns)
+    # Providers have no geometry.
+    assert "geometry" not in df.columns
+
+
+def test_get_sites_state_accepts_name_postal_or_fips():
+    """The single ``state`` parameter accepts a full name, postal code, or FIPS
+    code; ``_resolve_state`` normalizes all three to the full ``state_name`` the
+    ``sites`` collection queries on, so every encoding returns the same sites."""
+    by_name, _ = ngwmn.get_sites(state="Wisconsin", skip_geometry=True)
+    by_postal, _ = ngwmn.get_sites(state="WI", skip_geometry=True)
+    by_fips, _ = ngwmn.get_sites(state="55", skip_geometry=True)
+    assert len(by_name) > 0
+    ids = set(by_name["monitoring_location_id"])
+    assert set(by_postal["monitoring_location_id"]) == ids
+    assert set(by_fips["monitoring_location_id"]) == ids
+
+
+def test_get_providers_state_accepts_name_postal_or_fips():
+    """``get_providers`` likewise normalizes any encoding to the uppercase
+    postal code the ``providers`` collection queries on."""
+    by_postal, _ = ngwmn.get_providers(state="WI")
+    by_name, _ = ngwmn.get_providers(state="Wisconsin")
+    by_fips, _ = ngwmn.get_providers(state="55")
+    assert len(by_postal) > 0
+    agencies = set(by_postal["agency_code"])
+    assert set(by_name["agency_code"]) == agencies
+    assert set(by_fips["agency_code"]) == agencies
+
+
+def test_state_queryables_still_diverge_upstream():
+    """The NGWMN ``sites`` and ``providers`` collections expose DIFFERENT state
+    queryables (``sites`` -> ``state_name`` full name; ``providers`` ->
+    ``state`` 2-letter code). The single-``state`` shim in
+    ``ngwmn._resolve_state`` exists ONLY to paper over that asymmetry.
+
+    If this test fails, the upstream API has unified the two queryables and the
+    shim (``_resolve_state``) can be removed in favor of a single pass-through
+    parameter.
+    """
+    import httpx
+
+    from dataretrieval.ngwmn import NGWMN_OGC_API_URL
+    from dataretrieval.ogc.engine import _default_headers
+
+    headers = _default_headers()
+
+    def queryables(collection):
+        resp = httpx.get(
+            f"{NGWMN_OGC_API_URL}/collections/{collection}/queryables",
+            headers=headers,
+            timeout=60,
+        )
+        resp.raise_for_status()
+        return set(resp.json().get("properties") or {})
+
+    sites_q = queryables("sites")
+    providers_q = queryables("providers")
+    assert "state_name" in sites_q and "state" not in sites_q, sites_q
+    assert "state" in providers_q and "state_name" not in providers_q, providers_q
+
+
+def test_get_lithology():
+    df, _ = ngwmn.get_lithology(monitoring_location_id=_LITH_SITE)
+    assert isinstance(df, DataFrame)
+    assert len(df) > 0
+    assert (df["monitoring_location_id"] == _LITH_SITE).all()
+
+
+def test_get_well_construction():
+    df, _ = ngwmn.get_well_construction(monitoring_location_id=_SITE)
+    assert isinstance(df, DataFrame)
+    assert len(df) > 0
+    assert (df["monitoring_location_id"] == _SITE).all()
+
+
+def test_multi_site_chunks_and_unions():
+    """A multi-value ``monitoring_location_id`` fans out and unions the
+    per-site results (the comma-join multi-value path), returning at least
+    the single-site total."""
+    one, _ = ngwmn.get_water_level(monitoring_location_id=_SITE)
+    many, _ = ngwmn.get_water_level(
+        monitoring_location_id=[_SITE, "USGS-404159100494601"]
+    )
+    assert len(many) >= len(one)
diff --git a/tests/utils_test.py b/tests/utils_test.py
index d90821ae..81f82f45 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -173,7 +173,7 @@ def test_waterdata_exceptions_share_the_root(self):
         ``except`` clause spans the legacy and waterdata subsystems, and they
         slot under the shared family bases (``HTTPError`` / ``TransientError`` /
         ``RequestTooLarge``)."""
-        from dataretrieval.waterdata.chunking import (
+        from dataretrieval.ogc.chunking import (
             ChunkInterrupted,
             RateLimited,
             ServiceUnavailable,
@@ -195,6 +195,25 @@ def test_base_exported_at_top_level(self):
 
         assert dataretrieval.DataRetrievalError is exceptions.DataRetrievalError
 
+    def test_chunk_interruptions_exported_at_top_level(self):
+        """The resumable chunk-interruption exceptions are reachable from the
+        top level (``from dataretrieval import ChunkInterrupted``) instead of
+        only the internal ``dataretrieval.ogc.chunking`` module, and resolve to
+        the same classes."""
+        import dataretrieval
+        from dataretrieval.ogc import chunking
+
+        for name in ("ChunkInterrupted", "QuotaExhausted", "ServiceInterrupted"):
+            assert getattr(dataretrieval, name) is getattr(chunking, name)
+            assert name in dataretrieval.__all__
+        assert issubclass(dataretrieval.QuotaExhausted, dataretrieval.ChunkInterrupted)
+        assert issubclass(
+            dataretrieval.ServiceInterrupted, dataretrieval.ChunkInterrupted
+        )
+        assert issubclass(
+            dataretrieval.ChunkInterrupted, dataretrieval.DataRetrievalError
+        )
+
 
 class Test_BaseMetadata:
     """Tests of BaseMetadata"""
@@ -309,3 +328,53 @@ def test_existing_datetime_column_not_overwritten(self):
         )
         df = utils._attach_datetime_columns(df)
         assert df["Activity_StartDateTime"].tolist() == ["preexisting"]
+
+
+class Test_to_state:
+    """Tests of the shared state normalizer in ``codes.states``."""
+
+    def test_accepts_every_encoding(self):
+        from dataretrieval.codes.states import to_state
+
+        # name (any case), postal (any case), bare FIPS, and prefixed FIPS all
+        # resolve to the same canonical full name.
+        for value in ("Wisconsin", "wisconsin", "WI", "wi", "55", "US:55"):
+            assert to_state(value) == "Wisconsin"
+
+    def test_converts_to_each_representation(self):
+        from dataretrieval.codes.states import to_state
+
+        assert to_state("WI", "name") == "Wisconsin"
+        assert to_state("Wisconsin", "postal") == "WI"
+        assert to_state("Wisconsin", "fips") == "55"
+        assert to_state("Wisconsin", "fips_us") == "US:55"
+        # Conversion is independent of the input encoding.
+        assert to_state("55", "postal") == "WI"
+        assert to_state("wi", "fips_us") == "US:55"
+
+    def test_rejects_unrecognized_state(self):
+        from dataretrieval.codes.states import to_state
+
+        for bad in ("XX", "99", "US:99", "Wisconson"):
+            with pytest.raises(ValueError, match="not a recognized US state"):
+                to_state(bad)
+
+    def test_rejects_unknown_target(self):
+        from dataretrieval.codes.states import to_state
+
+        with pytest.raises(ValueError, match="to must be"):
+            to_state("WI", "zipcode")
+
+    def test_resolves_an_iterable_element_wise(self):
+        from dataretrieval.codes.states import to_state
+
+        # An iterable of mixed encodings returns a list, converted element-wise.
+        assert to_state(["WI", "Minnesota", "39"]) == [
+            "Wisconsin",
+            "Minnesota",
+            "Ohio",
+        ]
+        assert to_state(["WI", "CA"], "fips_us") == ["US:55", "US:06"]
+        # A bad element fails the whole call (fail-fast).
+        with pytest.raises(ValueError, match="not a recognized US state"):
+            to_state(["WI", "XX"])
diff --git a/tests/waterdata_chunking_test.py b/tests/waterdata_chunking_test.py
index 37e9b999..d024b704 100644
--- a/tests/waterdata_chunking_test.py
+++ b/tests/waterdata_chunking_test.py
@@ -1,4 +1,4 @@
-"""Tests for ``dataretrieval.waterdata.chunking``.
+"""Tests for ``dataretrieval.ogc.chunking``.
 
 These tests exercise the joint planner with a fake ``build_request``
 whose URL byte length is a deterministic function of its inputs:
@@ -17,6 +17,7 @@
 
 import asyncio
 import concurrent.futures
+import contextvars
 import datetime
 import http.server
 import sys
@@ -35,10 +36,8 @@
     pytest.skip("Skip entire module on Python < 3.10", allow_module_level=True)
 
 from dataretrieval.exceptions import DataRetrievalError
-from dataretrieval.utils import HTTPX_DEFAULTS
-from dataretrieval.waterdata import chunking as _chunking
-from dataretrieval.waterdata import utils as _utils
-from dataretrieval.waterdata.chunking import (
+from dataretrieval.ogc import chunking as _chunking
+from dataretrieval.ogc.chunking import (
     _LIST_SEP,
     _NEVER_CHUNK,
     _OR_SEP,
@@ -63,6 +62,8 @@
     get_active_client,
     multi_value_chunked,
 )
+from dataretrieval.utils import HTTPX_DEFAULTS
+from dataretrieval.waterdata import utils as _utils
 from dataretrieval.waterdata.utils import _DATE_RANGE_PARAMS, _construct_api_requests
 
 
@@ -350,7 +351,7 @@ async def fetch(args):
 
 
 def test_multi_value_chunked_lazy_url_limit(monkeypatch):
-    """``url_limit=None`` → resolve chunking._WATERDATA_URL_BYTE_LIMIT at call
+    """``url_limit=None`` → resolve chunking._OGC_URL_BYTE_LIMIT at call
     time, so tests that patch the constant affect this decorator too."""
     calls = []
 
@@ -361,7 +362,7 @@ async def fetch(args):
             elapsed=datetime.timedelta(seconds=0.1), headers={}
         )
 
-    monkeypatch.setattr(_chunking, "_WATERDATA_URL_BYTE_LIMIT", 240)
+    monkeypatch.setattr(_chunking, "_OGC_URL_BYTE_LIMIT", 240)
     # 4 sites of 10 chars → exceeds 240 → planner splits.
     fetch({"sites": ["S" * 10 + str(i) for i in range(4)]})
     assert len(calls) > 1, "patched constant should drive chunking"
@@ -659,6 +660,55 @@ async def fetch(args):
     assert sorted(df_a["id"].tolist()) == sorted(sites)
 
 
+def test_resume_rebuilds_in_captured_context():
+    """Regression: sub-requests are rebuilt by reading ambient ContextVars
+    (the engine threads base URL / dialect / row cap that way). A
+    ``call.resume()`` fired AFTER the originating ``with`` block exits —
+    the documented recovery for a mid-stream 429 — must still observe the
+    values active when the call was *created*, not the process defaults.
+    ``ChunkedCall`` snapshots the context at construction and runs every
+    drive inside it; without that snapshot a resumed NGWMN call would
+    rebuild its sub-requests against the wrong (default Water Data) base."""
+    var = contextvars.ContextVar("ctx_probe", default="DEFAULT")
+    observed: list[str] = []
+
+    state = {"calls": 0, "tripped": False}
+
+    async def fetch(args):
+        state["calls"] += 1
+        # The value visible at (re)build time — what _construct_api_requests
+        # would read from _ogc_base_url_var / _dialect_var in production.
+        observed.append(var.get())
+        if state["calls"] == 3 and not state["tripped"]:
+            state["tripped"] = True
+            raise RateLimited("429: Too many requests made.")
+        sites = list(args["sites"])
+        return (pd.DataFrame({"id": sites}), _quota_response(500))
+
+    sites = ["S" * 10 + str(i) for i in range(16)]
+    decorated = multi_value_chunked(build_request=_fake_build, url_limit=240)(fetch)
+
+    # Create + drive the call INSIDE the context, so the snapshot captures "IN".
+    token = var.set("IN")
+    try:
+        with pytest.raises(QuotaExhausted) as excinfo:
+            decorated({"sites": sites})
+    finally:
+        var.reset(token)
+
+    # The originating context has exited — the bare var is back to default.
+    assert var.get() == "DEFAULT"
+    assert 0 < excinfo.value.completed_chunks < excinfo.value.total_chunks
+
+    # Resume OUTSIDE the context. Every rebuilt sub-request must still see
+    # "IN" (the captured snapshot), never the leaked "DEFAULT".
+    observed.clear()
+    df, _ = excinfo.value.call.resume()
+    assert observed, "resume issued no sub-requests"
+    assert set(observed) == {"IN"}, observed
+    assert sorted(df["id"].tolist()) == sorted(sites)
+
+
 def test_chunker_passes_through_non_429_runtime_error():
     """A non-429 ``RuntimeError`` (e.g. a 500) is not a quota signal;
     it must propagate unchanged so callers see the real cause."""
@@ -994,7 +1044,7 @@ def test_combine_chunk_responses_returns_independent_headers():
 
 def test_paginate_terminates_on_empty_string_cursor():
     """``_paginate``'s loop predicate is ``while cursor is not None``.
-    Parse-response wrappers in ``_walk_pages`` / ``get_stats_data``
+    Parse-response wrappers in ``_walk_pages`` / ``stats.get_data``
     coerce falsy non-None values to None so an empty-string next-
     cursor (a real-but-unusual end-of-stream sentinel some pagination
     APIs use) doesn't trap us in an infinite ``follow_up('')`` loop."""
diff --git a/tests/waterdata_filters_test.py b/tests/waterdata_filters_test.py
index a447cada..b87ec272 100644
--- a/tests/waterdata_filters_test.py
+++ b/tests/waterdata_filters_test.py
@@ -6,11 +6,12 @@
 import pandas as pd
 import pytest
 
-from dataretrieval.waterdata import get_continuous
-from dataretrieval.waterdata.filters import (
+from dataretrieval.ogc.filters import (
     _check_numeric_filter_pitfall,
+    _quote_cql_str,
     _split_top_level_or,
 )
+from dataretrieval.waterdata import get_continuous
 from dataretrieval.waterdata.utils import _construct_api_requests
 
 
@@ -32,6 +33,14 @@ def _fake_response(url="https://example.test", elapsed_ms=1):
     )
 
 
+def test_quote_cql_str_doubles_embedded_quotes():
+    """The shared CQL-text escaper doubles ``'`` and leaves other input
+    untouched (the contract ``waterdata.ratings._build_filter`` relies on)."""
+    assert _quote_cql_str("O'Brien") == "O''Brien"
+    assert _quote_cql_str("USGS-01646500") == "USGS-01646500"
+    assert _quote_cql_str("a'b'c") == "a''b''c"
+
+
 def test_construct_filter_passthrough():
     """`filter` is forwarded verbatim as a query parameter."""
     expr = (
@@ -163,11 +172,11 @@ async def fake_walk_pages(*, geopd, req):
 
     with (
         mock.patch(
-            "dataretrieval.waterdata.utils._construct_api_requests",
+            "dataretrieval.ogc.engine._construct_api_requests",
             side_effect=_filter_size_aware_build,
         ),
         mock.patch(
-            "dataretrieval.waterdata.utils._walk_pages",
+            "dataretrieval.ogc.engine._walk_pages",
             side_effect=fake_walk_pages,
         ),
     ):
@@ -202,11 +211,11 @@ async def fake_walk_pages(*_args, **_kwargs):
 
     with (
         mock.patch(
-            "dataretrieval.waterdata.utils._construct_api_requests",
+            "dataretrieval.ogc.engine._construct_api_requests",
             side_effect=_filter_size_aware_build,
         ),
         mock.patch(
-            "dataretrieval.waterdata.utils._walk_pages",
+            "dataretrieval.ogc.engine._walk_pages",
             side_effect=fake_walk_pages,
         ),
     ):
@@ -249,11 +258,11 @@ async def fake_walk_pages(*_args, **_kwargs):
 
     with (
         mock.patch(
-            "dataretrieval.waterdata.utils._construct_api_requests",
+            "dataretrieval.ogc.engine._construct_api_requests",
             side_effect=_filter_size_aware_build,
         ),
         mock.patch(
-            "dataretrieval.waterdata.utils._walk_pages",
+            "dataretrieval.ogc.engine._walk_pages",
             side_effect=fake_walk_pages,
         ),
     ):
@@ -281,11 +290,11 @@ def fake_construct_api_requests(**kwargs):
 
     with (
         mock.patch(
-            "dataretrieval.waterdata.utils._construct_api_requests",
+            "dataretrieval.ogc.engine._construct_api_requests",
             side_effect=fake_construct_api_requests,
         ),
         mock.patch(
-            "dataretrieval.waterdata.utils._walk_pages",
+            "dataretrieval.ogc.engine._walk_pages",
             new=mock.AsyncMock(
                 return_value=(
                     pd.DataFrame({"id": ["row-1"], "value": [1]}),
@@ -426,7 +435,7 @@ def test_get_continuous_surfaces_pitfall_to_caller():
     """End-to-end: the check runs at the ``get_continuous`` boundary,
     not as a deep internal-only protection, so callers see the error
     before any HTTP traffic."""
-    with mock.patch("dataretrieval.waterdata.utils._construct_api_requests") as build:
+    with mock.patch("dataretrieval.ogc.engine._construct_api_requests") as build:
         with pytest.raises(ValueError, match="lexicographic"):
             get_continuous(
                 monitoring_location_id="USGS-02238500",
diff --git a/tests/waterdata_progress_test.py b/tests/waterdata_progress_test.py
index 08f6ca26..0ba801e1 100644
--- a/tests/waterdata_progress_test.py
+++ b/tests/waterdata_progress_test.py
@@ -17,13 +17,13 @@
 import pandas as pd
 import pytest
 
-from dataretrieval.waterdata import _progress
-from dataretrieval.waterdata._progress import (
+from dataretrieval.ogc import progress as _progress
+from dataretrieval.ogc.chunking import ChunkedCall, ChunkPlan
+from dataretrieval.ogc.progress import (
     ProgressReporter,
     current,
     progress_context,
 )
-from dataretrieval.waterdata.chunking import ChunkedCall, ChunkPlan
 from dataretrieval.waterdata.utils import _paginate, _walk_pages
 
 
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index 34ccf4f2..a68033e3 100644
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -11,6 +11,7 @@
 if sys.version_info < (3, 10):
     pytest.skip("Skip entire module on Python < 3.10", allow_module_level=True)
 
+from dataretrieval.ogc.engine import _dialect
 from dataretrieval.waterdata import (
     get_channel,
     get_combined_metadata,
@@ -31,6 +32,7 @@
     get_time_series_metadata,
 )
 from dataretrieval.waterdata.utils import (
+    WATERDATA_DIALECT,
     _check_monitoring_location_id,
     _check_profiles,
     _construct_api_requests,
@@ -60,6 +62,19 @@
 )
 
 
+@pytest.fixture(autouse=True)
+def _activate_waterdata_dialect():
+    """Make the Water Data OGC dialect ambient for this module.
+
+    The dialect (monitoring-locations -> POST/CQL2; daily -> date-only time
+    args) is normally set by ``get_ogc_data`` per call. The direct
+    ``_construct_api_requests`` unit tests here bypass that wrapper, so the
+    dialect is activated for every test to exercise real Water Data behavior.
+    """
+    with _dialect(WATERDATA_DIALECT):
+        yield
+
+
 def mock_request(httpx_mock, request_url, file_path):
     """Mock request code"""
     with open(file_path) as text:
@@ -136,7 +151,7 @@ def test_get_samples_raises_typed_error_on_429(httpx_mock):
     """Non-200 from the Samples endpoint now raises the module's typed error
     (RateLimited on 429) — consistent with the OGC/stats path — instead of a
     bare httpx.HTTPStatusError."""
-    from dataretrieval.waterdata.chunking import RateLimited
+    from dataretrieval.ogc.chunking import RateLimited
 
     httpx_mock.add_response(status_code=429, headers={"Retry-After": "30"})
     with pytest.raises(RateLimited):
@@ -149,7 +164,7 @@ def test_get_samples_raises_typed_error_on_429(httpx_mock):
 
 def test_get_samples_summary_raises_typed_error_on_5xx(httpx_mock):
     """A 5xx from the Samples summary endpoint raises ServiceUnavailable."""
-    from dataretrieval.waterdata.chunking import ServiceUnavailable
+    from dataretrieval.ogc.chunking import ServiceUnavailable
 
     httpx_mock.add_response(status_code=503)
     with pytest.raises(ServiceUnavailable):
diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py
index 10733773..f68e1f7e 100644
--- a/tests/waterdata_utils_test.py
+++ b/tests/waterdata_utils_test.py
@@ -8,9 +8,13 @@
 import pandas as pd
 import pytest
 
+import dataretrieval.ogc.engine as _engine_module
+import dataretrieval.waterdata.stats as _stats_module
 import dataretrieval.waterdata.utils as _utils_module
 from dataretrieval.exceptions import DataRetrievalError, HTTPError, TransientError
-from dataretrieval.waterdata.chunking import RateLimited, ServiceUnavailable
+from dataretrieval.ogc.chunking import RateLimited, ServiceUnavailable
+from dataretrieval.waterdata import get_stats_date_range, get_stats_por
+from dataretrieval.waterdata.stats import _handle_nesting, get_data
 from dataretrieval.waterdata.utils import (
     OGC_API_URL,
     _arrange_cols,
@@ -20,13 +24,12 @@
     _format_api_dates,
     _get_args,
     _get_resp_data,
-    _handle_stats_nesting,
     _next_req_url,
     _parse_retry_after,
     _raise_for_non_200,
     _row_cap,
+    _to_snake_case,
     _walk_pages,
-    get_stats_data,
 )
 
 _LOGGER_NAME = _utils_module.__name__
@@ -332,7 +335,7 @@ def test_get_resp_data_handles_missing_features_key():
     """Regression: a 200 with ``numberReturned > 0`` but no
     ``features`` key (real schema-drift shape) used to crash
     ``_get_resp_data`` with ``KeyError`` — wrapped downstream by
-    ``_paginate`` as a generic transport error. ``_handle_stats_nesting``
+    ``_paginate`` as a generic transport error. ``_handle_nesting``
     was already hardened against this; ``_get_resp_data`` now mirrors
     that defensiveness and returns an empty frame instead."""
     resp = mock.Mock()
@@ -342,6 +345,32 @@ def test_get_resp_data_handles_missing_features_key():
     assert isinstance(df, pd.DataFrame)
 
 
+def test_next_req_url_follows_link_without_number_returned():
+    """The NGWMN OGC API omits ``numberReturned`` from its page envelope, so
+    ``_next_req_url`` keys the ``next`` link off ``features`` (mirroring
+    ``_get_resp_data``) rather than that count -- otherwise a page that carries
+    features but no count stops pagination after page 1 and silently truncates
+    every multi-page result. Pinned here: a page that carries features but no
+    ``numberReturned`` must still yield its ``next`` link."""
+    resp = mock.MagicMock()
+    resp.url = httpx.URL("https://example.com/page1")
+    body = {
+        # NGWMN shape: features present, NO numberReturned key.
+        "features": [{"id": "1"}],
+        "links": [{"rel": "next", "href": "https://example.com/page2"}],
+    }
+    assert _next_req_url(resp, body=body) == "https://example.com/page2"
+
+
+def test_next_req_url_stops_when_no_features():
+    """A page with no features ends pagination regardless of any stray
+    ``next`` link (and regardless of ``numberReturned``)."""
+    resp = mock.MagicMock()
+    resp.url = httpx.URL("https://example.com/page1")
+    body = {"features": [], "links": [{"rel": "next", "href": "https://x/2"}]}
+    assert _next_req_url(resp, body=body) is None
+
+
 def test_walk_pages_does_not_mutate_initial_response():
     """The aggregated response returned from ``_walk_pages`` is built
     via ``_aggregate_paginated_response``, which returns a fresh copy.
@@ -413,15 +442,15 @@ def _stats_initial_ok():
     return resp
 
 
-def _run_get_stats_data_with_failure(failure_resp_or_exc, monkeypatch):
-    """Exercise get_stats_data where the initial response succeeds and the
+def _run_get_data_with_failure(failure_resp_or_exc, monkeypatch):
+    """Exercise get_data where the initial response succeeds and the
     paginated follow-up fails as given. Mirrors _walk_pages_with_failure.
-    `monkeypatch` stubs ``_handle_stats_nesting`` so the synthetic minimal
+    `monkeypatch` stubs ``_handle_nesting`` so the synthetic minimal
     response body doesn't need to parse — these tests only assert on the
     pagination loop's error surfacing."""
     monkeypatch.setattr(
-        _utils_module,
-        "_handle_stats_nesting",
+        _stats_module,
+        "_handle_nesting",
         mock.MagicMock(return_value=pd.DataFrame()),
     )
 
@@ -432,7 +461,7 @@ def _run_get_stats_data_with_failure(failure_resp_or_exc, monkeypatch):
     else:
         mock_client.request.return_value = failure_resp_or_exc
 
-    return get_stats_data(
+    return get_data(
         args={"monitoring_location_id": "USGS-1"},
         service="observationNormals",
         expand_percentiles=False,
@@ -440,14 +469,14 @@ def _run_get_stats_data_with_failure(failure_resp_or_exc, monkeypatch):
     )
 
 
-def test_get_stats_data_raises_on_mid_pagination_failure(monkeypatch):
-    """Wiring smoke: ``get_stats_data`` and ``_walk_pages`` share the
+def test_get_data_raises_on_mid_pagination_failure(monkeypatch):
+    """Wiring smoke: ``get_data`` and ``_walk_pages`` share the
     same ``_paginate`` strategy helper, so error-routing behaviour is
     exercised by the ``_walk_pages`` triplet above. This single
-    ``get_stats_data`` mid-pagination case proves the stats-specific
+    ``get_data`` mid-pagination case proves the stats-specific
     follow-up callback is wired into ``_paginate`` correctly."""
     with pytest.raises(DataRetrievalError, match="Paginated request failed") as excinfo:
-        _run_get_stats_data_with_failure(
+        _run_get_data_with_failure(
             httpx.ConnectError("stats-boom"),
             monkeypatch,
         )
@@ -456,7 +485,7 @@ def test_get_stats_data_raises_on_mid_pagination_failure(monkeypatch):
     assert "stats-boom" in str(excinfo.value)
 
 
-def test_get_stats_data_warning_includes_next_token(caplog, monkeypatch):
+def test_get_data_warning_includes_next_token(caplog, monkeypatch):
     """The pagination-failure warning includes the next_token so operators
     can identify which page in the sequence failed. (Addresses Copilot's
     PR #273 review note: the base URL alone drops cursor context.)"""
@@ -470,17 +499,18 @@ def test_get_stats_data_warning_includes_next_token(caplog, monkeypatch):
     }
 
     with pytest.raises(DataRetrievalError):
-        _run_get_stats_data_with_failure(page2_503, monkeypatch)
+        _run_get_data_with_failure(page2_503, monkeypatch)
 
     warnings_ = [r.getMessage() for r in caplog.records if r.levelno == logging.WARNING]
     # The initial response from _stats_initial_ok carries next=tok2.
     assert any("tok2" in m for m in warnings_), warnings_
 
 
-def test_handle_stats_nesting_tolerates_missing_drop_columns():
-    """If the upstream stats response shape ever changes such that one of
-    the columns we try to drop ("type", "properties.data") is absent, the
-    function should still return a DataFrame instead of raising KeyError.
+def test_handle_nesting_tolerates_missing_drop_columns():
+    """If the upstream stats response shape ever changes such that the nested
+    ``data`` column that ``_handle_nesting`` drops is absent, the function should
+    still return a DataFrame instead of raising KeyError (the drop uses
+    ``errors="ignore"``).
     """
     body = {
         "next": None,
@@ -501,38 +531,40 @@ def test_handle_stats_nesting_tolerates_missing_drop_columns():
         ],
     }
 
-    df = _handle_stats_nesting(body, geopd=False)
+    df = _handle_nesting(body, geopd=False)
 
     assert len(df) == 1
     assert df["monitoring_location_id"].iloc[0] == "USGS-12345"
 
 
-def test_handle_stats_nesting_returns_empty_on_empty_features():
+def test_handle_nesting_returns_empty_on_empty_features():
     """A mid-pagination empty page ({\"features\": [], \"next\": <tok>})
     must not crash the downstream merge with
     ``KeyError: 'monitoring_location_id'``. The function short-
     circuits to an empty DataFrame so pagination can continue."""
-    df = _handle_stats_nesting({"features": [], "next": None}, geopd=False)
+    df = _handle_nesting({"features": [], "next": None}, geopd=False)
     assert df.empty
 
 
-def test_handle_stats_nesting_empty_preserves_geopd_type():
+def test_handle_nesting_empty_preserves_geopd_type():
     """When geopandas is available, the empty-features short-circuit
     must return a ``GeoDataFrame`` rather than a plain ``DataFrame``.
     Otherwise a subsequent ``pd.concat([empty, geo_page])`` downgrades
     the final result to a plain ``DataFrame`` and strips geometry/CRS
     — a real regression for geopd-installed users on stats queries
     that hit an empty intermediate page."""
-    # Monkeypatch a stub gpd into the utils module so the test runs
-    # whether or not geopandas is actually installed.
+    # Monkeypatch a stub gpd so the test runs whether or not geopandas is
+    # installed. The empty-page short-circuit delegates to the shared
+    # ``engine._empty_feature_frame``, which resolves ``gpd`` from the engine
+    # namespace — so patch it there, not in the stats module.
     fake_gpd = mock.MagicMock()
 
     class _Sentinel:
         pass
 
     fake_gpd.GeoDataFrame = lambda *a, **kw: _Sentinel()
-    with mock.patch.object(_utils_module, "gpd", fake_gpd, create=True):
-        result = _handle_stats_nesting({"features": []}, geopd=True)
+    with mock.patch.object(_engine_module, "gpd", fake_gpd, create=True):
+        result = _handle_nesting({"features": []}, geopd=True)
     assert isinstance(result, _Sentinel)
 
 
@@ -551,17 +583,19 @@ class _Sentinel:
 
     resp = mock.MagicMock()
     resp.json.return_value = {"numberReturned": 0, "features": [], "links": []}
-    with mock.patch.object(_utils_module, "gpd", fake_gpd, create=True):
+    # ``_get_resp_data`` resolves ``gpd`` from the engine namespace -- patch
+    # it there, not in ``utils``.
+    with mock.patch.object(_engine_module, "gpd", fake_gpd, create=True):
         result = _get_resp_data(resp, geopd=True)
     assert isinstance(result, _Sentinel)
 
 
-def test_handle_stats_nesting_tolerates_missing_features_key():
+def test_handle_nesting_tolerates_missing_features_key():
     """A 200 response with a body that doesn't carry ``features`` at
     all (rare but seen in error envelopes) must also short-circuit
     rather than KeyError before the schema-aware extraction even
     runs."""
-    df = _handle_stats_nesting({}, geopd=False)
+    df = _handle_nesting({}, geopd=False)
     assert df.empty
 
 
@@ -860,3 +894,117 @@ def test_check_ogc_requests_raises_typed_on_5xx(httpx_mock):
     )
     with pytest.raises(ServiceUnavailable):
         _check_ogc_requests(endpoint="daily", req_type="schema")
+
+
+@pytest.mark.parametrize(
+    "name, expected",
+    [
+        ("waterLevelObs", "water_level_obs"),  # camelCase -> snake_case
+        ("monitoring_location_id", "monitoring_location_id"),  # already snake
+        ("value", "value"),  # all-lowercase unchanged
+        ("navd88", "navd88"),  # letter/digit boundary NOT split
+        ("someField", "some_field"),  # simple camelCase
+        ("PascalCase", "pascal_case"),  # leading capital
+        # Runs of capitals are best-effort: only the lower->Upper boundary
+        # before the run is split, so the acronym stays glued to the next word.
+        ("someXMLField", "some_xmlfield"),
+    ],
+)
+def test_to_snake_case(name, expected):
+    assert _to_snake_case(name) == expected
+
+
+def test_get_stats_por_forwards_normal_type(monkeypatch):
+    """``normal_type`` reaches the observationNormals request (parity with R's
+    ``read_waterdata_stats_por``). Guards against the param being dropped from
+    the forwarded args (e.g. accidentally added to ``_get_args``'s exclude)."""
+    captured: dict = {}
+
+    def fake_get_data(args, service, expand_percentiles, client=None):
+        captured.update(args=args, service=service)
+        return pd.DataFrame(), mock.Mock()
+
+    monkeypatch.setattr(_stats_module, "get_data", fake_get_data)
+    get_stats_por(monitoring_location_id="USGS-1", normal_type="MOY")
+    assert captured["service"] == "observationNormals"
+    assert captured["args"].get("normal_type") == "MOY"
+
+
+def test_get_stats_date_range_forwards_interval_type(monkeypatch):
+    """``interval_type`` (multi-value) reaches the observationIntervals request
+    (parity with R's ``read_waterdata_stats_daterange``)."""
+    captured: dict = {}
+
+    def fake_get_data(args, service, expand_percentiles, client=None):
+        captured.update(args=args, service=service)
+        return pd.DataFrame(), mock.Mock()
+
+    monkeypatch.setattr(_stats_module, "get_data", fake_get_data)
+    get_stats_date_range(monitoring_location_id="USGS-1", interval_type=["M", "CY"])
+    assert captured["service"] == "observationIntervals"
+    assert captured["args"].get("interval_type") == ["M", "CY"]
+
+
+def test_with_state_routes_into_native_queryable():
+    """``_with_state`` resolves the canonical ``state`` argument into the
+    endpoint's native queryable (any encoding -> the requested representation)
+    and leaves args without ``state`` untouched."""
+    assert _utils_module._with_state({"state": "WI"}, to="name", into="state_name") == {
+        "state_name": "Wisconsin"
+    }
+    assert _utils_module._with_state(
+        {"state": "Wisconsin"}, to="fips_us", into="state_code"
+    ) == {"state_code": "US:55"}
+    # Multi-value state fans out element-wise.
+    assert _utils_module._with_state(
+        {"state": ["WI", "55"]}, to="name", into="state_name"
+    ) == {"state_name": ["Wisconsin", "Wisconsin"]}
+    # No ``state`` -> mapping returned unchanged.
+    assert _utils_module._with_state(
+        {"state_name": "Ohio"}, to="name", into="state_name"
+    ) == {"state_name": "Ohio"}
+
+
+def test_with_state_conflict_raises():
+    """Passing ``state`` together with a native ``state_code``/``state_name``
+    is ambiguous and raises."""
+    with pytest.raises(ValueError, match="not both"):
+        _utils_module._with_state(
+            {"state": "WI", "state_code": "55"}, to="name", into="state_name"
+        )
+    with pytest.raises(ValueError, match="not both"):
+        _utils_module._with_state(
+            {"state": "WI", "state_name": "Wisconsin"}, to="name", into="state_name"
+        )
+
+
+def test_ogc_getter_resolves_state_at_getter_layer(monkeypatch):
+    """The OGC getters resolve the unified ``state`` into ``state_name``
+    themselves (any encoding), so the shared ``get_ogc_data`` wrapper stays
+    state-agnostic."""
+    import dataretrieval.waterdata.api as _api
+
+    captured: dict = {}
+
+    def fake_get_ogc_data(args, service, *a, **k):
+        captured.update(args=args, service=service)
+        return pd.DataFrame(), mock.Mock()
+
+    monkeypatch.setattr(_api, "get_ogc_data", fake_get_ogc_data)
+    _api.get_monitoring_locations(state="55")  # FIPS in -> full name out
+    assert captured["args"].get("state_name") == "Wisconsin"
+    assert "state" not in captured["args"]
+
+
+def test_get_ogc_data_wrapper_does_not_touch_state():
+    """``get_ogc_data`` no longer rewrites a ``state`` key, so a passthrough
+    query dict (e.g. from ``get_reference_table``) is forwarded untouched."""
+    captured: dict = {}
+
+    def fake_engine_get_ogc_data(args, service, output_id, **k):
+        captured["args"] = dict(args)
+        return pd.DataFrame(), mock.Mock()
+
+    with mock.patch.object(_engine_module, "get_ogc_data", fake_engine_get_ogc_data):
+        _utils_module.get_ogc_data({"state": "WI"}, "monitoring-locations")
+    assert captured["args"] == {"state": "WI"}