From f28a6f51078f4802ba83194cd6484f082986c930 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Fri, 12 Jun 2026 12:48:28 -0500 Subject: [PATCH 1/2] fix(waterdata): drop id from the wire `properties` so daily/continuous don't 400 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The USGS OGC API now rejects the feature `id` as a selectable `properties` value on the `daily` and `continuous` collections (HTTP 400 "InvalidParameterValue. unknown properties specified"; `latest-daily` still accepts it). `_switch_properties_id` translated the user-facing id column (e.g. `daily_id`) to `id` and *kept* it in the wire `properties` list, so any call passing the id column in `properties` started failing — breaking `test_get_daily_properties` and `test_get_daily_properties_id`. The feature `id` is always returned and is renamed to the service-specific id column in `_arrange_cols`, so it never needs to be requested. Drop every id alias (`id`, the service id like `daily_id`, and its singular form) plus `geometry` (controlled by `skip_geometry`) from the wire list, matching the R dataRetrieval package's `switch_properties_id`. User-facing behavior is unchanged: callers still pass `daily_id`/`id` in `properties` and get the same output columns; only the request the chunker sends is corrected. Also update the stale `test_get_continuous` geometry assertion: with `skip_geometry` unset the server now includes geometry by default, which the library already documents ("the server defaults to including geometry"). The user-facing default is intentionally left unchanged. Co-Authored-By: Claude Fable 5 --- dataretrieval/waterdata/utils.py | 50 ++++++++++++++++++-------------- tests/waterdata_test.py | 3 +- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 7a2c0d5c..34095bec 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -147,13 +147,19 @@ def _switch_properties_id( properties: list[str] | None, id_name: str, service: str ) -> list[str]: """ - Switch properties id from its package-specific identifier to the - standardized "id" name that the API recognizes. - - Replaces any service-specific id name in `properties` with "id", - normalizes remaining hyphens to underscores, and drops the "geometry" - and service-id entries. Returns an empty list when `properties` is empty - or None. + Build the wire ``properties`` list, dropping every id alias and + ``geometry``. + + The feature ``id`` is always returned and is renamed to the + service-specific id column (e.g. ``daily_id``) in post-processing, so + it must not be requested as a property: several collections (e.g. + ``daily``, ``continuous``) reject ``id`` in ``properties`` with an + HTTP 400. ``geometry`` is likewise excluded because it is controlled + by ``skip_geometry``. Any service-specific id name (``daily_id``, + ``monitoring_location_id``, …) and the bare ``id`` are dropped, and + remaining hyphens are normalized to underscores. Returns an empty + list when `properties` is empty or None — the URL then omits the + ``properties`` filter and the result is shaped by :func:`_arrange_cols`. Parameters ---------- @@ -161,34 +167,36 @@ def _switch_properties_id( A list containing the properties or column names to be pulled from the service, or None. id_name : str - The name of the specific identifier key to look for. + The service-specific id column name to drop (e.g. ``daily_id``). service : str The service name. Returns ------- List[str] - The modified list with id names standardized to "id". + The wire ``properties`` with id aliases and ``geometry`` removed + and hyphens normalized. Examples -------- - For service "monitoring-locations", it will look for - "monitoring_location_id" and change - it to "id". + For service "daily" with ``properties=["daily_id", "value", "geometry"]``, + returns ``["value"]`` — ``daily_id`` and ``geometry`` are dropped, while + the ``daily_id`` column still appears in the result, renamed from the + always-returned feature ``id``. """ if not properties: return [] service_id = service.replace("-", "_") + "_id" - last_letter = service[-1] service_id_singular = "" - if last_letter == "s": - service_singular = service[:-1] - service_id_singular = service_singular.replace("-", "_") + "_id" - # Replace id fields with "id" - id_fields = [service_id, service_id_singular, id_name] - properties = ["id" if p in id_fields else p.replace("-", "_") for p in properties] - # Remove unwanted fields - return [p for p in properties if p not in ["geometry", service_id]] + if service.endswith("s"): + service_id_singular = service[:-1].replace("-", "_") + "_id" + # The feature ``id`` always comes back (renamed to the service id + # downstream) and several collections now reject it as a selectable + # property; ``geometry`` is controlled by ``skip_geometry``. Drop both, + # plus every service-specific id alias. + drop = {"id", "geometry", id_name, service_id, service_id_singular} + normalized = (p.replace("-", "_") for p in properties) + return [p for p in normalized if p not in drop] _DATETIME_FORMATS = ( diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 3358899b..34ccf4f2 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -470,7 +470,8 @@ def test_get_continuous(): time="2025-01-01/2025-12-31", ) assert isinstance(df, DataFrame) - assert "geometry" not in df.columns + # ``skip_geometry`` is unset, so the server includes geometry by default. + assert "geometry" in df.columns assert ( df["time"].dtype.name.startswith("datetime64[") and "UTC" in df["time"].dtype.name From 4b14691948bb53c7029ee877951b8aef026f44c2 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 15 Jun 2026 09:00:28 -0500 Subject: [PATCH 2/2] refactor(waterdata): drop the dead service_id_singular branch in _switch_properties_id Both callers pass id_name = _OUTPUT_ID_BY_SERVICE[service] (the canonical service id column), so the `service.endswith("s")` singular-alias heuristic never caught a real property the drop set was missing: where it produced a meaningful name it equalled id_name already in the set, and otherwise it produced non-existent forms (e.g. "continuou_id"). The drop set is now {"id", "geometry", id_name, service_id}. Also fix the get_cql call-site comment, which still described the pre-fix REPLACE behavior. Co-Authored-By: Claude Opus 4.8 --- dataretrieval/waterdata/api.py | 5 +++-- dataretrieval/waterdata/utils.py | 10 ++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index f4ecba47..336aa09b 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -3007,8 +3007,9 @@ def get_cql( properties_list = _as_str_list(properties, "properties") - # Translate user-facing names (``daily_id``/``id``) to the wire ``id`` the - # OGC API expects, matching the typed getters. + # Drop id aliases (``daily_id``/``id``) and ``geometry`` from the wire + # ``properties`` (the feature ``id`` is always returned and renamed + # downstream), matching the typed getters. wire_properties = _switch_properties_id(properties_list, output_id, service) req = _construct_cql_request( diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 34095bec..a3a63d7e 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -187,14 +187,12 @@ def _switch_properties_id( if not properties: return [] service_id = service.replace("-", "_") + "_id" - service_id_singular = "" - if service.endswith("s"): - service_id_singular = service[:-1].replace("-", "_") + "_id" # The feature ``id`` always comes back (renamed to the service id - # downstream) and several collections now reject it as a selectable + # downstream) and several collections reject it as a selectable # property; ``geometry`` is controlled by ``skip_geometry``. Drop both, - # plus every service-specific id alias. - drop = {"id", "geometry", id_name, service_id, service_id_singular} + # plus the service-specific id column (``id_name``) and the name derived + # straight from the service (``service_id``). + drop = {"id", "geometry", id_name, service_id} normalized = (p.replace("-", "_") for p in properties) return [p for p in normalized if p not in drop]