From fc736e489bfbeaeff4eef26269f08aabf6fed508 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sat, 30 May 2026 15:03:06 -0400 Subject: [PATCH] fix(waterdata): materialize numpy/Series numeric params instead of str()-ing them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A numeric (_NO_NORMALIZE_PARAMS) param — water_year, year, month, day, thresholds, … — passed as a numpy array or pandas Series fell into the `args[k] = v` passthrough in _get_args without being materialized to a list. Downstream, the GET comma-join and the chunker both test `list`/`tuple`, so an ndarray/Series was neither comma-joined nor chunked: e.g. get_peaks(water_year=np.array([2020, 2021])) produced `water_year=%5B2020+2021%5D` (the URL-encoded `str()` of the array, `[2020 2021]`), which the API rejects with HTTP 400, instead of `water_year=2020,2021`. Plain lists already worked. Split the branch so _NO_NORMALIZE_PARAMS values keep their element types (no string-normalization) but a non-string iterable is still materialized to a list of native Python scalars — `.tolist()` for numpy/pandas, `list()` for generators and other iterables — so the values comma-join in the URL, chunk, and stay JSON-serializable (no numpy reprs in args). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- dataretrieval/waterdata/utils.py | 13 +++++++++++-- tests/waterdata_test.py | 22 ++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index dd622efd..ad1b3afd 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -2024,9 +2024,18 @@ def _get_args( args[k] = _as_str_list(v, k) elif ( k in _NO_NORMALIZE_PARAMS - or isinstance(v, str) - or not isinstance(v, Iterable) + and isinstance(v, Iterable) + and not isinstance(v, str) ): + # Numeric params (water_year, bbox, thresholds, …) keep their + # element types — no string-normalization — but a non-string + # iterable (numpy array, pandas Series, generator) is materialized + # to a list so the GET comma-join and the chunker, which test + # ``list``/``tuple``, handle it instead of str()-ing the whole + # array. ``.tolist()`` yields native int/float; ``list()`` covers + # generators and other iterables. Scalars/strings fall through. 
+ args[k] = v.tolist() if hasattr(v, "tolist") else list(v) + elif isinstance(v, str) or not isinstance(v, Iterable): args[k] = v else: args[k] = _normalize_str_iterable(v, k) diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 47bc4458..3358899b 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -35,6 +35,7 @@ _check_profiles, _construct_api_requests, _construct_cql_request, + _get_args, _normalize_str_iterable, ) @@ -294,6 +295,27 @@ def test_construct_api_requests_numeric_list_joins_with_str(): assert "water_year=2020%2C2021" in str(req.url) +def test_get_args_materializes_numpy_and_series_numeric_params(): + """Regression: numeric (_NO_NORMALIZE_PARAMS) params given as a numpy array + or pandas Series must be materialized to a list of native Python scalars so + they comma-join in the URL (and stay JSON-serializable) — previously the + array/Series repr leaked into the query string.""" + for value in (np.array([2020, 2021]), pd.Series([2020, 2021])): + args = _get_args({"water_year": value}) + assert args["water_year"] == [2020, 2021] + # native Python ints, not numpy scalars (JSON-serializable, no np reprs) + assert [type(x) for x in args["water_year"]] == [int, int] + req = _construct_api_requests("peaks", **args) + assert "water_year=2020%2C2021" in str(req.url) + + # float coordinate arrays (e.g. bbox) likewise materialize to native floats + args = _get_args({"bbox": np.array([-92.8, 44.2, -88.9, 46.0])}) + assert args["bbox"] == [-92.8, 44.2, -88.9, 46.0] + assert all(type(x) is float for x in args["bbox"]) + req = _construct_api_requests("daily", **args) + assert "bbox=-92.8%2C44.2%2C-88.9%2C46.0" in str(req.url) + + def test_construct_api_requests_two_element_date_list_becomes_interval(): """A two-element date list is interpreted as start/end of an OGC datetime interval (joined with '/'), NOT as two discrete dates. The OGC `datetime`