diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index dd622efd..ad1b3afd 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -2024,9 +2024,18 @@ def _get_args( args[k] = _as_str_list(v, k) elif ( k in _NO_NORMALIZE_PARAMS - or isinstance(v, str) - or not isinstance(v, Iterable) + and isinstance(v, Iterable) + and not isinstance(v, str) ): + # Numeric params (water_year, bbox, thresholds, …) keep their + # element types — no string-normalization — but a non-string + # iterable (numpy array, pandas Series, generator) is materialized + # to a list so the GET comma-join and the chunker, which test + # ``list``/``tuple``, handle it instead of str()-ing the whole + # array. ``.tolist()`` yields native int/float; ``list()`` covers + # generators and other iterables. Scalars/strings fall through. + args[k] = v.tolist() if hasattr(v, "tolist") else list(v) + elif isinstance(v, str) or not isinstance(v, Iterable): args[k] = v else: args[k] = _normalize_str_iterable(v, k) diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 47bc4458..3358899b 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -35,6 +35,7 @@ _check_profiles, _construct_api_requests, _construct_cql_request, + _get_args, _normalize_str_iterable, ) @@ -294,6 +295,27 @@ def test_construct_api_requests_numeric_list_joins_with_str(): assert "water_year=2020%2C2021" in str(req.url) +def test_get_args_materializes_numpy_and_series_numeric_params(): + """Regression: numeric (_NO_NORMALIZE_PARAMS) params given as a numpy array + or pandas Series must be materialized to a list of native Python scalars so + they comma-join in the URL (and stay JSON-serializable) — previously the + array/Series repr leaked into the query string.""" + for value in (np.array([2020, 2021]), pd.Series([2020, 2021])): + args = _get_args({"water_year": value}) + assert args["water_year"] == [2020, 2021] + # native Python ints, not numpy scalars (JSON-serializable, no np reprs) + assert [type(x) for x in args["water_year"]] == [int, int] + req = _construct_api_requests("peaks", **args) + assert "water_year=2020%2C2021" in str(req.url) + + # float coordinate arrays (e.g. bbox) likewise materialize to native floats + args = _get_args({"bbox": np.array([-92.8, 44.2, -88.9, 46.0])}) + assert args["bbox"] == [-92.8, 44.2, -88.9, 46.0] + assert all(type(x) is float for x in args["bbox"]) + req = _construct_api_requests("daily", **args) + assert "bbox=-92.8%2C44.2%2C-88.9%2C46.0" in str(req.url) + + def test_construct_api_requests_two_element_date_list_becomes_interval(): """A two-element date list is interpreted as start/end of an OGC datetime interval (joined with '/'), NOT as two discrete dates. The OGC `datetime`