From 6efee947853cd3d5f8ec861fc1e04ecef98cfd8a Mon Sep 17 00:00:00 2001
From: gaoflow
Date: Wed, 17 Jun 2026 08:33:17 +0200
Subject: [PATCH] Accept array-style keys in from_query_string

from_query_string raised ValueError on valid query strings with
array-style keys such as a[]=1&a[]=2 or user[name]=joe. The format gate
used a hardcoded character whitelist that omitted [ and ], so any
bracketed key was rejected before parse_qs was called.

Replace the whitelist with a structural check (each &-segment is
key=value with a non-empty key and no raw whitespace), which accepts
bracketed/array keys while still rejecting TOML/YAML/JSON/XML and plain
text.

Adds tests for array-style and bracketed keys.
---
 benedict/serializers/query_string.py        | 12 +++++++++---
 tests/dicts/io/test_io_dict_query_string.py | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/benedict/serializers/query_string.py b/benedict/serializers/query_string.py
index c7272f50..19a7a25e 100644
--- a/benedict/serializers/query_string.py
+++ b/benedict/serializers/query_string.py
@@ -30,9 +30,15 @@ def __init__(self) -> None:
     def decode(  # type: ignore[override]
         self, s: str, flat: bool = True
     ) -> dict[str, str] | dict[str, list[str]]:
-        qs_re = r"(?:([\w\-\%\+\.\|]+\=[\w\-\%\+\.\|]*)+(?:[\&]{1})?)+"
-        qs_pattern = re.compile(qs_re)
-        if qs_pattern.match(s):
+        # A query string is a sequence of "key=value" pairs joined by "&".
+        # Each key must be non-empty and free of whitespace, "=" and "&";
+        # each value must be free of whitespace and "&" (spaces are encoded
+        # as "+" or "%20"). This accepts real-world keys such as array-style
+        # "a[]" / "user[name]" while still rejecting other formats (TOML, YAML,
+        # JSON, XML) and plain text (a URL with a query part still passes).
+        pair_re = re.compile(r"^[^\s=&]+=[^\s&]*$")
+        pairs = s.split("&")
+        if all(pair_re.match(pair) for pair in pairs):
             data = parse_qs(s)
             if flat:
                 return {key: value[0] for key, value in data.items()}
diff --git a/tests/dicts/io/test_io_dict_query_string.py b/tests/dicts/io/test_io_dict_query_string.py
index 702a54c8..e96e5101 100644
--- a/tests/dicts/io/test_io_dict_query_string.py
+++ b/tests/dicts/io/test_io_dict_query_string.py
@@ -26,6 +26,24 @@ def test_from_query_string_with_valid_data(self) -> None:
         self.assertTrue(isinstance(d, dict))
         self.assertEqual(d, r)
 
+    def test_from_query_string_with_array_style_keys(self) -> None:
+        # array-style keys (PHP / HTML form syntax) are valid query strings
+        s = "a[]=1&a[]=2"
+        r = {"a[]": "1"}
+        d = IODict.from_query_string(s)
+        self.assertTrue(isinstance(d, dict))
+        self.assertEqual(d, r)
+        d = IODict(s, format="query_string")
+        self.assertTrue(isinstance(d, dict))
+        self.assertEqual(d, r)
+
+    def test_from_query_string_with_bracketed_keys(self) -> None:
+        s = "user[name]=joe&user[age]=42"
+        r = {"user[name]": "joe", "user[age]": "42"}
+        d = IODict.from_query_string(s)
+        self.assertTrue(isinstance(d, dict))
+        self.assertEqual(d, r)
+
     def test_from_query_string_with_invalid_data(self) -> None:
         s = "Lorem ipsum est in ea occaecat nisi officia."
         # static method