From d023600420865f58e4ead3eba089e9aa2c175dae Mon Sep 17 00:00:00 2001 From: Quratulain-bilal Date: Mon, 18 May 2026 20:58:22 +0500 Subject: [PATCH] fix(catalogs): validate extension and preset catalog payload shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ExtensionCatalog._fetch_single_catalog` and `PresetCatalog._fetch_single_catalog` only check that the `extensions` / `presets` key is *present* in the parsed catalog JSON. They don't check that the value is a JSON object, and they don't check that the root is a JSON object at all. A malformed (or compromised) upstream catalog returning: {"schema_version": "1.0", "extensions": []} passes both `"extensions" not in catalog_data` and the subsequent `response.read()` JSON parse, gets cached on disk, and then crashes deep inside `_get_merged_extensions` (resp. `_get_merged_packs`) with: AttributeError: 'list' object has no attribute 'items' instead of the existing user-facing `ExtensionError("Invalid catalog format from ")` / `PresetError("Invalid preset catalog format")` that the surrounding code is clearly trying to produce. The sibling integration-catalog reader already validates this — see `src/specify_cli/integrations/catalog.py` where the fetch path explicitly checks both `isinstance(catalog_data, dict)` and `isinstance(catalog_data.get("integrations"), dict)` before returning. This change mirrors that pattern in the extension and preset readers so the three catalog fetchers stay consistent and a malformed upstream surfaces as the user-facing error instead of a raw Python traceback. Adds parametrized regression tests covering: - root payload is not a JSON object (list, str, int, null) - root is a dict but `extensions` / `presets` value is the wrong type (list, str, null, int) All eight bad-payload shapes now raise the expected catalog error. --- src/specify_cli/extensions.py | 19 +++++++++++++++ src/specify_cli/presets.py | 20 ++++++++++++++++ tests/test_extensions.py | 45 +++++++++++++++++++++++++++++++++++ tests/test_presets.py | 45 +++++++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 871503f0ae..5ad6303838 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1839,8 +1839,27 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) + # Validate payload shape before iteration. Checking only key + # presence would let a payload like ``{"extensions": []}`` or + # ``{"extensions": null}`` slip through here and then crash with + # ``AttributeError: 'list' object has no attribute 'items'`` deep + # inside ``_get_merged_extensions``. The sibling integration + # catalog reader already guards both the root object and the + # nested mapping (see ``integrations/catalog.py``); the extension + # catalog must stay consistent so a malformed upstream surfaces as + # the user-facing ``Invalid catalog format`` error instead of a + # raw Python traceback. + if not isinstance(catalog_data, dict): + raise ExtensionError( + f"Invalid catalog format from {entry.url}: expected a JSON object" + ) if "schema_version" not in catalog_data or "extensions" not in catalog_data: raise ExtensionError(f"Invalid catalog format from {entry.url}") + if not isinstance(catalog_data.get("extensions"), dict): + raise ExtensionError( + f"Invalid catalog format from {entry.url}: " + "'extensions' must be a JSON object" + ) # Save to cache self.cache_dir.mkdir(parents=True, exist_ok=True) diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index fd9d4745f1..523b50726a 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -2045,11 +2045,31 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) + # Validate payload shape before iteration. Checking only key + # presence would let a payload like ``{"presets": []}`` or + # ``{"presets": null}`` slip through here and then crash with + # ``AttributeError: 'list' object has no attribute 'items'`` deep + # inside ``_get_merged_packs``. The sibling integration catalog + # reader already guards both the root object and the nested + # mapping (see ``integrations/catalog.py``); the preset catalog + # must stay consistent so a malformed upstream surfaces as the + # user-facing ``Invalid preset catalog format`` error instead of + # a raw Python traceback. + if not isinstance(catalog_data, dict): + raise PresetError( + f"Invalid preset catalog format from {entry.url}: " + "expected a JSON object" + ) if ( "schema_version" not in catalog_data or "presets" not in catalog_data ): raise PresetError("Invalid preset catalog format") + if not isinstance(catalog_data.get("presets"), dict): + raise PresetError( + f"Invalid preset catalog format from {entry.url}: " + "'presets' must be a JSON object" + ) self.cache_dir.mkdir(parents=True, exist_ok=True) cache_file.write_text(json.dumps(catalog_data, indent=2)) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 153388a541..f1ce74c33a 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2577,6 +2577,51 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``extensions`` is the wrong type. + {"schema_version": "1.0", "extensions": []}, + {"schema_version": "1.0", "extensions": "oops"}, + {"schema_version": "1.0", "extensions": None}, + {"schema_version": "1.0", "extensions": 42}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_payload(self, temp_dir, payload): + """Malformed catalog payloads raise ExtensionError, not AttributeError. + + Without this guard, a payload like ``{"extensions": []}`` would pass the + key-presence check and then crash with ``AttributeError: 'list' object + has no attribute 'items'`` deep inside ``_get_merged_extensions``. The + sibling integration catalog reader already validates both the root + object and the nested mapping (see ``integrations/catalog.py``); the + extension catalog must stay consistent. + """ + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = CatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(ExtensionError, match="Invalid catalog format"): + catalog._fetch_single_catalog(entry, force_refresh=True) + def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header when a provider is configured.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_presets.py b/tests/test_presets.py index 8fa700fa77..1928840d96 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1514,6 +1514,51 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + @pytest.mark.parametrize( + "payload", + [ + # Root is not a JSON object. + [], + "oops", + 42, + None, + # Root is fine but ``presets`` is the wrong type. + {"schema_version": "1.0", "presets": []}, + {"schema_version": "1.0", "presets": "oops"}, + {"schema_version": "1.0", "presets": None}, + {"schema_version": "1.0", "presets": 42}, + ], + ) + def test_fetch_single_catalog_rejects_malformed_payload(self, project_dir, payload): + """Malformed catalog payloads raise PresetError, not AttributeError. + + Without this guard, a payload like ``{"presets": []}`` would pass the + key-presence check and then crash with ``AttributeError: 'list' object + has no attribute 'items'`` deep inside ``_get_merged_packs``. The + sibling integration catalog reader already validates both the root + object and the nested mapping (see ``integrations/catalog.py``); the + preset catalog must stay consistent. + """ + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(payload).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + entry = PresetCatalogEntry( + url="https://example.com/catalog.json", + name="default", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(PresetError, match="Invalid preset catalog format"): + catalog._fetch_single_catalog(entry, force_refresh=True) + def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): """download_pack passes Authorization header when configured.""" from unittest.mock import patch, MagicMock