From f1c646fa77502d82b67a7654a4b2e16340b0faa7 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 13:48:54 -0500 Subject: [PATCH 01/10] fix: sanitize extension name in download path to prevent path traversal (GHSA-67q9-p54f-7cpr) The extension argument in 'specify extension add --from' was interpolated directly into the temporary ZIP download path. Absolute paths or ../ segments could escape .specify/extensions/.cache/downloads/, causing arbitrary file writes and deletes. Fix: strip directory components via Path.name, replace residual separators, and add a resolve().relative_to() containment guard before any I/O. CWE-22, CWE-73 --- src/specify_cli/__init__.py | 10 ++- tests/test_extension_add_path_traversal.py | 73 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 tests/test_extension_add_path_traversal.py diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index df537b062f..8d41066121 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3644,7 +3644,15 @@ def extension_add( # Download ZIP to temp location download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" download_dir.mkdir(parents=True, exist_ok=True) - zip_path = download_dir / f"{extension}-url-download.zip" + safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" + zip_path = download_dir / f"{safe_name}-url-download.zip" + + # Guard: resolved path must stay inside download_dir (CWE-22) + try: + zip_path.resolve().relative_to(download_dir.resolve()) + except ValueError: + console.print("[red]Error:[/red] Invalid extension name (path traversal detected)") + raise typer.Exit(1) try: from specify_cli.authentication.http import open_url as _open_url diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py new file mode 100644 index 0000000000..aef7b2ff7f --- /dev/null +++ b/tests/test_extension_add_path_traversal.py @@ -0,0 +1,73 @@ +"""Tests for path traversal guard in `specify extension add --from` (GHSA-67q9-p54f-7cpr). + +The extension argument is used to construct a temporary ZIP download path. +Without sanitisation, absolute paths or ``../`` segments in the argument can +escape the intended cache directory, causing arbitrary file writes and deletes. +""" + +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from specify_cli import app + +runner = CliRunner() + +TRAVERSAL_PAYLOADS = [ + "../pwned", + "../../etc/passwd", + "subdir/../../escape", + "/tmp/evil", +] + + +@pytest.fixture() +def project_dir(tmp_path, monkeypatch): + proj = tmp_path / "project" + proj.mkdir() + (proj / ".specify").mkdir() + monkeypatch.chdir(proj) + return proj + + +class TestExtensionAddFromPathTraversal: + """Path traversal payloads in the extension name must not escape the download cache.""" + + @pytest.mark.parametrize("bad_name", TRAVERSAL_PAYLOADS) + def test_traversal_payload_cannot_write_outside_cache(self, project_dir, bad_name): + """The download zip path must stay inside .specify/extensions/.cache/downloads/.""" + result = runner.invoke( + app, + ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], + ) + # The command should either reject early (path traversal guard) or + # the sanitised name means the file would land inside the cache dir. + # In no case should files appear outside the project tree. + cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" + stray = [ + p + for p in project_dir.parent.rglob("*") + if p.is_file() and "url-download.zip" in p.name and not str(p).startswith(str(cache_dir)) + ] + assert stray == [], f"Traversal payload leaked files: {stray}" + + def test_clean_name_is_unaffected(self, project_dir, monkeypatch): + """A normal extension name should not trigger the guard.""" + from unittest.mock import patch, MagicMock + + mock_response = MagicMock() + mock_response.read.return_value = b"PK\x03\x04fake" + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) + + with patch("specify_cli.authentication.http.open_url", return_value=mock_response), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install: + mock_install.return_value = MagicMock(id="my-ext", name="My Ext", version="1.0.0") + result = runner.invoke( + app, + ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], + ) + + # Should reach the install call, not be rejected as traversal + assert "path traversal" not in (result.output or "").lower() From 62499d7bc4172ea8a23e1d1a1e396b48403d1dcb Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 13:57:18 -0500 Subject: [PATCH 02/10] address review: mock network, unique filename, backslash payloads, delete-vector + containment tests - Mock open_url in all traversal tests to avoid network dependency - Use uuid-based temp filename to prevent concurrent collisions - Add backslash traversal payloads (..\pwned, ..\..\etc\passwd) - Add sentinel-file test covering the delete vector - Use Path.relative_to() instead of str.startswith() for containment - Assert exit_code and mock_install.assert_called_once() in clean-name test --- src/specify_cli/__init__.py | 3 +- tests/test_extension_add_path_traversal.py | 66 +++++++++++++++------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 8d41066121..e66d96d8c7 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3642,10 +3642,11 @@ def extension_add( console.print(f"Downloading from {from_url}...") # Download ZIP to temp location + import uuid as _uuid download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" download_dir.mkdir(parents=True, exist_ok=True) safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" - zip_path = download_dir / f"{safe_name}-url-download.zip" + zip_path = download_dir / f"{safe_name}-{_uuid.uuid4().hex[:8]}.zip" # Guard: resolved path must stay inside download_dir (CWE-22) try: diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index aef7b2ff7f..540beb5a8a 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -6,6 +6,7 @@ """ from pathlib import Path +from unittest.mock import MagicMock, patch import pytest from typer.testing import CliRunner @@ -18,6 +19,8 @@ "../pwned", "../../etc/passwd", "subdir/../../escape", + "..\\pwned", + "..\\..\\etc\\passwd", "/tmp/evil", ] @@ -31,37 +34,57 @@ def project_dir(tmp_path, monkeypatch): return proj +def _mock_open_url(): + """Return a mock open_url that yields fake ZIP bytes.""" + mock_response = MagicMock() + mock_response.read.return_value = b"PK\x03\x04fake" + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) + return mock_response + + class TestExtensionAddFromPathTraversal: """Path traversal payloads in the extension name must not escape the download cache.""" @pytest.mark.parametrize("bad_name", TRAVERSAL_PAYLOADS) def test_traversal_payload_cannot_write_outside_cache(self, project_dir, bad_name): """The download zip path must stay inside .specify/extensions/.cache/downloads/.""" - result = runner.invoke( - app, - ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], - ) - # The command should either reject early (path traversal guard) or - # the sanitised name means the file would land inside the cache dir. - # In no case should files appear outside the project tree. + with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip", return_value=MagicMock(id="x", name="X", version="1.0.0")): + result = runner.invoke( + app, + ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], + ) cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" - stray = [ - p - for p in project_dir.parent.rglob("*") - if p.is_file() and "url-download.zip" in p.name and not str(p).startswith(str(cache_dir)) - ] + cache_resolved = cache_dir.resolve() + stray = [] + for p in project_dir.parent.rglob("*"): + if not p.is_file() or ".zip" not in p.name: + continue + try: + p.resolve().relative_to(cache_resolved) + except ValueError: + stray.append(p) assert stray == [], f"Traversal payload leaked files: {stray}" - def test_clean_name_is_unaffected(self, project_dir, monkeypatch): - """A normal extension name should not trigger the guard.""" - from unittest.mock import patch, MagicMock + @pytest.mark.parametrize("bad_name", TRAVERSAL_PAYLOADS) + def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_name): + """The finally-block cleanup must not delete files outside the cache dir.""" + # Place a sentinel where the pre-fix code would have written/deleted + sentinel = project_dir.parent / "sentinel.txt" + sentinel.write_text("must survive") - mock_response = MagicMock() - mock_response.read.return_value = b"PK\x03\x04fake" - mock_response.__enter__ = MagicMock(return_value=mock_response) - mock_response.__exit__ = MagicMock(return_value=False) + with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip", return_value=MagicMock(id="x", name="X", version="1.0.0")): + runner.invoke( + app, + ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], + ) + assert sentinel.exists(), "Sentinel file was deleted by cleanup — traversal not blocked" - with patch("specify_cli.authentication.http.open_url", return_value=mock_response), \ + def test_clean_name_is_unaffected(self, project_dir): + """A normal extension name should not trigger the guard.""" + with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install: mock_install.return_value = MagicMock(id="my-ext", name="My Ext", version="1.0.0") result = runner.invoke( @@ -69,5 +92,6 @@ def test_clean_name_is_unaffected(self, project_dir, monkeypatch): ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], ) - # Should reach the install call, not be rejected as traversal + assert result.exit_code == 0, result.output + mock_install.assert_called_once() assert "path traversal" not in (result.output or "").lower() From 24bae9359d0d9c8fcf54a2e25fac6e3dbd45a456 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 14:08:22 -0500 Subject: [PATCH 03/10] address review round 2: symlink guard, length cap, mock-based assertions - Reject symlinked download cache directory (consistent with shared_infra.py) - Cap safe_name to 64 chars to avoid filesystem errors on long inputs - Remove unused Path import - Write-side test now asserts install_from_zip arg is inside cache dir - Delete-side sentinel placed at the pre-fix path (bad_name-url-download.zip) - Clean-name test verifies zip_path via mock call_args --- src/specify_cli/__init__.py | 7 ++++ tests/test_extension_add_path_traversal.py | 45 ++++++++++++---------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index e66d96d8c7..0ac691f2f7 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3645,7 +3645,14 @@ def extension_add( import uuid as _uuid download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" download_dir.mkdir(parents=True, exist_ok=True) + + # Reject symlinked cache directory (consistent with shared_infra.py) + if download_dir.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + raise typer.Exit(1) + safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" + safe_name = safe_name[:64] # cap length to avoid filesystem errors zip_path = download_dir / f"{safe_name}-{_uuid.uuid4().hex[:8]}.zip" # Guard: resolved path must stay inside download_dir (CWE-22) diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index 540beb5a8a..029f239390 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -5,8 +5,7 @@ escape the intended cache directory, causing arbitrary file writes and deletes. """ -from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch import pytest from typer.testing import CliRunner @@ -47,32 +46,32 @@ class TestExtensionAddFromPathTraversal: """Path traversal payloads in the extension name must not escape the download cache.""" @pytest.mark.parametrize("bad_name", TRAVERSAL_PAYLOADS) - def test_traversal_payload_cannot_write_outside_cache(self, project_dir, bad_name): - """The download zip path must stay inside .specify/extensions/.cache/downloads/.""" + def test_traversal_payload_writes_inside_cache(self, project_dir, bad_name): + """The zip_path passed to install_from_zip must resolve inside the cache dir.""" + cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ - patch("specify_cli.extensions.ExtensionManager.install_from_zip", return_value=MagicMock(id="x", name="X", version="1.0.0")): + patch("specify_cli.extensions.ExtensionManager.install_from_zip", return_value=MagicMock(id="x", name="X", version="1.0.0")) as mock_install: result = runner.invoke( app, ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], ) - cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" - cache_resolved = cache_dir.resolve() - stray = [] - for p in project_dir.parent.rglob("*"): - if not p.is_file() or ".zip" not in p.name: - continue - try: - p.resolve().relative_to(cache_resolved) - except ValueError: - stray.append(p) - assert stray == [], f"Traversal payload leaked files: {stray}" + if mock_install.called: + zip_arg = mock_install.call_args[0][0] # positional arg: zip_path + zip_arg.resolve().relative_to(cache_dir.resolve()) # raises ValueError if outside @pytest.mark.parametrize("bad_name", TRAVERSAL_PAYLOADS) def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_name): """The finally-block cleanup must not delete files outside the cache dir.""" - # Place a sentinel where the pre-fix code would have written/deleted - sentinel = project_dir.parent / "sentinel.txt" - sentinel.write_text("must survive") + # Place a sentinel at the path the pre-fix code would have constructed + cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" + cache_dir.mkdir(parents=True, exist_ok=True) + pre_fix_path = cache_dir / f"{bad_name}-url-download.zip" + try: + pre_fix_path.parent.mkdir(parents=True, exist_ok=True) + pre_fix_path.write_text("sentinel") + except OSError: + # Absolute payloads like /tmp/evil can't be created relative to cache + pre_fix_path = None with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ patch("specify_cli.extensions.ExtensionManager.install_from_zip", return_value=MagicMock(id="x", name="X", version="1.0.0")): @@ -80,7 +79,9 @@ def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_na app, ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], ) - assert sentinel.exists(), "Sentinel file was deleted by cleanup — traversal not blocked" + if pre_fix_path is not None and pre_fix_path.exists(): + # Sentinel survived — the fix didn't touch the pre-fix path + assert pre_fix_path.read_text() == "sentinel" def test_clean_name_is_unaffected(self, project_dir): """A normal extension name should not trigger the guard.""" @@ -94,4 +95,8 @@ def test_clean_name_is_unaffected(self, project_dir): assert result.exit_code == 0, result.output mock_install.assert_called_once() + # Verify the zip path is inside the cache + zip_arg = mock_install.call_args[0][0] + cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" + zip_arg.resolve().relative_to(cache_dir.resolve()) assert "path traversal" not in (result.output or "").lower() From 44bde396bba147db9f130f17129c76fabf185c75 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 14:34:06 -0500 Subject: [PATCH 04/10] address review round 3: walk ancestor symlinks, unconditional sentinel assert - Symlink check now walks all ancestors (.specify, extensions, .cache, downloads) consistent with shared_infra.py pattern - Write-side test: unconditionally assert exit_code == 0 and mock_install called - Delete-side test: unconditionally assert sentinel exists (not conditional skip) - Remove unused 'call' import --- src/specify_cli/__init__.py | 11 +++++++---- tests/test_extension_add_path_traversal.py | 13 +++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 0ac691f2f7..0bfda6dfa2 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3646,10 +3646,13 @@ def extension_add( download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" download_dir.mkdir(parents=True, exist_ok=True) - # Reject symlinked cache directory (consistent with shared_infra.py) - if download_dir.is_symlink(): - console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") - raise typer.Exit(1) + # Reject symlinked ancestors (consistent with shared_infra.py) + _check = project_root + for _part in download_dir.relative_to(project_root).parts: + _check = _check / _part + if _check.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + raise typer.Exit(1) safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" safe_name = safe_name[:64] # cap length to avoid filesystem errors diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index 029f239390..08cf67d338 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -5,7 +5,7 @@ escape the intended cache directory, causing arbitrary file writes and deletes. """ -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch import pytest from typer.testing import CliRunner @@ -55,9 +55,10 @@ def test_traversal_payload_writes_inside_cache(self, project_dir, bad_name): app, ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], ) - if mock_install.called: - zip_arg = mock_install.call_args[0][0] # positional arg: zip_path - zip_arg.resolve().relative_to(cache_dir.resolve()) # raises ValueError if outside + assert result.exit_code == 0, result.output + mock_install.assert_called_once() + zip_arg = mock_install.call_args[0][0] # positional arg: zip_path + zip_arg.resolve().relative_to(cache_dir.resolve()) # raises ValueError if outside @pytest.mark.parametrize("bad_name", TRAVERSAL_PAYLOADS) def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_name): @@ -79,8 +80,8 @@ def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_na app, ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], ) - if pre_fix_path is not None and pre_fix_path.exists(): - # Sentinel survived — the fix didn't touch the pre-fix path + if pre_fix_path is not None: + assert pre_fix_path.exists(), f"Sentinel deleted by cleanup: {pre_fix_path}" assert pre_fix_path.read_text() == "sentinel" def test_clean_name_is_unaffected(self, project_dir): From 77b083c8cac741f4f70dcb9948ac4675f8d70c23 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 14:44:03 -0500 Subject: [PATCH 05/10] address review round 4: TOCTOU-safe ancestor walk + sentinel hygiene - mkdir(parents=True) was following symlinked ancestors before the symlink check; replace with per-component walk that validates each ancestor and creates missing components individually (mirrors shared_infra._ensure_safe_shared_directory). - Add TestExtensionAddFromSymlinkedCache regression test covering symlinks at .specify, .specify/extensions, .cache, and downloads. - Skip sentinel creation for absolute-path payloads in the delete-side test to avoid writing /tmp/evil-url-download.zip into the runner; write-side test still proves containment for those payloads. --- src/specify_cli/__init__.py | 20 ++++--- tests/test_extension_add_path_traversal.py | 66 ++++++++++++++++++---- 2 files changed, 68 insertions(+), 18 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 0bfda6dfa2..c94720181c 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3641,18 +3641,24 @@ def extension_add( console.print("Only install extensions from sources you trust.\n") console.print(f"Downloading from {from_url}...") - # Download ZIP to temp location + # Download ZIP to temp location. + # Validate and create each ancestor without following symlinks + # (consistent with shared_infra._ensure_safe_shared_directory): + # `mkdir(parents=True)` would silently traverse a symlinked ancestor, + # so each component is checked and created individually instead. import uuid as _uuid download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" - download_dir.mkdir(parents=True, exist_ok=True) - - # Reject symlinked ancestors (consistent with shared_infra.py) - _check = project_root + _current = project_root for _part in download_dir.relative_to(project_root).parts: - _check = _check / _part - if _check.is_symlink(): + _current = _current / _part + if _current.is_symlink(): console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") raise typer.Exit(1) + if not _current.exists(): + _current.mkdir() + if _current.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + raise typer.Exit(1) safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" safe_name = safe_name[:64] # cap length to avoid filesystem errors diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index 08cf67d338..5da4bf9b1e 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -60,19 +60,20 @@ def test_traversal_payload_writes_inside_cache(self, project_dir, bad_name): zip_arg = mock_install.call_args[0][0] # positional arg: zip_path zip_arg.resolve().relative_to(cache_dir.resolve()) # raises ValueError if outside - @pytest.mark.parametrize("bad_name", TRAVERSAL_PAYLOADS) + @pytest.mark.parametrize("bad_name", [p for p in TRAVERSAL_PAYLOADS if not p.startswith(("/", "\\"))]) def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_name): - """The finally-block cleanup must not delete files outside the cache dir.""" + """The finally-block cleanup must not delete files outside the cache dir. + + Absolute-path payloads are excluded here to avoid writing sentinels + outside the pytest sandbox (e.g. ``/tmp``); the write-side test above + already proves the production guard contains absolute payloads. + """ # Place a sentinel at the path the pre-fix code would have constructed cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" cache_dir.mkdir(parents=True, exist_ok=True) pre_fix_path = cache_dir / f"{bad_name}-url-download.zip" - try: - pre_fix_path.parent.mkdir(parents=True, exist_ok=True) - pre_fix_path.write_text("sentinel") - except OSError: - # Absolute payloads like /tmp/evil can't be created relative to cache - pre_fix_path = None + pre_fix_path.parent.mkdir(parents=True, exist_ok=True) + pre_fix_path.write_text("sentinel") with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ patch("specify_cli.extensions.ExtensionManager.install_from_zip", return_value=MagicMock(id="x", name="X", version="1.0.0")): @@ -80,9 +81,8 @@ def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_na app, ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], ) - if pre_fix_path is not None: - assert pre_fix_path.exists(), f"Sentinel deleted by cleanup: {pre_fix_path}" - assert pre_fix_path.read_text() == "sentinel" + assert pre_fix_path.exists(), f"Sentinel deleted by cleanup: {pre_fix_path}" + assert pre_fix_path.read_text() == "sentinel" def test_clean_name_is_unaffected(self, project_dir): """A normal extension name should not trigger the guard.""" @@ -101,3 +101,47 @@ def test_clean_name_is_unaffected(self, project_dir): cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" zip_arg.resolve().relative_to(cache_dir.resolve()) assert "path traversal" not in (result.output or "").lower() + + +class TestExtensionAddFromSymlinkedCache: + """A symlinked download-cache ancestor must be rejected before any write.""" + + @pytest.mark.parametrize( + "ancestor_parts", + [ + (".specify",), + (".specify", "extensions"), + (".specify", "extensions", ".cache"), + (".specify", "extensions", ".cache", "downloads"), + ], + ) + def test_symlinked_ancestor_is_refused(self, tmp_path, monkeypatch, ancestor_parts): + """Symlinking any cache ancestor outside the project must abort the command.""" + project_dir = tmp_path / "project" + project_dir.mkdir() + outside = tmp_path / "outside" + outside.mkdir() + monkeypatch.chdir(project_dir) + + # Build the parent of the symlinked component using real directories. + parent = project_dir + for part in ancestor_parts[:-1]: + parent = parent / part + parent.mkdir() + + # Replace the final ancestor component with a symlink pointing outside the project. + symlink_path = parent / ancestor_parts[-1] + symlink_path.symlink_to(outside, target_is_directory=True) + + with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install: + result = runner.invoke( + app, + ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], + ) + + assert result.exit_code != 0 + assert "symlinked download cache" in (result.output or "").lower() + mock_install.assert_not_called() + # No download was written through the symlink + assert list(outside.iterdir()) == [] From f43e2ce3c37822f0bd3ec33496bfa6325f35d6c6 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 14:58:32 -0500 Subject: [PATCH 06/10] address review round 5: ancestor-resolves-outside check + TOCTOU-safe write - Per-component walk now also re-resolves each existing ancestor and requires it to land under project_root.resolve(). This catches non-symlink directory aliases (e.g. Windows junctions / mount points) that resolve outside the project even though no component is itself a symlink. - ZIP write now uses os.open(O_WRONLY|O_CREAT|O_EXCL|O_NOFOLLOW, 0o600). O_NOFOLLOW (POSIX) refuses a symlink swapped in between the cache validation and the write, closing the TOCTOU window. O_EXCL guarantees the unique UUID filename is freshly created and cannot be pre-staged. O_NOFOLLOW falls back to 0 on Windows. - Add TestExtensionAddFromAncestorEscape: simulates a junction-like alias by patching Path.resolve() so .cache resolves outside; command must refuse with 'escapes project root'. - Add TestExtensionAddFromTOCTOUWrite: races a symlink swap into the cache between validation and write; O_NOFOLLOW/O_EXCL must reject. --- src/specify_cli/__init__.py | 37 ++++++++-- tests/test_extension_add_path_traversal.py | 85 ++++++++++++++++++++++ 2 files changed, 117 insertions(+), 5 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index c94720181c..3351976187 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3646,19 +3646,29 @@ def extension_add( # (consistent with shared_infra._ensure_safe_shared_directory): # `mkdir(parents=True)` would silently traverse a symlinked ancestor, # so each component is checked and created individually instead. + # Each existing component is also re-resolved and required to land + # under project_root, which catches non-symlink directory aliases + # (e.g. Windows junctions / mount points) that resolve outside. import uuid as _uuid download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" + project_root_resolved = project_root.resolve() _current = project_root for _part in download_dir.relative_to(project_root).parts: _current = _current / _part if _current.is_symlink(): console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") raise typer.Exit(1) - if not _current.exists(): - _current.mkdir() - if _current.is_symlink(): - console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + if _current.exists(): + try: + _current.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + console.print("[red]Error:[/red] Download cache directory escapes project root") raise typer.Exit(1) + continue + _current.mkdir() + if _current.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + raise typer.Exit(1) safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" safe_name = safe_name[:64] # cap length to avoid filesystem errors @@ -3676,7 +3686,24 @@ def extension_add( with _open_url(from_url, timeout=60) as response: zip_data = response.read() - zip_path.write_bytes(zip_data) + # Open with O_NOFOLLOW | O_EXCL to defeat any TOCTOU symlink + # swap that may have happened between the cache validation + # above and this write. O_EXCL also guarantees the unique + # UUID filename is freshly created, never reused. O_NOFOLLOW + # is a no-op on Windows where the constant is absent. + _flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0) + _fd = os.open(zip_path, _flags, 0o600) + try: + with os.fdopen(_fd, "wb") as _zf: + _zf.write(zip_data) + except BaseException: + # os.fdopen took ownership only on success; on failure + # before the with-block, the fd may still be open. + try: + os.close(_fd) + except OSError: + pass + raise # Install from downloaded ZIP manifest = manager.install_from_zip(zip_path, speckit_version, priority=priority) diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index 5da4bf9b1e..9b6588438a 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -5,6 +5,7 @@ escape the intended cache directory, causing arbitrary file writes and deletes. """ +from pathlib import Path from unittest.mock import MagicMock, patch import pytest @@ -145,3 +146,87 @@ def test_symlinked_ancestor_is_refused(self, tmp_path, monkeypatch, ancestor_par mock_install.assert_not_called() # No download was written through the symlink assert list(outside.iterdir()) == [] + + +class TestExtensionAddFromAncestorEscape: + """A non-symlink ancestor whose ``.resolve()`` lands outside the project must be refused. + + Simulates the Windows junction / mount-point case using a manually-pre-resolved + directory swap: we cannot create real junctions on POSIX, but we can patch the + ``.resolve()`` of an existing ancestor to land outside the project root. The + production guard must reject it before any download happens. + """ + + def test_ancestor_resolves_outside_project_is_refused(self, tmp_path, monkeypatch): + project_dir = tmp_path / "project" + project_dir.mkdir() + outside = tmp_path / "outside" + outside.mkdir() + monkeypatch.chdir(project_dir) + + # Pre-create the cache ancestors as real directories so the per-component + # walk reaches the resolve()/relative_to() check on each existing one. + cache_root = project_dir / ".specify" / "extensions" / ".cache" + cache_root.mkdir(parents=True) + + real_resolve = Path.resolve + + def fake_resolve(self, *a, **kw): + # Pretend the .cache ancestor resolves outside the project. + if self == cache_root: + return real_resolve(outside, *a, **kw) + return real_resolve(self, *a, **kw) + + with patch.object(Path, "resolve", fake_resolve), \ + patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install: + result = runner.invoke( + app, + ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], + ) + + assert result.exit_code != 0 + assert "escapes project root" in (result.output or "").lower() + mock_install.assert_not_called() + assert list(outside.iterdir()) == [] + + +class TestExtensionAddFromTOCTOUWrite: + """The download write must not follow a symlink swapped in after validation.""" + + def test_swapped_zip_path_symlink_is_refused(self, project_dir): + """If zip_path is replaced by a symlink between validation and write, refuse.""" + cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" + outside = project_dir.parent / "outside" + outside.mkdir(exist_ok=True) + target = outside / "stolen.bin" + + # Patch os.open to simulate the TOCTOU window: just before the real + # os.open runs, replace the target path with a symlink pointing outside. + # With O_NOFOLLOW the os.open call must raise instead of writing through. + import os as _os + real_open = _os.open + + def racing_open(path, flags, mode=0o777): + try: + p = Path(path) + if p.parent == cache_dir and not p.exists(): + p.symlink_to(target) + except OSError: + pass + return real_open(path, flags, mode) + + with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install, \ + patch("os.open", side_effect=racing_open): + result = runner.invoke( + app, + ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], + ) + + # Either O_NOFOLLOW (POSIX) or O_EXCL (all platforms) must reject the + # swapped symlink. The command must fail and nothing must be written + # through the symlink to `outside`. + assert result.exit_code != 0 + mock_install.assert_not_called() + assert not target.exists(), "write followed swapped symlink" From db4e3b3c807a0f80eb8e7600c211ab1509820771 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 15:21:02 -0500 Subject: [PATCH 07/10] address review round 6: dir_fd-relative open + is_dir guard + clean errors Production: - Add _safe_open_download_zip helper. On POSIX it walks each cache ancestor with dir_fd + O_NOFOLLOW and creates the leaf relative to the deepest fd with O_EXCL|O_NOFOLLOW. This closes the ancestor-swap TOCTOU window (O_NOFOLLOW alone only protects the leaf). On Windows (no dir_fd/O_NOFOLLOW), falls back to plain O_EXCL; symlink attacks there require elevated privileges and are covered by the per-component validation walk. - Add is_dir() check on existing cache ancestors so a stray file at e.g. .specify/extensions yields a clean CLI error instead of a later NotADirectoryError from mkdir(). - Wrap the safe-open call and download write in OSError handlers so EEXIST/ELOOP/EACCES from the hardened open path surface as controlled CLI errors, not raw tracebacks. Tests: - Add _require_symlinks() helper that probes symlink creation and pytest.skip()s when unavailable (Windows without dev mode); apply to every test that creates symlinks. - Add TestExtensionAddFromTOCTOUWrite::test_swapped_ancestor_symlink_is_refused covering ancestor-swap; previous leaf-only test missed this vector. - Rewrite test_swapped_zip_path_symlink_is_refused to wrap the helper itself (no fragile os.open patching), and gate both TOCTOU tests on POSIX O_NOFOLLOW + dir_fd availability so Windows runners skip cleanly. - Add TestExtensionAddFromCacheAncestorIsFile to verify the new is_dir check produces a clean 'not a directory' error. --- src/specify_cli/__init__.py | 77 +++++++++-- tests/test_extension_add_path_traversal.py | 149 ++++++++++++++++++--- 2 files changed, 196 insertions(+), 30 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 3351976187..220cc3add0 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3329,6 +3329,47 @@ def _resolve_installed_extension( raise typer.Exit(1) +def _safe_open_download_zip(project_root: Path, download_dir: Path, zip_filename: str) -> int: + """Open ``download_dir / zip_filename`` for writing without following any symlink. + + Closes the TOCTOU window between cache-ancestor validation and the actual + file write. On POSIX, walks each component of ``download_dir`` using + ``dir_fd`` + ``O_NOFOLLOW`` so a symlink swap of *any* component (including + ancestors) after validation is rejected. The leaf is created with + ``O_EXCL`` so the unique UUID filename cannot collide with anything an + attacker pre-staged. + + On platforms without ``dir_fd``/``O_NOFOLLOW`` support (Windows), falls + back to a plain ``O_EXCL`` open. Symlink creation on Windows requires + elevated privileges, and the per-component ancestor walk performed by the + caller already covers the realistic threat surface there. + + Returns an open file descriptor; the caller owns and must close it. + Raises ``OSError`` (e.g. ``ELOOP``, ``EEXIST``, ``EACCES``) if any + component is a symlink or the leaf already exists. + """ + o_nofollow = getattr(os, "O_NOFOLLOW", 0) + o_directory = getattr(os, "O_DIRECTORY", 0) + leaf_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | o_nofollow + + # Windows / no openat support: best-effort O_EXCL open. + if os.open not in os.supports_dir_fd: + return os.open(download_dir / zip_filename, leaf_flags, 0o600) + + rel_parts = download_dir.relative_to(project_root).parts + parent_fd = os.open(project_root, os.O_RDONLY | o_directory) + try: + for part in rel_parts: + new_fd = os.open( + part, os.O_RDONLY | o_directory | o_nofollow, dir_fd=parent_fd + ) + os.close(parent_fd) + parent_fd = new_fd + return os.open(zip_filename, leaf_flags, 0o600, dir_fd=parent_fd) + finally: + os.close(parent_fd) + + def _resolve_catalog_extension( argument: str, catalog, @@ -3659,6 +3700,11 @@ def extension_add( console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") raise typer.Exit(1) if _current.exists(): + if not _current.is_dir(): + console.print( + f"[red]Error:[/red] Download cache path is not a directory: {_current}" + ) + raise typer.Exit(1) try: _current.resolve().relative_to(project_root_resolved) except (OSError, ValueError): @@ -3672,7 +3718,8 @@ def extension_add( safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" safe_name = safe_name[:64] # cap length to avoid filesystem errors - zip_path = download_dir / f"{safe_name}-{_uuid.uuid4().hex[:8]}.zip" + zip_filename = f"{safe_name}-{_uuid.uuid4().hex[:8]}.zip" + zip_path = download_dir / zip_filename # Guard: resolved path must stay inside download_dir (CWE-22) try: @@ -3686,13 +3733,24 @@ def extension_add( with _open_url(from_url, timeout=60) as response: zip_data = response.read() - # Open with O_NOFOLLOW | O_EXCL to defeat any TOCTOU symlink - # swap that may have happened between the cache validation - # above and this write. O_EXCL also guarantees the unique - # UUID filename is freshly created, never reused. O_NOFOLLOW - # is a no-op on Windows where the constant is absent. - _flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0) - _fd = os.open(zip_path, _flags, 0o600) + + # Open the ZIP file for writing without following any symlink + # in the path. On POSIX, walk each ancestor of `download_dir` + # using dir_fd + O_NOFOLLOW so a symlink swap of *any* + # component (including ancestors) between validation above + # and this write is rejected. On Windows there is no + # `dir_fd` / `O_NOFOLLOW` support, so we fall back to a + # plain O_EXCL open which still requires the unique UUID + # filename to be freshly created. + try: + _fd = _safe_open_download_zip( + project_root, download_dir, zip_filename + ) + except OSError as _open_err: + console.print( + f"[red]Error:[/red] Could not safely create download file: {_open_err}" + ) + raise typer.Exit(1) try: with os.fdopen(_fd, "wb") as _zf: _zf.write(zip_data) @@ -3710,6 +3768,9 @@ def extension_add( except urllib.error.URLError as e: console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}") raise typer.Exit(1) + except OSError as e: + console.print(f"[red]Error:[/red] Failed to write download cache: {e}") + raise typer.Exit(1) finally: # Clean up downloaded ZIP if zip_path.exists(): diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index 9b6588438a..2e2019d600 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -8,6 +8,8 @@ from pathlib import Path from unittest.mock import MagicMock, patch +import os + import pytest from typer.testing import CliRunner @@ -15,6 +17,28 @@ runner = CliRunner() + +def _require_symlinks(tmp_path) -> None: + """Skip the calling test when symlink creation is not permitted. + + On Windows, symlink creation requires elevated privileges or developer + mode and may raise ``OSError`` (``EPERM``/``EACCES``/``WinError 1314``). + The shared-infra test suite uses the same pattern. + """ + probe_target = tmp_path / "_symlink_probe_target" + probe_target.mkdir(exist_ok=True) + probe_link = tmp_path / "_symlink_probe_link" + try: + probe_link.symlink_to(probe_target, target_is_directory=True) + except (OSError, NotImplementedError) as exc: + pytest.skip(f"Symlink creation not supported in this environment: {exc}") + finally: + if probe_link.exists() or probe_link.is_symlink(): + try: + probe_link.unlink() + except OSError: + pass + TRAVERSAL_PAYLOADS = [ "../pwned", "../../etc/passwd", @@ -118,6 +142,7 @@ class TestExtensionAddFromSymlinkedCache: ) def test_symlinked_ancestor_is_refused(self, tmp_path, monkeypatch, ancestor_parts): """Symlinking any cache ancestor outside the project must abort the command.""" + _require_symlinks(tmp_path) project_dir = tmp_path / "project" project_dir.mkdir() outside = tmp_path / "outside" @@ -192,41 +217,121 @@ def fake_resolve(self, *a, **kw): class TestExtensionAddFromTOCTOUWrite: - """The download write must not follow a symlink swapped in after validation.""" + """The download write must not follow a symlink swapped in after validation. - def test_swapped_zip_path_symlink_is_refused(self, project_dir): + These tests rely on POSIX ``O_NOFOLLOW`` + ``dir_fd``-relative open and on + the runner being able to create symlinks. Skipped on platforms where either + is unavailable (notably Windows without elevated privileges). + """ + + def test_swapped_zip_path_symlink_is_refused(self, tmp_path, monkeypatch): """If zip_path is replaced by a symlink between validation and write, refuse.""" - cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" - outside = project_dir.parent / "outside" - outside.mkdir(exist_ok=True) + if not getattr(os, "O_NOFOLLOW", 0) or os.open not in os.supports_dir_fd: + pytest.skip("Requires POSIX O_NOFOLLOW + dir_fd support") + _require_symlinks(tmp_path) + project_dir = tmp_path / "project" + project_dir.mkdir() + (project_dir / ".specify").mkdir() + outside = tmp_path / "outside" + outside.mkdir() target = outside / "stolen.bin" + monkeypatch.chdir(project_dir) - # Patch os.open to simulate the TOCTOU window: just before the real - # os.open runs, replace the target path with a symlink pointing outside. - # With O_NOFOLLOW the os.open call must raise instead of writing through. - import os as _os - real_open = _os.open + from specify_cli import _safe_open_download_zip as _orig_safe_open - def racing_open(path, flags, mode=0o777): + def swap_then_open(project_root, download_dir, zip_filename): + # Race: pre-stage a symlink at the leaf inside the validated cache. + leaf = download_dir / zip_filename try: - p = Path(path) - if p.parent == cache_dir and not p.exists(): - p.symlink_to(target) - except OSError: - pass - return real_open(path, flags, mode) + leaf.symlink_to(target) + except (OSError, NotImplementedError): + pytest.skip("Cannot create symlink for leaf-swap race") + return _orig_safe_open(project_root, download_dir, zip_filename) + + with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install, \ + patch("specify_cli._safe_open_download_zip", side_effect=swap_then_open): + result = runner.invoke( + app, + ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], + ) + + assert result.exit_code != 0 + mock_install.assert_not_called() + # The write must not have followed the symlink to `outside`. + assert not target.exists(), "write followed swapped leaf symlink" + + def test_swapped_ancestor_symlink_is_refused(self, tmp_path, monkeypatch): + """If a *cache ancestor* is replaced by a symlink between validation and write, refuse. + + Covers the case `O_NOFOLLOW` alone misses: it only protects the final + component, but the per-ancestor walk in ``_safe_open_download_zip`` + (using ``dir_fd`` + ``O_NOFOLLOW`` on each component) catches this. + """ + if not getattr(os, "O_NOFOLLOW", 0) or os.open not in os.supports_dir_fd: + pytest.skip("Requires POSIX O_NOFOLLOW + dir_fd support") + _require_symlinks(tmp_path) + project_dir = tmp_path / "project" + project_dir.mkdir() + (project_dir / ".specify").mkdir() + outside = tmp_path / "outside" + outside.mkdir() + monkeypatch.chdir(project_dir) + + # Pre-create the cache ancestors so the production validation walk + # passes (the swap happens *after* validation, before the write). + cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" + cache_dir.mkdir(parents=True) + ancestor_to_swap = project_dir / ".specify" / "extensions" / ".cache" + + from specify_cli import _safe_open_download_zip as _orig_safe_open + + def swap_then_open(project_root, download_dir, zip_filename): + # Race: replace `.cache` with a symlink to `outside` after the + # caller's per-ancestor validation finished. + import shutil + shutil.rmtree(ancestor_to_swap) + try: + ancestor_to_swap.symlink_to(outside, target_is_directory=True) + except (OSError, NotImplementedError): + pytest.skip("Cannot create symlink for ancestor-swap race") + return _orig_safe_open(project_root, download_dir, zip_filename) with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install, \ - patch("os.open", side_effect=racing_open): + patch("specify_cli._safe_open_download_zip", side_effect=swap_then_open): + result = runner.invoke( + app, + ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], + ) + + assert result.exit_code != 0 + # Clean CLI error from the OSError handler, not a raw traceback. + assert "could not safely create download file" in (result.output or "").lower() \ + or "failed to write download cache" in (result.output or "").lower() + mock_install.assert_not_called() + # No download leaked through the swapped ancestor symlink. + assert list(outside.iterdir()) == [] + + +class TestExtensionAddFromCacheAncestorIsFile: + """A non-directory file at a cache-ancestor path must be refused with a clean error.""" + + def test_file_at_extensions_path_is_rejected_cleanly(self, tmp_path, monkeypatch): + project_dir = tmp_path / "project" + project_dir.mkdir() + (project_dir / ".specify").mkdir() + # Place a regular file where `.specify/extensions/` would be created. + (project_dir / ".specify" / "extensions").write_text("i am a file") + monkeypatch.chdir(project_dir) + + with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ + patch("specify_cli.extensions.ExtensionManager.install_from_zip") as mock_install: result = runner.invoke( app, ["extension", "add", "my-ext", "--from", "https://example.com/ext.zip"], ) - # Either O_NOFOLLOW (POSIX) or O_EXCL (all platforms) must reject the - # swapped symlink. The command must fail and nothing must be written - # through the symlink to `outside`. assert result.exit_code != 0 + assert "not a directory" in (result.output or "").lower() mock_install.assert_not_called() - assert not target.exists(), "write followed swapped symlink" From 4b1a13ff594131fdef751c16444cdcf11719aa9d Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 15:45:42 -0500 Subject: [PATCH 08/10] address review round 7: post-create resolve, mkdir error wrap, more payloads Production: - Wrap the entire ancestor validation/create loop in OSError handling so create-time races (PermissionError, EEXIST from another process) surface as a clean CLI error instead of an unhandled exception. - Re-resolve every newly created component against project_root_resolved too (not just existing ones). A parent swapped to a junction / mount point / symlink during creation can land the new directory outside the project even when the new component itself is not a symlink. Mirrors shared_infra._ensure_safe_shared_directory:115-118. - Add a final 'download_dir.resolve() under project_root_resolved' guard immediately before opening the ZIP. Defends the Windows fallback path of _safe_open_download_zip (no dir_fd/O_NOFOLLOW) against an ancestor swap between validation and write that the existing zip_path.resolve().relative_to(download_dir.resolve()) check would silently follow. Tests: - Add deep relative traversal payload (../../../../../etc/shadow) that escapes from the 4-level cache directory all the way past project_root. - Add Windows-relevant absolute payloads: C:\tmp\evil, C:/tmp/evil, and \\server\share\evil. CI runs these on windows-latest. - Extract _is_absolute_like() helper covering POSIX absolute, Windows drive-letter, and UNC roots; use it to filter the delete-side test so sentinel creation never escapes the pytest sandbox on any platform. --- src/specify_cli/__init__.py | 68 +++++++++++++++------- tests/test_extension_add_path_traversal.py | 36 ++++++++++-- 2 files changed, 80 insertions(+), 24 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 220cc3add0..36e01d200b 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3687,41 +3687,69 @@ def extension_add( # (consistent with shared_infra._ensure_safe_shared_directory): # `mkdir(parents=True)` would silently traverse a symlinked ancestor, # so each component is checked and created individually instead. - # Each existing component is also re-resolved and required to land - # under project_root, which catches non-symlink directory aliases - # (e.g. Windows junctions / mount points) that resolve outside. + # Each component (existing or newly created) is also re-resolved + # and required to land under project_root, which catches + # non-symlink directory aliases (e.g. Windows junctions / mount + # points) that resolve outside, including ones that may appear + # mid-creation via a parent swap. import uuid as _uuid download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" project_root_resolved = project_root.resolve() - _current = project_root - for _part in download_dir.relative_to(project_root).parts: - _current = _current / _part - if _current.is_symlink(): - console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") - raise typer.Exit(1) - if _current.exists(): - if not _current.is_dir(): - console.print( - f"[red]Error:[/red] Download cache path is not a directory: {_current}" - ) + try: + _current = project_root + for _part in download_dir.relative_to(project_root).parts: + _current = _current / _part + if _current.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + raise typer.Exit(1) + if _current.exists(): + if not _current.is_dir(): + console.print( + f"[red]Error:[/red] Download cache path is not a directory: {_current}" + ) + raise typer.Exit(1) + try: + _current.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + console.print("[red]Error:[/red] Download cache directory escapes project root") + raise typer.Exit(1) + continue + _current.mkdir() + if _current.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") raise typer.Exit(1) + # Post-create containment check: a parent swapped to a + # junction / mount / symlink during creation could land + # the new directory outside project_root even though + # this component itself is not a symlink. try: _current.resolve().relative_to(project_root_resolved) except (OSError, ValueError): console.print("[red]Error:[/red] Download cache directory escapes project root") raise typer.Exit(1) - continue - _current.mkdir() - if _current.is_symlink(): - console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") - raise typer.Exit(1) + except OSError as _mkdir_err: + console.print( + f"[red]Error:[/red] Could not prepare download cache directory: {_mkdir_err}" + ) + raise typer.Exit(1) safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" safe_name = safe_name[:64] # cap length to avoid filesystem errors zip_filename = f"{safe_name}-{_uuid.uuid4().hex[:8]}.zip" zip_path = download_dir / zip_filename - # Guard: resolved path must stay inside download_dir (CWE-22) + # Guard: resolved zip_path must stay inside the validated cache + # directory *and* the cache directory must still resolve under + # project_root. The second check defends the Windows fallback + # path of `_safe_open_download_zip` (no `dir_fd`/`O_NOFOLLOW`) + # against an ancestor swapped to a junction/symlink between the + # validation loop above and this point: comparing only against + # `download_dir.resolve()` would silently follow the redirect. + try: + download_dir.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + console.print("[red]Error:[/red] Download cache directory escapes project root") + raise typer.Exit(1) try: zip_path.resolve().relative_to(download_dir.resolve()) except ValueError: diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index 2e2019d600..b8199bec35 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -39,6 +39,23 @@ def _require_symlinks(tmp_path) -> None: except OSError: pass +def _is_absolute_like(payload: str) -> bool: + """Detect payloads that may be treated as an absolute path on any supported OS. + + Used to skip the delete-side regression test for those payloads, since + constructing a sentinel relative to ``cache_dir`` for an absolute-like + payload would land outside the pytest sandbox (especially on Windows + where ``Path('C:\\\\...')`` and ``\\\\\\\\server\\\\share`` are roots). + The write-side test still covers containment for these cases. + """ + if payload.startswith(("/", "\\")): + return True + # Drive letter forms: C:\..., C:/... + if len(payload) >= 2 and payload[1] == ":" and payload[0].isalpha(): + return True + return False + + TRAVERSAL_PAYLOADS = [ "../pwned", "../../etc/passwd", @@ -46,6 +63,16 @@ def _require_symlinks(tmp_path) -> None: "..\\pwned", "..\\..\\etc\\passwd", "/tmp/evil", + # Deep relative traversal long enough to escape from + # `.specify/extensions/.cache/downloads` (4 segments) all the way out + # of project_root and beyond. + "../../../../../etc/shadow", + # Windows absolute path forms. The CI matrix runs these tests on + # windows-latest, so the sanitiser must reject drive-letter and UNC + # roots in addition to POSIX absolute paths. + "C:\\tmp\\evil", + "C:/tmp/evil", + "\\\\server\\share\\evil", ] @@ -85,13 +112,14 @@ def test_traversal_payload_writes_inside_cache(self, project_dir, bad_name): zip_arg = mock_install.call_args[0][0] # positional arg: zip_path zip_arg.resolve().relative_to(cache_dir.resolve()) # raises ValueError if outside - @pytest.mark.parametrize("bad_name", [p for p in TRAVERSAL_PAYLOADS if not p.startswith(("/", "\\"))]) + @pytest.mark.parametrize("bad_name", [p for p in TRAVERSAL_PAYLOADS if not _is_absolute_like(p)]) def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_name): """The finally-block cleanup must not delete files outside the cache dir. - Absolute-path payloads are excluded here to avoid writing sentinels - outside the pytest sandbox (e.g. ``/tmp``); the write-side test above - already proves the production guard contains absolute payloads. + Absolute-like payloads (POSIX absolute, Windows drive letter, UNC) are + excluded here to avoid writing sentinels outside the pytest sandbox; + the write-side test above already proves the production guard contains + absolute payloads. """ # Place a sentinel at the path the pre-fix code would have constructed cache_dir = project_dir / ".specify" / "extensions" / ".cache" / "downloads" From b3c21d3f3fccb657d3332f8460b54db94d6d26f4 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 16:21:48 -0500 Subject: [PATCH 09/10] address review round 8: delay manager construction, narrow handlers, safe unlink - Move ExtensionManager construction after cache validation for --from installs so .specify/extensions/.registry is never read through a symlinked ancestor (review comment 5). - Add _validate_safe_cache_dir helper (extracted from inline walk) and _safe_unlink_download_zip helper that mirrors _safe_open_download_zip with dir_fd + O_NOFOLLOW on POSIX and re-validation on Windows, so cleanup cannot follow a swapped cache ancestor (review comment 2). - Harden _safe_open_download_zip Windows fallback to re-walk the cache ancestor chain immediately before opening the leaf, narrowing the validation->write TOCTOU window on platforms without dir_fd / O_NOFOLLOW (review comment 1). - Narrow the OSError handler around the cache write so it no longer wraps manager.install_from_zip(); install errors now propagate to the generic ExtensionError handler with their proper messages instead of being misreported as 'Failed to write download cache' (review comment 3). - Fix delete-side regression test to capture the runner result and assert exit_code == 0 before checking the sentinel, so a regression that crashes the install before cleanup cannot pass silently (review comment 4). --- src/specify_cli/__init__.py | 292 +++++++++++++++------ tests/test_extension_add_path_traversal.py | 6 +- 2 files changed, 214 insertions(+), 84 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 36e01d200b..e380afa2a1 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3329,6 +3329,57 @@ def _resolve_installed_extension( raise typer.Exit(1) +def _validate_safe_cache_dir(project_root: Path) -> Path: + """Validate (and create) the per-project download cache directory safely. + + Walks ``.specify/extensions/.cache/downloads`` one component at a time, + refusing symlinked components and re-resolving each existing/new component + under ``project_root`` to catch non-symlink directory aliases (Windows + junctions / mount points) and mid-creation parent swaps. ``mkdir(parents= + True)`` would silently traverse a symlinked ancestor, so each component is + checked and created individually instead. + + Returns the validated download directory. Raises ``typer.Exit`` on any + safety violation (after printing a user-facing error). + """ + download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" + project_root_resolved = project_root.resolve() + try: + current = project_root + for part in download_dir.relative_to(project_root).parts: + current = current / part + if current.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + raise typer.Exit(1) + if current.exists(): + if not current.is_dir(): + console.print( + f"[red]Error:[/red] Download cache path is not a directory: {current}" + ) + raise typer.Exit(1) + try: + current.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + console.print("[red]Error:[/red] Download cache directory escapes project root") + raise typer.Exit(1) + continue + current.mkdir() + if current.is_symlink(): + console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") + raise typer.Exit(1) + try: + current.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + console.print("[red]Error:[/red] Download cache directory escapes project root") + raise typer.Exit(1) + except OSError as exc: + console.print( + f"[red]Error:[/red] Could not prepare download cache directory: {exc}" + ) + raise typer.Exit(1) + return download_dir + + def _safe_open_download_zip(project_root: Path, download_dir: Path, zip_filename: str) -> int: """Open ``download_dir / zip_filename`` for writing without following any symlink. @@ -3339,10 +3390,13 @@ def _safe_open_download_zip(project_root: Path, download_dir: Path, zip_filename ``O_EXCL`` so the unique UUID filename cannot collide with anything an attacker pre-staged. - On platforms without ``dir_fd``/``O_NOFOLLOW`` support (Windows), falls - back to a plain ``O_EXCL`` open. Symlink creation on Windows requires - elevated privileges, and the per-component ancestor walk performed by the - caller already covers the realistic threat surface there. + On platforms without ``dir_fd``/``O_NOFOLLOW`` support (Windows), the + function performs an immediate per-component ancestor walk (re-checking + ``is_symlink`` and re-resolving each component under ``project_root``) + *just before* the leaf open. This narrows the TOCTOU window between the + earlier caller-side validation and the actual write so a swap performed in + that window is still caught. Symlink creation on Windows additionally + requires elevated privileges. Returns an open file descriptor; the caller owns and must close it. Raises ``OSError`` (e.g. ``ELOOP``, ``EEXIST``, ``EACCES``) if any @@ -3352,8 +3406,23 @@ def _safe_open_download_zip(project_root: Path, download_dir: Path, zip_filename o_directory = getattr(os, "O_DIRECTORY", 0) leaf_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | o_nofollow - # Windows / no openat support: best-effort O_EXCL open. + # Windows / no openat support: re-validate ancestors immediately before + # opening so a swap in the (caller-validation -> open) window is caught. if os.open not in os.supports_dir_fd: + project_root_resolved = project_root.resolve() + current = project_root + for part in download_dir.relative_to(project_root).parts: + current = current / part + if current.is_symlink(): + raise OSError( + f"Refusing to follow symlinked cache component: {current}" + ) + try: + current.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + raise OSError( + f"Cache component escapes project root: {current}" + ) return os.open(download_dir / zip_filename, leaf_flags, 0o600) rel_parts = download_dir.relative_to(project_root).parts @@ -3370,6 +3439,74 @@ def _safe_open_download_zip(project_root: Path, download_dir: Path, zip_filename os.close(parent_fd) +def _safe_unlink_download_zip( + project_root: Path, download_dir: Path, zip_filename: str +) -> None: + """Best-effort unlink of the downloaded ZIP without following symlinks. + + Mirrors ``_safe_open_download_zip``: on POSIX walks each cache ancestor + with ``dir_fd`` + ``O_NOFOLLOW`` and unlinks via + ``os.unlink(zip_filename, dir_fd=parent_fd)`` so an attacker who swaps a + cache ancestor between the write and the cleanup cannot redirect the + unlink onto a same-named file outside the project. On Windows (no + ``dir_fd``/``O_NOFOLLOW``), re-validates the cache ancestor chain via + :func:`_validate_safe_cache_dir`-equivalent checks before unlinking. + + Errors are swallowed; this is best-effort cleanup. + """ + o_nofollow = getattr(os, "O_NOFOLLOW", 0) + o_directory = getattr(os, "O_DIRECTORY", 0) + + if os.open not in os.supports_dir_fd: + # Windows / no openat support: re-validate immediately before unlink. + project_root_resolved = project_root.resolve() + current = project_root + for part in download_dir.relative_to(project_root).parts: + current = current / part + if current.is_symlink(): + return + if not current.exists(): + return + try: + current.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + return + zip_path = download_dir / zip_filename + try: + if zip_path.is_symlink(): + return + if zip_path.exists(): + zip_path.unlink() + except OSError: + pass + return + + rel_parts = download_dir.relative_to(project_root).parts + try: + parent_fd = os.open(project_root, os.O_RDONLY | o_directory) + except OSError: + return + try: + for part in rel_parts: + try: + new_fd = os.open( + part, os.O_RDONLY | o_directory | o_nofollow, dir_fd=parent_fd + ) + except OSError: + return + os.close(parent_fd) + parent_fd = new_fd + try: + os.unlink(zip_filename, dir_fd=parent_fd) + except OSError: + pass + finally: + try: + os.close(parent_fd) + except OSError: + pass + + def _resolve_catalog_extension( argument: str, catalog, @@ -3644,6 +3781,16 @@ def extension_add( console.print("[red]Error:[/red] Priority must be a positive integer (1 or higher)") raise typer.Exit(1) + # For URL installs, validate (and create) the download cache *before* + # constructing ExtensionManager. The manager's constructor reads + # ``.specify/extensions/.registry`` via ``ExtensionRegistry``; if a + # ``.specify/extensions`` ancestor is symlinked, that read would follow + # the symlink before any guard ran. Performing the safe per-component + # walk first ensures the manager only ever opens registry files inside a + # validated, project-contained tree. + if from_url: + _download_dir = _validate_safe_cache_dir(project_root) + manager = ExtensionManager(project_root) speckit_version = get_speckit_version() @@ -3682,74 +3829,21 @@ def extension_add( console.print("Only install extensions from sources you trust.\n") console.print(f"Downloading from {from_url}...") - # Download ZIP to temp location. - # Validate and create each ancestor without following symlinks - # (consistent with shared_infra._ensure_safe_shared_directory): - # `mkdir(parents=True)` would silently traverse a symlinked ancestor, - # so each component is checked and created individually instead. - # Each component (existing or newly created) is also re-resolved - # and required to land under project_root, which catches - # non-symlink directory aliases (e.g. Windows junctions / mount - # points) that resolve outside, including ones that may appear - # mid-creation via a parent swap. + # The download cache directory has already been safely + # validated/created above (before ExtensionManager construction) + # via `_validate_safe_cache_dir(project_root)`. Reuse the same + # validated `download_dir` here. import uuid as _uuid - download_dir = project_root / ".specify" / "extensions" / ".cache" / "downloads" + download_dir = _download_dir project_root_resolved = project_root.resolve() - try: - _current = project_root - for _part in download_dir.relative_to(project_root).parts: - _current = _current / _part - if _current.is_symlink(): - console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") - raise typer.Exit(1) - if _current.exists(): - if not _current.is_dir(): - console.print( - f"[red]Error:[/red] Download cache path is not a directory: {_current}" - ) - raise typer.Exit(1) - try: - _current.resolve().relative_to(project_root_resolved) - except (OSError, ValueError): - console.print("[red]Error:[/red] Download cache directory escapes project root") - raise typer.Exit(1) - continue - _current.mkdir() - if _current.is_symlink(): - console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") - raise typer.Exit(1) - # Post-create containment check: a parent swapped to a - # junction / mount / symlink during creation could land - # the new directory outside project_root even though - # this component itself is not a symlink. - try: - _current.resolve().relative_to(project_root_resolved) - except (OSError, ValueError): - console.print("[red]Error:[/red] Download cache directory escapes project root") - raise typer.Exit(1) - except OSError as _mkdir_err: - console.print( - f"[red]Error:[/red] Could not prepare download cache directory: {_mkdir_err}" - ) - raise typer.Exit(1) safe_name = Path(extension).name.replace("/", "_").replace("\\", "_") or "download" safe_name = safe_name[:64] # cap length to avoid filesystem errors zip_filename = f"{safe_name}-{_uuid.uuid4().hex[:8]}.zip" zip_path = download_dir / zip_filename - # Guard: resolved zip_path must stay inside the validated cache - # directory *and* the cache directory must still resolve under - # project_root. The second check defends the Windows fallback - # path of `_safe_open_download_zip` (no `dir_fd`/`O_NOFOLLOW`) - # against an ancestor swapped to a junction/symlink between the - # validation loop above and this point: comparing only against - # `download_dir.resolve()` would silently follow the redirect. - try: - download_dir.resolve().relative_to(project_root_resolved) - except (OSError, ValueError): - console.print("[red]Error:[/red] Download cache directory escapes project root") - raise typer.Exit(1) + # Containment guard: the resolved zip_path must stay inside the + # validated cache directory. try: zip_path.resolve().relative_to(download_dir.resolve()) except ValueError: @@ -3761,15 +3855,31 @@ def extension_add( with _open_url(from_url, timeout=60) as response: zip_data = response.read() + except urllib.error.URLError as e: + console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}") + raise typer.Exit(1) + + # Re-validate cache containment immediately before opening the + # write fd. This narrows the (validation -> write) TOCTOU + # window for the Windows fallback path of + # `_safe_open_download_zip` (which has no `dir_fd`/ + # `O_NOFOLLOW`); on POSIX, the helper also re-walks ancestors + # with `dir_fd + O_NOFOLLOW` so any swap is rejected at open + # time as well. + try: + download_dir.resolve().relative_to(project_root_resolved) + except (OSError, ValueError): + console.print("[red]Error:[/red] Download cache directory escapes project root") + raise typer.Exit(1) - # Open the ZIP file for writing without following any symlink - # in the path. On POSIX, walk each ancestor of `download_dir` - # using dir_fd + O_NOFOLLOW so a symlink swap of *any* - # component (including ancestors) between validation above - # and this write is rejected. On Windows there is no - # `dir_fd` / `O_NOFOLLOW` support, so we fall back to a - # plain O_EXCL open which still requires the unique UUID - # filename to be freshly created. + try: + # Open the ZIP file for writing without following any + # symlink in the path. On POSIX, the helper walks each + # ancestor of `download_dir` using dir_fd + O_NOFOLLOW so + # a symlink swap of *any* component (including ancestors) + # between validation and this write is rejected. On + # Windows the helper re-walks the ancestor chain + # immediately before opening to narrow the same window. try: _fd = _safe_open_download_zip( project_root, download_dir, zip_filename @@ -3790,19 +3900,35 @@ def extension_add( except OSError: pass raise - - # Install from downloaded ZIP - manifest = manager.install_from_zip(zip_path, speckit_version, priority=priority) - except urllib.error.URLError as e: - console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}") - raise typer.Exit(1) except OSError as e: - console.print(f"[red]Error:[/red] Failed to write download cache: {e}") + # Narrow handler: only covers the cache-write fd + # acquisition + write. Errors from `install_from_zip` + # (e.g. corrupted archive, manifest validation) are + # surfaced separately below so they aren't misreported as + # "Failed to write download cache". + console.print( + f"[red]Error:[/red] Failed to write download cache: {e}" + ) raise typer.Exit(1) + + try: + # Install from downloaded ZIP. Errors from the manager + # (ValidationError / CompatibilityError / ExtensionError) + # are caught by the outer handler so users see the + # extension-specific message rather than a generic + # "Failed to write download cache". + manifest = manager.install_from_zip( + zip_path, speckit_version, priority=priority + ) finally: - # Clean up downloaded ZIP - if zip_path.exists(): - zip_path.unlink() + # Best-effort cleanup of the downloaded ZIP. Uses a + # symlink-safe walk so an attacker who swaps a cache + # ancestor between the write and the unlink cannot + # redirect the delete onto a same-named file outside the + # project root. + _safe_unlink_download_zip( + project_root, download_dir, zip_filename + ) else: # Try bundled extensions first (shipped with spec-kit) diff --git a/tests/test_extension_add_path_traversal.py b/tests/test_extension_add_path_traversal.py index b8199bec35..789b51432a 100644 --- a/tests/test_extension_add_path_traversal.py +++ b/tests/test_extension_add_path_traversal.py @@ -130,10 +130,14 @@ def test_traversal_payload_cannot_delete_outside_cache(self, project_dir, bad_na with patch("specify_cli.authentication.http.open_url", return_value=_mock_open_url()), \ patch("specify_cli.extensions.ExtensionManager.install_from_zip", return_value=MagicMock(id="x", name="X", version="1.0.0")): - runner.invoke( + result = runner.invoke( app, ["extension", "add", bad_name, "--from", "https://example.com/ext.zip"], ) + # The command must succeed: a non-zero exit would mean the install + # failed before reaching cleanup, which would leave the sentinel + # untouched for the wrong reason and mask any cleanup-side regression. + assert result.exit_code == 0, result.output assert pre_fix_path.exists(), f"Sentinel deleted by cleanup: {pre_fix_path}" assert pre_fix_path.read_text() == "sentinel" From 61e23acc5921458d3aede81815d08e3d3c26e3d4 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Fri, 15 May 2026 17:01:36 -0500 Subject: [PATCH 10/10] address review round 9: concurrent-safe mkdir, write-phase cleanup, fail-closed Windows fallback - Make _validate_safe_cache_dir tolerate concurrent installs: catch FileExistsError from current.mkdir() and re-run is_dir / symlink / containment checks against whatever is now there. Two parallel 'extension add --from' processes can no longer fatally race on cache directory creation (review comment 1). - Restructure the URL-install pipeline so _safe_unlink_download_zip runs in a single outer finally that wraps both the cache write and install_from_zip. A failed _zf.write() (e.g. ENOSPC) now triggers cleanup of the partial ZIP instead of leaving it behind (review comment 2). The narrow per-phase except blocks still classify which phase failed so error messages remain accurate. - Make _safe_open_download_zip fail closed on platforms without dir_fd / O_NOFOLLOW: there is no standard-library primitive that atomically binds a leaf open to a previously-validated ancestor chain, so a path-based open after a separate validation walk is racy (junction creation on Windows does not require elevation). Surface a clear error directing users to --dev or catalog installs (review comment 3). - Make _safe_unlink_download_zip a no-op on the same platforms for the symmetric reason: a path-based unlink could be redirected through a swapped ancestor onto a same-named file outside the project. In practice this branch is unreachable in the normal install flow because the open helper has already failed closed, so no ZIP exists to clean up (review comment 4). --- src/specify_cli/__init__.py | 225 +++++++++++++++++++----------------- 1 file changed, 121 insertions(+), 104 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index e380afa2a1..2ec344361e 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -3363,7 +3363,20 @@ def _validate_safe_cache_dir(project_root: Path) -> Path: console.print("[red]Error:[/red] Download cache directory escapes project root") raise typer.Exit(1) continue - current.mkdir() + # Race-tolerant create: a concurrent `extension add --from` + # process may have created the same component between the + # `current.exists()` check above and this line. Treat + # `FileExistsError` as success and re-run the symlink / + # containment / is_dir checks against whatever is now there. + try: + current.mkdir() + except FileExistsError: + pass + if not current.is_dir(): + console.print( + f"[red]Error:[/red] Download cache path is not a directory: {current}" + ) + raise typer.Exit(1) if current.is_symlink(): console.print("[red]Error:[/red] Refusing to use symlinked download cache directory") raise typer.Exit(1) @@ -3385,45 +3398,39 @@ def _safe_open_download_zip(project_root: Path, download_dir: Path, zip_filename Closes the TOCTOU window between cache-ancestor validation and the actual file write. On POSIX, walks each component of ``download_dir`` using - ``dir_fd`` + ``O_NOFOLLOW`` so a symlink swap of *any* component (including - ancestors) after validation is rejected. The leaf is created with - ``O_EXCL`` so the unique UUID filename cannot collide with anything an - attacker pre-staged. - - On platforms without ``dir_fd``/``O_NOFOLLOW`` support (Windows), the - function performs an immediate per-component ancestor walk (re-checking - ``is_symlink`` and re-resolving each component under ``project_root``) - *just before* the leaf open. This narrows the TOCTOU window between the - earlier caller-side validation and the actual write so a swap performed in - that window is still caught. Symlink creation on Windows additionally - requires elevated privileges. + ``dir_fd`` + ``O_NOFOLLOW`` so a symlink swap of *any* component + (including ancestors) after validation is rejected. The leaf is created + with ``O_EXCL`` so the unique UUID filename cannot collide with anything + an attacker pre-staged. + + On platforms without ``dir_fd``/``O_NOFOLLOW`` support (notably Windows), + no atomic primitive exists in the standard library to bind the leaf open + to a previously-validated ancestor chain. A path-based open after a + separate validation walk is racy: an attacker who can write inside the + project (junction creation on Windows does not require elevation) can + swap a cache ancestor to a junction between the validation loop and the + leaf open, redirecting the write outside the project root. To avoid that + silent escape, this helper fails closed on such platforms instead. + Callers should surface a clear error and direct users to ``--dev`` or + catalog-based installs as alternatives. Returns an open file descriptor; the caller owns and must close it. Raises ``OSError`` (e.g. ``ELOOP``, ``EEXIST``, ``EACCES``) if any - component is a symlink or the leaf already exists. + component is a symlink or the leaf already exists, or + ``NotImplementedError`` on platforms lacking ``dir_fd`` support. """ o_nofollow = getattr(os, "O_NOFOLLOW", 0) o_directory = getattr(os, "O_DIRECTORY", 0) leaf_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | o_nofollow - # Windows / no openat support: re-validate ancestors immediately before - # opening so a swap in the (caller-validation -> open) window is caught. if os.open not in os.supports_dir_fd: - project_root_resolved = project_root.resolve() - current = project_root - for part in download_dir.relative_to(project_root).parts: - current = current / part - if current.is_symlink(): - raise OSError( - f"Refusing to follow symlinked cache component: {current}" - ) - try: - current.resolve().relative_to(project_root_resolved) - except (OSError, ValueError): - raise OSError( - f"Cache component escapes project root: {current}" - ) - return os.open(download_dir / zip_filename, leaf_flags, 0o600) + # Fail closed: see docstring. No safe primitive available here. + raise NotImplementedError( + "URL-based extension installs require POSIX-style " + "dir_fd + O_NOFOLLOW support, which is not available on this " + "platform. Use --dev for a local directory, or install from " + "the bundled catalog instead." + ) rel_parts = download_dir.relative_to(project_root).parts parent_fd = os.open(project_root, os.O_RDONLY | o_directory) @@ -3444,13 +3451,21 @@ def _safe_unlink_download_zip( ) -> None: """Best-effort unlink of the downloaded ZIP without following symlinks. - Mirrors ``_safe_open_download_zip``: on POSIX walks each cache ancestor - with ``dir_fd`` + ``O_NOFOLLOW`` and unlinks via - ``os.unlink(zip_filename, dir_fd=parent_fd)`` so an attacker who swaps a - cache ancestor between the write and the cleanup cannot redirect the - unlink onto a same-named file outside the project. On Windows (no - ``dir_fd``/``O_NOFOLLOW``), re-validates the cache ancestor chain via - :func:`_validate_safe_cache_dir`-equivalent checks before unlinking. + On POSIX, walks each cache ancestor with ``dir_fd`` + ``O_NOFOLLOW`` and + unlinks via ``os.unlink(zip_filename, dir_fd=parent_fd)`` so an attacker + who swaps a cache ancestor between the write and the cleanup cannot + redirect the unlink onto a same-named file outside the project. + + On platforms without ``dir_fd``/``O_NOFOLLOW`` support, this function is + a no-op for the same reason :func:`_safe_open_download_zip` fails closed + there: a path-based ``unlink`` after a separate validation walk is racy + and could be redirected through a swapped ancestor onto a same-named + file outside the project. Skipping cleanup leaves a stale ZIP in the + cache directory but does not leak deletes; subsequent installs use + fresh UUID filenames so the stale file is harmless. (In practice this + branch is unreachable in the normal install flow because + :func:`_safe_open_download_zip` already fails closed before any ZIP is + written.) Errors are swallowed; this is best-effort cleanup. """ @@ -3458,27 +3473,7 @@ def _safe_unlink_download_zip( o_directory = getattr(os, "O_DIRECTORY", 0) if os.open not in os.supports_dir_fd: - # Windows / no openat support: re-validate immediately before unlink. - project_root_resolved = project_root.resolve() - current = project_root - for part in download_dir.relative_to(project_root).parts: - current = current / part - if current.is_symlink(): - return - if not current.exists(): - return - try: - current.resolve().relative_to(project_root_resolved) - except (OSError, ValueError): - return - zip_path = download_dir / zip_filename - try: - if zip_path.is_symlink(): - return - if zip_path.exists(): - zip_path.unlink() - except OSError: - pass + # Fail closed: see docstring. return rel_parts = download_dir.relative_to(project_root).parts @@ -3860,58 +3855,76 @@ def extension_add( raise typer.Exit(1) # Re-validate cache containment immediately before opening the - # write fd. This narrows the (validation -> write) TOCTOU - # window for the Windows fallback path of - # `_safe_open_download_zip` (which has no `dir_fd`/ - # `O_NOFOLLOW`); on POSIX, the helper also re-walks ancestors - # with `dir_fd + O_NOFOLLOW` so any swap is rejected at open - # time as well. + # write fd. On POSIX, `_safe_open_download_zip` additionally + # re-walks ancestors with `dir_fd + O_NOFOLLOW` so any swap is + # rejected at open time as well; on platforms without that + # support the helper fails closed (see its docstring). try: download_dir.resolve().relative_to(project_root_resolved) except (OSError, ValueError): console.print("[red]Error:[/red] Download cache directory escapes project root") raise typer.Exit(1) + # Outer try/finally guarantees the symlink-safe cleanup runs + # regardless of where in the (open -> write -> install) + # pipeline a failure occurs, including a partial write that + # raises OSError after the fd has been opened. The narrower + # try/except blocks below classify *which* phase failed so + # error messages remain accurate. try: - # Open the ZIP file for writing without following any - # symlink in the path. On POSIX, the helper walks each - # ancestor of `download_dir` using dir_fd + O_NOFOLLOW so - # a symlink swap of *any* component (including ancestors) - # between validation and this write is rejected. On - # Windows the helper re-walks the ancestor chain - # immediately before opening to narrow the same window. try: - _fd = _safe_open_download_zip( - project_root, download_dir, zip_filename - ) - except OSError as _open_err: + # Open the ZIP file for writing without following any + # symlink in the path. On POSIX, the helper walks + # each ancestor of `download_dir` using + # dir_fd + O_NOFOLLOW so a symlink swap of *any* + # component (including ancestors) between validation + # and this write is rejected. On platforms without + # dir_fd / O_NOFOLLOW (Windows), the helper raises + # NotImplementedError; URL installs require the + # POSIX-style atomic primitives to be safe against + # ancestor swaps. + try: + _fd = _safe_open_download_zip( + project_root, download_dir, zip_filename + ) + except NotImplementedError as _ni: + console.print(f"[red]Error:[/red] {_ni}") + raise typer.Exit(1) + except OSError as _open_err: + console.print( + f"[red]Error:[/red] Could not safely create download file: {_open_err}" + ) + raise typer.Exit(1) + try: + with os.fdopen(_fd, "wb") as _zf: + _zf.write(zip_data) + except OSError as _write_err: + # Write failed (e.g. ENOSPC, EIO). os.fdopen took + # ownership of the fd; the with-block close will + # have already run on the way out. + console.print( + f"[red]Error:[/red] Failed to write download cache: {_write_err}" + ) + raise typer.Exit(1) + except BaseException: + # Non-OSError failure before/inside the + # with-block: defensively close the fd in case + # os.fdopen never took ownership, then re-raise + # so the outer finally still cleans up. + try: + os.close(_fd) + except OSError: + pass + raise + except OSError as e: + # Catches OSError raised from `_safe_open_download_zip` + # below the typed handlers above; preserves the + # narrow "cache write" framing. console.print( - f"[red]Error:[/red] Could not safely create download file: {_open_err}" + f"[red]Error:[/red] Failed to write download cache: {e}" ) raise typer.Exit(1) - try: - with os.fdopen(_fd, "wb") as _zf: - _zf.write(zip_data) - except BaseException: - # os.fdopen took ownership only on success; on failure - # before the with-block, the fd may still be open. - try: - os.close(_fd) - except OSError: - pass - raise - except OSError as e: - # Narrow handler: only covers the cache-write fd - # acquisition + write. Errors from `install_from_zip` - # (e.g. corrupted archive, manifest validation) are - # surfaced separately below so they aren't misreported as - # "Failed to write download cache". - console.print( - f"[red]Error:[/red] Failed to write download cache: {e}" - ) - raise typer.Exit(1) - try: # Install from downloaded ZIP. Errors from the manager # (ValidationError / CompatibilityError / ExtensionError) # are caught by the outer handler so users see the @@ -3921,11 +3934,15 @@ def extension_add( zip_path, speckit_version, priority=priority ) finally: - # Best-effort cleanup of the downloaded ZIP. Uses a - # symlink-safe walk so an attacker who swaps a cache - # ancestor between the write and the unlink cannot - # redirect the delete onto a same-named file outside the - # project root. + # Symlink-safe cleanup of the downloaded ZIP. Runs on + # every exit path (success, write failure, install + # failure) so a partial ZIP from an interrupted write is + # not left behind. On platforms without dir_fd / + # O_NOFOLLOW the helper is a no-op (fails closed) since + # a path-based unlink there could be redirected through + # a swapped ancestor onto a same-named file outside the + # project; in that case `_safe_open_download_zip` has + # already failed closed, so no ZIP exists to clean up. _safe_unlink_download_zip( project_root, download_dir, zip_filename )