From 7b0d8ce5ed64ad0c336518b4347d5e6ae36fb220 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 22:51:58 -0700 Subject: [PATCH 1/7] Add arch tests for Codex backend architectural invariants - tests/arch/test_env_symmetry.py: Verify build_skill_session_cmd and build_food_truck_cmd share required env vars (xfail: AGENT_BACKEND_ENV_VAR missing from food_truck) - tests/arch/test_no_not_implemented.py: Verify no BACKEND_REGISTRY class raises NotImplementedError (xfail: CodexBackend has two unimplemented methods) - tests/server/test_server_init_gate.py: Add test_tool_list_changes_after_enable_within_session to verify kitchen tag enable propagates to client tool list - tests/arch/CLAUDE.md: Register new test files - resolve-review/SKILL.md: Add to Architectural Constraint Catalog Co-Authored-By: Claude Opus 4.7 --- .../skills_extended/resolve-review/SKILL.md | 2 + tests/arch/CLAUDE.md | 2 + tests/arch/test_env_symmetry.py | 69 +++++++++++++++++++ tests/arch/test_no_not_implemented.py | 41 +++++++++++ tests/server/test_server_init_gate.py | 18 +++++ 5 files changed, 132 insertions(+) create mode 100644 tests/arch/test_env_symmetry.py create mode 100644 tests/arch/test_no_not_implemented.py diff --git a/src/autoskillit/skills_extended/resolve-review/SKILL.md b/src/autoskillit/skills_extended/resolve-review/SKILL.md index 0ea34dc541..9f1470cbf1 100644 --- a/src/autoskillit/skills_extended/resolve-review/SKILL.md +++ b/src/autoskillit/skills_extended/resolve-review/SKILL.md @@ -468,6 +468,8 @@ classified `REJECT` with `category: "arch_violation"`. | Boot step symmetry | `test_boot_step_symmetry.py` | Both boot functions must call all required boot steps in the right order | | BackendCapabilities consumed | `test_capability_consumption.py` | Every `BackendCapabilities` field must have a production consumer — unused capability fields are prohibited | | BackendCapabilities documented | `test_capability_docstrings.py` | `BackendCapabilities` class and every field must have docstrings | +| Env var symmetry | `test_env_symmetry.py` | `build_skill_session_cmd` and `build_food_truck_cmd` must both set required base env vars; `AGENT_BACKEND_ENV_VAR` must appear in food_truck | +| No NotImplementedError in backends | `test_no_not_implemented.py` | Registered backend classes must not raise `NotImplementedError` — `CodingAgentBackend` is a Protocol, not an ABC | | Channel B timeout floor | `test_channel_b_timeout_guard.py` | Channel B calls with `timeout` below `TimeoutTier.CHANNEL_B` minimum | | Clone network timeouts | `test_clone_timeouts.py` | `subprocess.run()` with git network subcommands (clone/fetch/pull/push/ls-remote) in `clone.py` without `timeout=` | | Dispatch timeout resolver | `test_dispatch_timeout_guard.py` | `_run_dispatch` using hardcoded timeout instead of `resolve_dispatch_timeout()` | diff --git a/tests/arch/CLAUDE.md b/tests/arch/CLAUDE.md index 4c05c90f27..45db6c9b5f 100644 --- a/tests/arch/CLAUDE.md +++ b/tests/arch/CLAUDE.md @@ -23,6 +23,7 @@ AST enforcement, sub-package layer contracts, and architectural invariant tests. | `test_backend_protocol_completeness.py` | Protocol completeness tests for CodingAgentBackend command builders | | `test_audit_feature_gates_skill.py` | Structural integrity tests for the audit-feature-gates skill | | `test_eval_agent_skill.py` | Structural integrity tests for the eval-agent skill | +| `test_env_symmetry.py` | Architectural invariant: skill and food-truck builders must set the same required base env vars | | `test_boot_step_symmetry.py` | AST guard: both boot functions (_fleet_auto_gate_boot, _food_truck_auto_gate_boot) must call sweep_stale_dispatch_labels | | `test_bundled_recipes_split.py` | Enforcement: test_bundled_recipes.py split structure guard | | `test_cascade_map_guard.py` | REQ-GUARD-001..003, 005: CI guard validating cascade maps against AST-derived reverse import graph | @@ -54,6 +55,7 @@ AST enforcement, sub-package layer contracts, and architectural invariant tests. | `test_no_error_dict_return.py` | AST guard: load_and_validate must not return dicts with 'error' key — errors flow via exceptions | | `test_flush_no_rid_guard.py` | AST guard: no requestId truthiness guard in flush_session_log turn extraction loop | | `test_no_inline_jsonl_request_id_dedup.py` | AST guard: no inline requestId dedup in session_log.py or tool_sequence_analysis.py | +| `test_no_not_implemented.py` | Architectural invariant: no registered backend may raise NotImplementedError | | `test_resolve_turn_id_guard.py` | AST guard: no direct .get("requestId") calls outside _resolve_turn_id() in tool_sequence_analysis.py | | `test_protocol_names.py` | T5-T6: Protocol naming and DefaultSkillResolver export smoke tests | | `test_provider_profile_contract.py` | Tier 3 contract: _resolve_provider_profile must never return step_name as profile | diff --git a/tests/arch/test_env_symmetry.py b/tests/arch/test_env_symmetry.py new file mode 100644 index 0000000000..7a84a919cc --- /dev/null +++ b/tests/arch/test_env_symmetry.py @@ -0,0 +1,69 @@ +"""Architectural invariant: skill and food-truck builders must set the same required base env vars.""" # noqa: E501 + +from pathlib import Path + +import pytest + +pytestmark = [pytest.mark.layer("arch"), pytest.mark.small] + +_REQUIRED_IN_BOTH: frozenset[str] = frozenset( + { + "AUTOSKILLIT_HEADLESS", + "AUTOSKILLIT_SESSION_TYPE", + "MAX_MCP_OUTPUT_TOKENS", + } +) + + +@pytest.fixture(autouse=True) +def _clean_env(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("AUTOSKILLIT_CAMPAIGN_ID", raising=False) + monkeypatch.delenv("AUTOSKILLIT_KITCHEN_SESSION_ID", raising=False) + + +def test_skill_and_food_truck_share_required_env_vars() -> None: + """build_skill_session_cmd and build_food_truck_cmd must both set the required base env vars.""" # noqa: E501 + from autoskillit.core.types._type_plugin_source import DirectInstall + from autoskillit.execution.backends import BACKEND_REGISTRY + + for name, cls in BACKEND_REGISTRY.items(): + backend = cls() + skill_spec = backend.build_skill_session_cmd( + "/autoskillit:investigate", "/repo", completion_marker="DONE" + ) + food_truck_spec = backend.build_food_truck_cmd( + orchestrator_prompt="test prompt", + plugin_source=DirectInstall(plugin_dir=Path("/plugins")), + cwd="/repo", + completion_marker="DONE", + ) + for var in _REQUIRED_IN_BOTH: + assert var in skill_spec.env, f"{name}: {var} missing from build_skill_session_cmd env" + assert var in food_truck_spec.env, ( + f"{name}: {var} missing from build_food_truck_cmd env" + ) + + +@pytest.mark.xfail( + reason="AGENT_BACKEND_ENV_VAR not yet added to build_food_truck_cmd", + strict=True, +) +def test_agent_backend_env_var_in_food_truck(monkeypatch: pytest.MonkeyPatch) -> None: + """AUTOSKILLIT_AGENT_BACKEND must appear in build_food_truck_cmd env for every backend.""" + from autoskillit.core.types._type_plugin_source import DirectInstall + from autoskillit.execution.backends import BACKEND_REGISTRY + + # Ensure clean environment - remove any residual AGENT_BACKEND_ENV_VAR from host + monkeypatch.delenv("AUTOSKILLIT_AGENT_BACKEND", raising=False) + + for name, cls in BACKEND_REGISTRY.items(): + backend = cls() + food_truck_spec = backend.build_food_truck_cmd( + orchestrator_prompt="test prompt", + plugin_source=DirectInstall(plugin_dir=Path("/plugins")), + cwd="/repo", + completion_marker="DONE", + ) + assert "AUTOSKILLIT_AGENT_BACKEND" in food_truck_spec.env, ( + f"{name}: AUTOSKILLIT_AGENT_BACKEND missing from build_food_truck_cmd env" + ) diff --git a/tests/arch/test_no_not_implemented.py b/tests/arch/test_no_not_implemented.py new file mode 100644 index 0000000000..d98de91c54 --- /dev/null +++ b/tests/arch/test_no_not_implemented.py @@ -0,0 +1,41 @@ +"""Architectural invariant: no registered backend may raise NotImplementedError.""" + +import ast +import inspect + +import pytest + +pytestmark = [pytest.mark.layer("arch"), pytest.mark.small] + + +@pytest.mark.xfail( + reason="#3383: CodexBackend.validate_skill_content and .list_plugins not yet implemented", + strict=True, +) +def test_no_not_implemented_error_in_registered_backends() -> None: + """No method on any BACKEND_REGISTRY class should raise NotImplementedError. + + CodingAgentBackend is a typing.Protocol, not an ABC — there are no legitimate + abstract stubs. Any raise NotImplementedError in a registered backend is a bug. + """ + from autoskillit.execution.backends import BACKEND_REGISTRY + + violations: list[str] = [] + for name, cls in BACKEND_REGISTRY.items(): + source = inspect.getsource(cls) + tree = ast.parse(source) + for node in ast.walk(tree): + if not isinstance(node, ast.Raise): + continue + exc = node.exc + if ( + isinstance(exc, ast.Call) + and isinstance(exc.func, ast.Name) + and exc.func.id == "NotImplementedError" + ): + violations.append(f"{name}:{node.lineno}") + + assert not violations, ( + f"Registered backends must not raise NotImplementedError " + f"(CodingAgentBackend is a Protocol, not an ABC): {violations}" + ) diff --git a/tests/server/test_server_init_gate.py b/tests/server/test_server_init_gate.py index 8abe3d201a..a2e779b22b 100644 --- a/tests/server/test_server_init_gate.py +++ b/tests/server/test_server_init_gate.py @@ -66,6 +66,24 @@ async def test_redisable_subsets_hides_kitchen_core(self) -> None: mock_ctx.disable_components.assert_called_once_with(tags={"kitchen-core"}) + @pytest.mark.anyio + async def test_tool_list_changes_after_enable_within_session(self): + """Tool list visible to a client changes when kitchen tags are enabled mid-session.""" # noqa: E501 + from fastmcp.client import Client + + from autoskillit.pipeline.gate import GATED_TOOLS + from autoskillit.server import mcp + + async with Client(mcp) as client: + tools_before = {t.name for t in await client.list_tools()} + assert not (tools_before & GATED_TOOLS), "Kitchen tools should be hidden before enable" + + mcp.enable(tags={"kitchen"}) + + tools_after = {t.name for t in await client.list_tools()} + new_tools = tools_after - tools_before + assert new_tools, "Enabling kitchen tags should make new tools visible" + class TestGatedToolAccess: """Prompt-gated tool access: tools disabled by default, user-only activation.""" From baf4772b35dde94762e2e5c14bd75b2ad1a2c5b7 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 23:37:21 -0700 Subject: [PATCH 2/7] fix(review): add issue reference to xfail decorator in test_env_symmetry xfail without a tracking issue number has no discoverable removal path. Adds #3385 reference so the xfail can be cleaned up when AUTOSKILLIT_AGENT_BACKEND is wired into build_food_truck_cmd. --- tests/arch/test_env_symmetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/arch/test_env_symmetry.py b/tests/arch/test_env_symmetry.py index 7a84a919cc..ee4b0ab823 100644 --- a/tests/arch/test_env_symmetry.py +++ b/tests/arch/test_env_symmetry.py @@ -45,7 +45,7 @@ def test_skill_and_food_truck_share_required_env_vars() -> None: @pytest.mark.xfail( - reason="AGENT_BACKEND_ENV_VAR not yet added to build_food_truck_cmd", + reason="#3385: AUTOSKILLIT_AGENT_BACKEND not yet added to build_food_truck_cmd", strict=True, ) def test_agent_backend_env_var_in_food_truck(monkeypatch: pytest.MonkeyPatch) -> None: From d985f3b1ac6003e97c08438fda3fdec73c1626c3 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 23:37:27 -0700 Subject: [PATCH 3/7] fix(review): strengthen gate-enable assertion and add return type annotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - assert new_tools & GATED_TOOLS instead of bare truthiness check — verifies that the newly visible tools are actually kitchen-gated tools, not arbitrary tools from a misconfigured tag. - Add -> None return type to test_tool_list_changes_after_enable_within_session for consistency with the surrounding class. --- tests/server/test_server_init_gate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/server/test_server_init_gate.py b/tests/server/test_server_init_gate.py index a2e779b22b..99ad9a64db 100644 --- a/tests/server/test_server_init_gate.py +++ b/tests/server/test_server_init_gate.py @@ -67,7 +67,7 @@ async def test_redisable_subsets_hides_kitchen_core(self) -> None: mock_ctx.disable_components.assert_called_once_with(tags={"kitchen-core"}) @pytest.mark.anyio - async def test_tool_list_changes_after_enable_within_session(self): + async def test_tool_list_changes_after_enable_within_session(self) -> None: """Tool list visible to a client changes when kitchen tags are enabled mid-session.""" # noqa: E501 from fastmcp.client import Client @@ -82,7 +82,9 @@ async def test_tool_list_changes_after_enable_within_session(self): tools_after = {t.name for t in await client.list_tools()} new_tools = tools_after - tools_before - assert new_tools, "Enabling kitchen tags should make new tools visible" + assert new_tools & GATED_TOOLS, ( + "Enabling kitchen tags should make kitchen-gated tools visible" + ) class TestGatedToolAccess: From ecc7dd23ac3662a7ea30b4fbe98d6df2ccfd93c2 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 23:53:36 -0700 Subject: [PATCH 4/7] fix(review): add BACKEND_REGISTRY non-empty guard and bare NotImplementedError detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_env_symmetry.py: assert BACKEND_REGISTRY before both loops so vacuous passes (empty registry → skipped assertions → false green) are detected - test_no_not_implemented.py: extend AST check to catch bare `raise NotImplementedError` (ast.Name), which the existing ast.Call-only branch silently missed Co-Authored-By: Claude Sonnet 4.6 --- tests/arch/test_env_symmetry.py | 2 ++ tests/arch/test_no_not_implemented.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/arch/test_env_symmetry.py b/tests/arch/test_env_symmetry.py index ee4b0ab823..ca91c242d6 100644 --- a/tests/arch/test_env_symmetry.py +++ b/tests/arch/test_env_symmetry.py @@ -26,6 +26,7 @@ def test_skill_and_food_truck_share_required_env_vars() -> None: from autoskillit.core.types._type_plugin_source import DirectInstall from autoskillit.execution.backends import BACKEND_REGISTRY + assert BACKEND_REGISTRY, "BACKEND_REGISTRY is empty — test provides no coverage" for name, cls in BACKEND_REGISTRY.items(): backend = cls() skill_spec = backend.build_skill_session_cmd( @@ -56,6 +57,7 @@ def test_agent_backend_env_var_in_food_truck(monkeypatch: pytest.MonkeyPatch) -> # Ensure clean environment - remove any residual AGENT_BACKEND_ENV_VAR from host monkeypatch.delenv("AUTOSKILLIT_AGENT_BACKEND", raising=False) + assert BACKEND_REGISTRY, "BACKEND_REGISTRY is empty — test provides no coverage" for name, cls in BACKEND_REGISTRY.items(): backend = cls() food_truck_spec = backend.build_food_truck_cmd( diff --git a/tests/arch/test_no_not_implemented.py b/tests/arch/test_no_not_implemented.py index d98de91c54..5783d1ff1e 100644 --- a/tests/arch/test_no_not_implemented.py +++ b/tests/arch/test_no_not_implemented.py @@ -32,7 +32,7 @@ def test_no_not_implemented_error_in_registered_backends() -> None: isinstance(exc, ast.Call) and isinstance(exc.func, ast.Name) and exc.func.id == "NotImplementedError" - ): + ) or (isinstance(exc, ast.Name) and exc.id == "NotImplementedError"): violations.append(f"{name}:{node.lineno}") assert not violations, ( From eeacbfcf18db4c9c7e69f0e1a15ddb7e7b572fe0 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sun, 31 May 2026 00:12:24 -0700 Subject: [PATCH 5/7] fix(review): add AUTOSKILLIT_AGENT_BACKEND to _clean_env autouse fixture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure consistent env isolation across all tests in test_env_symmetry.py by deleting AUTOSKILLIT_AGENT_BACKEND in the autouse _clean_env fixture — preventing host-env state from leaking into test_skill_and_food_truck_share_required_env_vars. --- tests/arch/test_env_symmetry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/arch/test_env_symmetry.py b/tests/arch/test_env_symmetry.py index ca91c242d6..4d7f603602 100644 --- a/tests/arch/test_env_symmetry.py +++ b/tests/arch/test_env_symmetry.py @@ -19,6 +19,7 @@ def _clean_env(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("AUTOSKILLIT_CAMPAIGN_ID", raising=False) monkeypatch.delenv("AUTOSKILLIT_KITCHEN_SESSION_ID", raising=False) + monkeypatch.delenv("AUTOSKILLIT_AGENT_BACKEND", raising=False) def test_skill_and_food_truck_share_required_env_vars() -> None: From 704c3b027536bcbc6fb8221674065d61790f11ac Mon Sep 17 00:00:00 2001 From: Trecek Date: Sun, 31 May 2026 07:56:11 -0700 Subject: [PATCH 6/7] ci: trigger From 45dad77657d962d38851f54d2134a6283274c5a9 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sun, 31 May 2026 08:12:49 -0700 Subject: [PATCH 7/7] fix(tests): remove stale xfail markers from arch tests Both tests now pass (underlying fixes in #3383 and #3385 have landed), so the strict xfail decorators cause XPASS(strict) failures. Removed. --- tests/arch/test_env_symmetry.py | 4 ---- tests/arch/test_no_not_implemented.py | 4 ---- 2 files changed, 8 deletions(-) diff --git a/tests/arch/test_env_symmetry.py b/tests/arch/test_env_symmetry.py index 4d7f603602..a9d8fc36a7 100644 --- a/tests/arch/test_env_symmetry.py +++ b/tests/arch/test_env_symmetry.py @@ -46,10 +46,6 @@ def test_skill_and_food_truck_share_required_env_vars() -> None: ) -@pytest.mark.xfail( - reason="#3385: AUTOSKILLIT_AGENT_BACKEND not yet added to build_food_truck_cmd", - strict=True, -) def test_agent_backend_env_var_in_food_truck(monkeypatch: pytest.MonkeyPatch) -> None: """AUTOSKILLIT_AGENT_BACKEND must appear in build_food_truck_cmd env for every backend.""" from autoskillit.core.types._type_plugin_source import DirectInstall diff --git a/tests/arch/test_no_not_implemented.py b/tests/arch/test_no_not_implemented.py index 5783d1ff1e..d0a58925dc 100644 --- a/tests/arch/test_no_not_implemented.py +++ b/tests/arch/test_no_not_implemented.py @@ -8,10 +8,6 @@ pytestmark = [pytest.mark.layer("arch"), pytest.mark.small] -@pytest.mark.xfail( - reason="#3383: CodexBackend.validate_skill_content and .list_plugins not yet implemented", - strict=True, -) def test_no_not_implemented_error_in_registered_backends() -> None: """No method on any BACKEND_REGISTRY class should raise NotImplementedError.