From 33def5b647b270096780aee4ae04abbbc2b89ac6 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 13:45:03 -0700 Subject: [PATCH 1/8] test: add failing tests for CmdSpec argument ordering immunity Adds test coverage that will fail until the structural guards are in place: - test_flag_metadata_coverage: closed categorization of ClaudeFlags members - test_cmd_builder: CmdBuilder ordering invariants and CmdSpec origin - test_assert_interactive_ordering: runtime gate for positional/variadic ordering - test_interactive_ordering_gate: AST guard that launch sites call the gate - Extended test_variadic_ordering: cross-backend parametrized coverage Co-Authored-By: Claude Sonnet 4.6 --- tests/arch/CLAUDE.md | 2 + tests/arch/test_flag_metadata_coverage.py | 32 ++++++ tests/arch/test_interactive_ordering_gate.py | 91 +++++++++++++++ tests/arch/test_variadic_ordering.py | 22 +++- tests/execution/CLAUDE.md | 2 + tests/execution/backends/test_cmd_builder.py | 95 ++++++++++++++++ .../test_assert_interactive_ordering.py | 106 ++++++++++++++++++ 7 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 tests/arch/test_flag_metadata_coverage.py create mode 100644 tests/arch/test_interactive_ordering_gate.py create mode 100644 tests/execution/backends/test_cmd_builder.py create mode 100644 tests/execution/test_assert_interactive_ordering.py diff --git a/tests/arch/CLAUDE.md b/tests/arch/CLAUDE.md index eb6b52602d..0d541742f0 100644 --- a/tests/arch/CLAUDE.md +++ b/tests/arch/CLAUDE.md @@ -72,6 +72,8 @@ AST enforcement, sub-package layer contracts, and architectural invariant tests. | `test_tools_execution_decomposition.py` | Structural decomposition guard for tools_execution.py split | | `test_transforms_hygiene.py` | Structural guards for FastMCP visibility tag hygiene | | `test_variadic_ordering.py` | Architectural invariant: positional initial_prompt must precede all variadic CLI flags in build_interactive_cmd | +| `test_flag_metadata_coverage.py` | Closed categorization guard: every ClaudeFlags member must appear in VARIADIC_CLAUDE_FLAGS or NON_VARIADIC_CLAUDE_FLAGS | +| `test_interactive_ordering_gate.py` | AST guard: _session_launch.py and _cook.py must import and call assert_interactive_ordering before subprocess invocation | | `test_watcher_signal_consistency.py` | Structural guard: all process watchers must call `_has_active_dispatch_marker` | | `test_write_restriction_coverage.py` | Architectural invariant: skills with prose write restrictions in NEVER blocks have runtime enforcement (read_only, output_dir, or allowlist) | | `test_model_identity_contract.py` | AST guard: detect_model_drift must use normalize_model_id and _models_match — raw alias/full-ID comparison is a false-positive source | diff --git a/tests/arch/test_flag_metadata_coverage.py b/tests/arch/test_flag_metadata_coverage.py new file mode 100644 index 0000000000..e02d2f6a86 --- /dev/null +++ b/tests/arch/test_flag_metadata_coverage.py @@ -0,0 +1,32 @@ +"""Flag metadata coverage: every ClaudeFlags member must be categorized.""" + +from __future__ import annotations + +import pytest + +from autoskillit.core import NON_VARIADIC_CLAUDE_FLAGS, VARIADIC_CLAUDE_FLAGS, ClaudeFlags + +pytestmark = [pytest.mark.layer("arch"), pytest.mark.small] + + +def test_every_claude_flag_is_categorized(): + all_flags = frozenset(ClaudeFlags) + categorized = VARIADIC_CLAUDE_FLAGS | NON_VARIADIC_CLAUDE_FLAGS + uncategorized = all_flags - categorized + assert not uncategorized, ( + f"ClaudeFlags members not categorized as variadic (repeating) " + f"or non-variadic (single-occurrence): {uncategorized}. " + f"Add to VARIADIC_CLAUDE_FLAGS or NON_VARIADIC_CLAUDE_FLAGS." + ) + + +def test_variadic_and_non_variadic_are_disjoint(): + overlap = VARIADIC_CLAUDE_FLAGS & NON_VARIADIC_CLAUDE_FLAGS + assert not overlap, f"Flags in both sets: {overlap}" + + +def test_all_categorized_flags_are_valid_members(): + all_flags = frozenset(ClaudeFlags) + categorized = VARIADIC_CLAUDE_FLAGS | NON_VARIADIC_CLAUDE_FLAGS + invalid = categorized - all_flags + assert not invalid, f"Categorized flags not in ClaudeFlags: {invalid}" diff --git a/tests/arch/test_interactive_ordering_gate.py b/tests/arch/test_interactive_ordering_gate.py new file mode 100644 index 0000000000..21da4d4d0f --- /dev/null +++ b/tests/arch/test_interactive_ordering_gate.py @@ -0,0 +1,91 @@ +"""AST guard: interactive launch sites must call assert_interactive_ordering. + +Ensures that _session_launch.py and _cook.py both call +assert_interactive_ordering after build_interactive_cmd and before +the subprocess invocation, preventing silent ordering regressions. +""" + +from __future__ import annotations + +import ast + +import pytest + +from autoskillit.core import paths + +pytestmark = [pytest.mark.layer("arch"), pytest.mark.small] + +_GATE_NAME = "assert_interactive_ordering" + + +def _has_call(tree: ast.AST, name: str) -> bool: + for node in ast.walk(tree): + if isinstance(node, ast.Call): + func = node.func + if isinstance(func, ast.Name) and func.id == name: + return True + if isinstance(func, ast.Attribute) and func.attr == name: + return True + return False + + +def _session_launch_source() -> str: + return (paths.pkg_root() / "cli" / "session" / "_session_launch.py").read_text() + + +def _cook_source() -> str: + return (paths.pkg_root() / "cli" / "session" / "_cook.py").read_text() + + +def test_session_launch_calls_assert_interactive_ordering(): + source = _session_launch_source() + tree = ast.parse(source) + assert _has_call(tree, _GATE_NAME), ( + f"_session_launch.py does not call {_GATE_NAME}(). " + "Interactive launch sites must validate CmdSpec ordering before " + "passing to subprocess." + ) + + +def test_cook_calls_assert_interactive_ordering(): + source = _cook_source() + tree = ast.parse(source) + assert _has_call(tree, _GATE_NAME), ( + f"_cook.py does not call {_GATE_NAME}(). " + "Interactive launch sites must validate CmdSpec ordering before " + "passing to subprocess." + ) + + +def test_session_launch_imports_assert_interactive_ordering(): + source = _session_launch_source() + tree = ast.parse(source) + imported_names: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom): + for alias in node.names: + imported_names.add(alias.asname or alias.name) + elif isinstance(node, ast.Import): + for alias in node.names: + imported_names.add(alias.asname or alias.name) + assert _GATE_NAME in imported_names, ( + f"_session_launch.py does not import {_GATE_NAME}. " + "It must be imported and called before subprocess invocation." + ) + + +def test_cook_imports_assert_interactive_ordering(): + source = _cook_source() + tree = ast.parse(source) + imported_names: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom): + for alias in node.names: + imported_names.add(alias.asname or alias.name) + elif isinstance(node, ast.Import): + for alias in node.names: + imported_names.add(alias.asname or alias.name) + assert _GATE_NAME in imported_names, ( + f"_cook.py does not import {_GATE_NAME}. " + "It must be imported and called before subprocess invocation." + ) diff --git a/tests/arch/test_variadic_ordering.py b/tests/arch/test_variadic_ordering.py index 6c0ac0e534..2639dc67f0 100644 --- a/tests/arch/test_variadic_ordering.py +++ b/tests/arch/test_variadic_ordering.py @@ -6,7 +6,8 @@ import pytest -from autoskillit.core import VARIADIC_CLAUDE_FLAGS +from autoskillit.core import VARIADIC_CLAUDE_FLAGS, ClaudeFlags +from autoskillit.execution.backends import ClaudeCodeBackend, CodexBackend from autoskillit.execution.commands import build_interactive_cmd pytestmark = [pytest.mark.layer("arch"), pytest.mark.small] @@ -30,3 +31,22 @@ def test_positional_precedes_all_variadic_flags(variadic_kwargs): f"Positional arg at index {prompt_idx} must precede " f"variadic flag {flag!r} at index {flag_idx}" ) + + +@pytest.mark.parametrize("backend_cls", [ClaudeCodeBackend, CodexBackend]) +@pytest.mark.parametrize( + "variadic_kwargs", + [ + {"add_dirs": [Path("/tmp/a")]}, + ], +) +def test_all_backends_positional_precedes_variadic(backend_cls, variadic_kwargs): + result = backend_cls().build_interactive_cmd(initial_prompt="test prompt", **variadic_kwargs) + prompt_idx = list(result.cmd).index("test prompt") + flag_val = ClaudeFlags.ADD_DIR + if flag_val in result.cmd: + flag_idx = list(result.cmd).index(flag_val) + assert prompt_idx < flag_idx, ( + f"{backend_cls.__name__}: positional arg at index {prompt_idx} must precede " + f"variadic flag {flag_val!r} at index {flag_idx}" + ) diff --git a/tests/execution/CLAUDE.md b/tests/execution/CLAUDE.md index d462bbb36c..a09ec47aa5 100644 --- a/tests/execution/CLAUDE.md +++ b/tests/execution/CLAUDE.md @@ -121,6 +121,7 @@ Subprocess integration, headless session, process lifecycle, and session result | `test_zero_write_detection.py` | Contract: sessions expected to write must actually write (behavioral write-count gate) | | `test_session_log_recovery_split_integrity.py` | Split integrity tests for `_session_log_recovery` module | +| `test_assert_interactive_ordering.py` | Tests for the assert_interactive_ordering runtime gate — positional/variadic ordering validation | ## Architecture Notes @@ -145,3 +146,4 @@ Subprocess integration, headless session, process lifecycle, and session result | `test_codex_stream_parser.py` | Full test suite for CodexStreamParser: happy-path, item parsing, degradation, fixture-driven integration, protocol conformance | | `test_codex_result_parser.py` | Tests for CodexResultParser | | `test_codex_config.py` | Tests for TOML read/write primitives, _is_autoskillit_registered, and ensure_codex_mcp_registered | +| `test_cmd_builder.py` | CmdBuilder ordering invariant and CmdSpec origin tests | diff --git a/tests/execution/backends/test_cmd_builder.py b/tests/execution/backends/test_cmd_builder.py new file mode 100644 index 0000000000..2834b4c44f --- /dev/null +++ b/tests/execution/backends/test_cmd_builder.py @@ -0,0 +1,95 @@ +"""CmdBuilder: ordering invariant and CmdSpec origin tests.""" + +from __future__ import annotations + +import pytest +from autoskillit.execution.backends._cmd_builder import CmdBuilder, CmdOrderingError + +from autoskillit.core import VARIADIC_CLAUDE_FLAGS + +pytestmark = [pytest.mark.layer("execution"), pytest.mark.small] + + +def test_positional_always_precedes_variadic_pairs(): + builder = CmdBuilder("claude") + builder.mode_flag("--dangerously-skip-permissions") + builder.positional("my prompt") + builder.variadic_pair("--add-dir", "/path/a") + builder.variadic_pair("--tools", "AskUserQuestion") + spec = builder.build() + prompt_idx = spec.cmd.index("my prompt") + for flag in VARIADIC_CLAUDE_FLAGS: + if flag in spec.cmd: + assert prompt_idx < spec.cmd.index(flag) + + +def test_variadic_pair_before_positional_raises(): + builder = CmdBuilder("claude") + builder.variadic_pair("--add-dir", "/path/a") + with pytest.raises(CmdOrderingError): + builder.positional("my prompt") + + +def test_build_produces_cmdspec_with_origin(): + builder = CmdBuilder("claude") + builder.positional("hello") + spec = builder.build() + assert spec.origin is not None + assert spec.origin.positional == ("hello",) + + +def test_mode_flag_appears_before_positional(): + builder = CmdBuilder("claude") + builder.mode_flag("--dangerously-skip-permissions") + builder.positional("my prompt") + spec = builder.build() + assert spec.cmd.index("--dangerously-skip-permissions") < spec.cmd.index("my prompt") + + +def test_kv_flag_appears_before_positional(): + builder = CmdBuilder("claude") + builder.kv_flag("--model", "claude-sonnet-4-6") + builder.positional("my prompt") + spec = builder.build() + assert spec.cmd.index("--model") < spec.cmd.index("my prompt") + + +def test_build_assembles_correct_order(): + builder = CmdBuilder("claude") + builder.mode_flag("--dangerously-skip-permissions") + builder.kv_flag("--model", "claude-sonnet-4-6") + builder.positional("hello world") + builder.variadic_pair("--add-dir", "/a") + builder.variadic_pair("--add-dir", "/b") + spec = builder.build() + assert spec.cmd[0] == "claude" + assert "--dangerously-skip-permissions" in spec.cmd + assert "--model" in spec.cmd + assert "hello world" in spec.cmd + prompt_idx = spec.cmd.index("hello world") + add_dir_idx = spec.cmd.index("--add-dir") + assert prompt_idx < add_dir_idx + + +def test_origin_fields_populated(): + builder = CmdBuilder("claude") + builder.mode_flag("--mode-flag") + builder.kv_flag("--model", "m") + builder.positional("prompt text") + builder.variadic_pair("--add-dir", "/p") + spec = builder.build() + assert spec.origin is not None + assert spec.origin.binary == "claude" + assert "--mode-flag" in spec.origin.mode_flags + assert ("--model", "m") in spec.origin.kv_flags + assert "prompt text" in spec.origin.positional + assert ("--add-dir", "/p") in spec.origin.variadic_pairs + + +def test_no_positional_builds_without_error(): + builder = CmdBuilder("codex") + builder.variadic_pair("--add-dir", "/a") + spec = builder.build() + assert "--add-dir" in spec.cmd + assert spec.origin is not None + assert spec.origin.positional == () diff --git a/tests/execution/test_assert_interactive_ordering.py b/tests/execution/test_assert_interactive_ordering.py new file mode 100644 index 0000000000..b3c01989bf --- /dev/null +++ b/tests/execution/test_assert_interactive_ordering.py @@ -0,0 +1,106 @@ +"""Tests for the assert_interactive_ordering runtime gate.""" + +from __future__ import annotations + +import pytest + +from autoskillit.core import CmdSpec +from autoskillit.core.types._type_backend import CmdOrigin +from autoskillit.execution.headless._headless_helpers import assert_interactive_ordering + +pytestmark = [pytest.mark.layer("execution"), pytest.mark.small] + + +def test_correctly_ordered_cmd_passes(): + spec = CmdSpec( + cmd=("claude", "--dangerously-skip-permissions", "prompt", "--add-dir", "/a"), + env={}, + ) + assert_interactive_ordering(spec) + + +def test_positional_after_variadic_raises(): + spec = CmdSpec( + cmd=("claude", "--dangerously-skip-permissions", "--add-dir", "/a", "prompt"), + env={}, + ) + with pytest.raises(ValueError, match="positional.*must precede.*variadic"): + assert_interactive_ordering(spec) + + +def test_no_positional_passes(): + spec = CmdSpec( + cmd=("claude", "--dangerously-skip-permissions", "--add-dir", "/a"), + env={}, + ) + assert_interactive_ordering(spec) + + +def test_origin_does_not_bypass_validation(): + """Even with a non-None origin, the gate must scan the raw cmd tuple.""" + spec = CmdSpec( + cmd=("claude", "--add-dir", "/path", "prompt"), + env={}, + origin=CmdOrigin( + binary="claude", + positional=("prompt",), + variadic_pairs=(("--add-dir", "/path"),), + ), + ) + with pytest.raises(ValueError, match="positional.*must precede.*variadic"): + assert_interactive_ordering(spec) + + +def test_flag_value_not_mistaken_for_positional(): + """Flag values (e.g., the model name after --model) must not be classified as positional.""" + spec = CmdSpec( + cmd=("claude", "--model", "claude-sonnet-4-6", "--add-dir", "/a"), + env={}, + ) + assert_interactive_ordering(spec) + + +def test_codex_config_override_value_not_mistaken_for_positional(): + """Codex -c flag values must not be classified as positional args.""" + spec = CmdSpec( + cmd=( + "codex", + "--dangerously-bypass-approvals-and-sandbox", + "hello", + "-c", + "developer_instructions=do stuff", + "--add-dir", + "/a", + ), + env={}, + ) + assert_interactive_ordering(spec) + + +def test_tools_flag_after_positional_passes(): + spec = CmdSpec( + cmd=( + "claude", + "--dangerously-skip-permissions", + "my prompt", + "--tools", + "AskUserQuestion", + ), + env={}, + ) + assert_interactive_ordering(spec) + + +def test_tools_flag_before_positional_raises(): + spec = CmdSpec( + cmd=( + "claude", + "--dangerously-skip-permissions", + "--tools", + "AskUserQuestion", + "my prompt", + ), + env={}, + ) + with pytest.raises(ValueError, match="positional.*must precede.*variadic"): + assert_interactive_ordering(spec) From 9bce243ba17a94fd8c29dcb3c7683e92b82dcb92 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 13:52:33 -0700 Subject: [PATCH 2/8] feat: structural immunity for CmdSpec argument ordering Introduces typed CmdBuilder, NON_VARIADIC_CLAUDE_FLAGS closed categorization, CmdOrigin provenance metadata, and assert_interactive_ordering runtime gate. - core/types: CmdOrigin dataclass, origin field on CmdSpec, NON_VARIADIC_CLAUDE_FLAGS - execution/backends/_cmd_builder.py: CmdBuilder enforces positional-before-variadic by raising CmdOrderingError immediately if positional() called after variadic_pair() - ClaudeCodeBackend and CodexBackend build_interactive_cmd refactored to use CmdBuilder - _headless_helpers.py: assert_interactive_ordering scans raw cmd tuple, refuses to trust CmdOrigin alone - _session_launch.py and _cook.py: gate wired in before subprocess invocation Co-Authored-By: Claude Sonnet 4.6 --- src/autoskillit/cli/session/_cook.py | 3 + .../cli/session/_session_launch.py | 3 + src/autoskillit/core/__init__.pyi | 2 + src/autoskillit/core/types/_type_backend.py | 13 ++++ src/autoskillit/core/types/_type_enums.py | 15 ++++ src/autoskillit/execution/backends/CLAUDE.md | 1 + .../execution/backends/_cmd_builder.py | 76 +++++++++++++++++++ src/autoskillit/execution/backends/claude.py | 24 +++--- src/autoskillit/execution/backends/codex.py | 27 ++++--- .../execution/headless/_headless_helpers.py | 60 +++++++++++++++ 10 files changed, 203 insertions(+), 21 deletions(-) create mode 100644 src/autoskillit/execution/backends/_cmd_builder.py diff --git a/src/autoskillit/cli/session/_cook.py b/src/autoskillit/cli/session/_cook.py index d703b3dba7..aae8af728e 100644 --- a/src/autoskillit/cli/session/_cook.py +++ b/src/autoskillit/cli/session/_cook.py @@ -228,6 +228,8 @@ def cook( _max_reloads = 10 seen_reload_ids: set[str] = set() + from autoskillit.execution.headless._headless_helpers import assert_interactive_ordering + while True: spec = backend.build_interactive_cmd( plugin_source=plugin_source, @@ -236,6 +238,7 @@ def cook( resume_spec=current_resume_spec, env_extras=_cook_env_extras, ) + assert_interactive_ordering(spec) reload_session_id = _run_cook_session( cmd=[*spec.cmd], env=spec.env, diff --git a/src/autoskillit/cli/session/_session_launch.py b/src/autoskillit/cli/session/_session_launch.py index 191df88744..d4f4798a70 100644 --- a/src/autoskillit/cli/session/_session_launch.py +++ b/src/autoskillit/cli/session/_session_launch.py @@ -104,6 +104,8 @@ def _run_interactive_session( plugin_source = None tools_arg = () + from autoskillit.execution.headless._headless_helpers import assert_interactive_ordering + spec = backend.build_interactive_cmd( initial_prompt=initial_message, resume_spec=resume_spec if resume_spec is not None else NoResume(), @@ -113,6 +115,7 @@ def _run_interactive_session( plugin_source=plugin_source, tools=tools_arg, ) + assert_interactive_ordering(spec) cmd = [*spec.cmd] with terminal_guard(): result = subprocess.run(cmd, env=spec.env) diff --git a/src/autoskillit/core/__init__.pyi b/src/autoskillit/core/__init__.pyi index b6ebfde295..64d5876d44 100644 --- a/src/autoskillit/core/__init__.pyi +++ b/src/autoskillit/core/__init__.pyi @@ -186,6 +186,7 @@ from .types import SOUS_CHEF_MANDATORY_SECTIONS as SOUS_CHEF_MANDATORY_SECTIONS from .types import TOOL_SUBSET_TAGS as TOOL_SUBSET_TAGS from .types import UNGATED_TOOLS as UNGATED_TOOLS from .types import VARIADIC_CLAUDE_FLAGS as VARIADIC_CLAUDE_FLAGS +from .types import NON_VARIADIC_CLAUDE_FLAGS as NON_VARIADIC_CLAUDE_FLAGS from .types import WORKTREE_SKILLS as WORKTREE_SKILLS from .types import AgentPackDef as AgentPackDef from .types import AgentSessionResult as AgentSessionResult @@ -214,6 +215,7 @@ from .types import CloneGateUnpublished as CloneGateUnpublished from .types import CloneManager as CloneManager from .types import CloneResult as CloneResult from .types import CloneSuccessResult as CloneSuccessResult +from .types import CmdOrigin as CmdOrigin from .types import CmdSpec as CmdSpec from .types import CodexEventData as CodexEventData from .types import CodingAgentBackend as CodingAgentBackend diff --git a/src/autoskillit/core/types/_type_backend.py b/src/autoskillit/core/types/_type_backend.py index 980bb993aa..60e852b0b2 100644 --- a/src/autoskillit/core/types/_type_backend.py +++ b/src/autoskillit/core/types/_type_backend.py @@ -14,6 +14,7 @@ __all__ = [ "BackendCapabilities", "CLAUDE_CODE_CAPABILITIES", + "CmdOrigin", "CmdSpec", "SkillSessionConfig", "ClaudeEventData", @@ -108,6 +109,17 @@ class BackendCapabilities: ) +@dataclass(frozen=True, slots=True) +class CmdOrigin: + """Provenance metadata for a CmdSpec, capturing the structural role of each element.""" + + binary: str + mode_flags: tuple[str, ...] = () + kv_flags: tuple[tuple[str, str], ...] = () + positional: tuple[str, ...] = () + variadic_pairs: tuple[tuple[str, str], ...] = () + + @dataclass(frozen=True, slots=True) class CmdSpec: """Fully-resolved subprocess command specification passed to the runner.""" @@ -115,6 +127,7 @@ class CmdSpec: cmd: tuple[str, ...] env: Mapping[str, str] cwd: str = "" + origin: CmdOrigin | None = None @dataclass(frozen=True, slots=True) diff --git a/src/autoskillit/core/types/_type_enums.py b/src/autoskillit/core/types/_type_enums.py index 3704e019be..9c2f6ab6d9 100644 --- a/src/autoskillit/core/types/_type_enums.py +++ b/src/autoskillit/core/types/_type_enums.py @@ -16,6 +16,7 @@ "RecipeSource", "ClaudeFlags", "VARIADIC_CLAUDE_FLAGS", + "NON_VARIADIC_CLAUDE_FLAGS", "OutputFormat", "Severity", "TerminationReason", @@ -164,6 +165,20 @@ class ClaudeFlags(StrEnum): VARIADIC_CLAUDE_FLAGS: frozenset[str] = frozenset({ClaudeFlags.ADD_DIR, ClaudeFlags.TOOLS}) +NON_VARIADIC_CLAUDE_FLAGS: frozenset[str] = frozenset( + { + ClaudeFlags.ALLOW_DANGEROUSLY_SKIP_PERMISSIONS, + ClaudeFlags.DANGEROUSLY_SKIP_PERMISSIONS, + ClaudeFlags.PRINT, + ClaudeFlags.MODEL, + ClaudeFlags.PLUGIN_DIR, + ClaudeFlags.OUTPUT_FORMAT, + ClaudeFlags.VERBOSE, + ClaudeFlags.RESUME, + ClaudeFlags.APPEND_SYSTEM_PROMPT, + } +) + class OutputFormat(StrEnum): """Claude CLI output format with declared data capabilities. diff --git a/src/autoskillit/execution/backends/CLAUDE.md b/src/autoskillit/execution/backends/CLAUDE.md index 2e32878298..17be44f454 100644 --- a/src/autoskillit/execution/backends/CLAUDE.md +++ b/src/autoskillit/execution/backends/CLAUDE.md @@ -13,3 +13,4 @@ IL-1 backend abstraction layer — concrete `CodingAgentBackend` implementations | `_codex_config.py` | TOML serialization with `[[key]]` array-of-tables support, MCP registration (`ensure_codex_mcp_registered`, `_serialize_toml`) | | `_codex_parse.py` | `CodexStreamParser`, `CodexResultParser`, `_scan_codex_ndjson`, `_CodexParseAccumulator` | | `codex_scenario_player.py` | `CodexScenarioPlayer`, `make_codex_scenario_player`, `CodexStepRecord`, `CodexScenario`, `_load_manifest`, `_FakeCodexCLI`, `_write_shim_script` — scenario replay data layer for Codex backend | +| `_cmd_builder.py` | `CmdBuilder`, `CmdOrderingError` — typed builder that enforces positional-before-variadic ordering by construction | diff --git a/src/autoskillit/execution/backends/_cmd_builder.py b/src/autoskillit/execution/backends/_cmd_builder.py new file mode 100644 index 0000000000..30276732aa --- /dev/null +++ b/src/autoskillit/execution/backends/_cmd_builder.py @@ -0,0 +1,76 @@ +"""Typed command builder that enforces positional-before-variadic ordering by construction.""" + +from __future__ import annotations + +from collections.abc import Mapping + +from autoskillit.core import CmdOrigin, CmdSpec + +__all__ = ["CmdBuilder", "CmdOrderingError"] + + +class CmdOrderingError(ValueError): + """Raised when a positional argument is added after a variadic pair.""" + + +class CmdBuilder: + """Build a CmdSpec with structural enforcement of argument ordering. + + Sections are assembled in a fixed order: + binary → mode_flags → kv_flags → positional → variadic_pairs + + Calling ``positional()`` after ``variadic_pair()`` raises ``CmdOrderingError`` + immediately, making argument-ordering bugs impossible by construction. + """ + + def __init__(self, binary: str) -> None: + self._binary = binary + self._mode_flags: list[str] = [] + self._kv_flags: list[tuple[str, str]] = [] + self._positional: list[str] = [] + self._variadic_pairs: list[tuple[str, str]] = [] + self._has_variadic = False + + def mode_flag(self, flag: str) -> CmdBuilder: + self._mode_flags.append(flag) + return self + + def kv_flag(self, flag: str, value: str) -> CmdBuilder: + self._kv_flags.append((flag, value)) + return self + + def positional(self, value: str) -> CmdBuilder: + if self._has_variadic: + raise CmdOrderingError( + f"Cannot add positional arg {value!r} after variadic pairs have been added. " + "Positional args must precede all variadic flag pairs." + ) + self._positional.append(value) + return self + + def variadic_pair(self, flag: str, value: str) -> CmdBuilder: + self._variadic_pairs.append((flag, value)) + self._has_variadic = True + return self + + def build(self, env: Mapping[str, str] | None = None, cwd: str = "") -> CmdSpec: + cmd: list[str] = [self._binary] + cmd.extend(self._mode_flags) + for flag, value in self._kv_flags: + cmd.extend([flag, value]) + cmd.extend(self._positional) + for flag, value in self._variadic_pairs: + cmd.extend([flag, value]) + origin = CmdOrigin( + binary=self._binary, + mode_flags=tuple(self._mode_flags), + kv_flags=tuple(self._kv_flags), + positional=tuple(self._positional), + variadic_pairs=tuple(self._variadic_pairs), + ) + return CmdSpec( + cmd=tuple(cmd), + env=env if env is not None else {}, + cwd=cwd, + origin=origin, + ) diff --git a/src/autoskillit/execution/backends/claude.py b/src/autoskillit/execution/backends/claude.py index 2bd21e3ba1..e93c8438a5 100644 --- a/src/autoskillit/execution/backends/claude.py +++ b/src/autoskillit/execution/backends/claude.py @@ -59,6 +59,7 @@ _inject_cwd_anchor, _inject_narration_suppression, ) +from autoskillit.execution.backends._cmd_builder import CmdBuilder from autoskillit.execution.process import _marker_is_standalone from autoskillit.execution.session import parse_session_result @@ -356,40 +357,43 @@ def build_interactive_cmd( an interactive session is determined at runtime by kitchen state, not by a ``SESSION_TYPE`` env variable. """ - cmd: list[str] = ["claude", ClaudeFlags.DANGEROUSLY_SKIP_PERMISSIONS] + builder = CmdBuilder("claude") + builder.mode_flag(ClaudeFlags.DANGEROUSLY_SKIP_PERMISSIONS) match resume_spec: case NamedResume(session_id=sid): - cmd += [ClaudeFlags.RESUME, sid] + builder.kv_flag(ClaudeFlags.RESUME, sid) case BareResume(): - cmd.append(ClaudeFlags.RESUME) + builder.mode_flag(ClaudeFlags.RESUME) case NoResume(): pass if system_prompt is not None and isinstance(resume_spec, NoResume): - cmd += [ClaudeFlags.APPEND_SYSTEM_PROMPT, system_prompt] + builder.kv_flag(ClaudeFlags.APPEND_SYSTEM_PROMPT, system_prompt) if model: - cmd += [ClaudeFlags.MODEL, model] + builder.kv_flag(ClaudeFlags.MODEL, model) match plugin_source: case DirectInstall(plugin_dir=p): - cmd += [ClaudeFlags.PLUGIN_DIR, str(p)] + builder.kv_flag(ClaudeFlags.PLUGIN_DIR, str(p)) case MarketplaceInstall(): pass case None: pass if initial_prompt is not None: - cmd.append(initial_prompt) + builder.positional(initial_prompt) for d in add_dirs: - cmd += [ClaudeFlags.ADD_DIR, str(d)] + builder.variadic_pair(ClaudeFlags.ADD_DIR, str(d)) for t in tools: - cmd += [ClaudeFlags.TOOLS, t] + builder.variadic_pair(ClaudeFlags.TOOLS, t) merged: dict[str, str] = dict(_SESSION_BASELINE_ENV) if env_extras: merged.update(env_extras) interactive_base = { k: v for k, v in os.environ.items() if k not in _INTERACTIVE_ENV_EXCLUSIONS } + partial = builder.build() return CmdSpec( - cmd=tuple(cmd), + cmd=partial.cmd, env=build_agent_env(base=interactive_base, extras=merged, required=required_env), + origin=partial.origin, ) def build_resume_cmd( diff --git a/src/autoskillit/execution/backends/codex.py b/src/autoskillit/execution/backends/codex.py index e00eb22e57..8dd3e04210 100644 --- a/src/autoskillit/execution/backends/codex.py +++ b/src/autoskillit/execution/backends/codex.py @@ -47,6 +47,7 @@ _inject_cwd_anchor, _inject_narration_suppression, ) +from autoskillit.execution.backends._cmd_builder import CmdBuilder from autoskillit.execution.backends._codex_config import ensure_codex_mcp_registered from autoskillit.execution.backends._codex_parse import CodexResultParser, CodexStreamParser @@ -566,29 +567,33 @@ def build_interactive_cmd( "codex_tools_ignored", extra={"tools": list(tools)}, ) - cmd: list[str] = [] + builder = CmdBuilder("codex") + # plugin_source: explicit no-op for Codex match resume_spec: case NoResume(): - cmd = ["codex", CodexFlags.DANGEROUSLY_BYPASS] + builder.mode_flag(CodexFlags.DANGEROUSLY_BYPASS) case NamedResume(session_id=sid): - cmd = ["codex", CodexFlags.RESUME_SUBCOMMAND, sid, CodexFlags.DANGEROUSLY_BYPASS] + builder.mode_flag(CodexFlags.RESUME_SUBCOMMAND) + builder.mode_flag(sid) + builder.mode_flag(CodexFlags.DANGEROUSLY_BYPASS) case BareResume(): - cmd = ["codex", CodexFlags.RESUME_SUBCOMMAND, CodexFlags.DANGEROUSLY_BYPASS] + builder.mode_flag(CodexFlags.RESUME_SUBCOMMAND) + builder.mode_flag(CodexFlags.DANGEROUSLY_BYPASS) if model: - cmd += [CodexFlags.MODEL, model] - # plugin_source: explicit no-op for Codex + builder.kv_flag(CodexFlags.MODEL, model) + if system_prompt is not None and isinstance(resume_spec, NoResume): + builder.kv_flag(CodexFlags.CONFIG_OVERRIDE, f"developer_instructions={system_prompt}") if initial_prompt is not None: - cmd.append(initial_prompt) + builder.positional(initial_prompt) for d in add_dirs: - cmd += [CodexFlags.ADD_DIR, str(d)] - if system_prompt is not None and isinstance(resume_spec, NoResume): - cmd += [CodexFlags.CONFIG_OVERRIDE, f"developer_instructions={system_prompt}"] + builder.variadic_pair(CodexFlags.ADD_DIR, str(d)) base_env = {k: v for k, v in os.environ.items() if k not in _HEADLESS_EXCLUSIVE_VARS} merged_extras: dict[str, str] = dict(env_extras) if env_extras else {} if add_dirs: merged_extras.setdefault("CODEX_HOME", str(add_dirs[0])) env = CodexEnvPolicy().build_env(base_env, extras=merged_extras, required=required_env) - return CmdSpec(cmd=tuple(cmd), env=env) + partial = builder.build() + return CmdSpec(cmd=partial.cmd, env=env, origin=partial.origin) def build_resume_cmd( self, diff --git a/src/autoskillit/execution/headless/_headless_helpers.py b/src/autoskillit/execution/headless/_headless_helpers.py index 96b205dd5e..e119adc135 100644 --- a/src/autoskillit/execution/headless/_headless_helpers.py +++ b/src/autoskillit/execution/headless/_headless_helpers.py @@ -8,14 +8,44 @@ from typing import TYPE_CHECKING from autoskillit.core import ( + VARIADIC_CLAUDE_FLAGS, + ClaudeFlags, CmdSpec, CodingAgentBackend, SkillResult, claude_code_project_dir, get_logger, ) +from autoskillit.execution.backends.codex import CodexFlags from autoskillit.execution.headless._headless_git import _compute_loc_changed +_CLAUDE_VALUE_BEARING_FLAGS: frozenset[str] = frozenset( + { + ClaudeFlags.PRINT, + ClaudeFlags.MODEL, + ClaudeFlags.OUTPUT_FORMAT, + ClaudeFlags.RESUME, + ClaudeFlags.APPEND_SYSTEM_PROMPT, + ClaudeFlags.PLUGIN_DIR, + ClaudeFlags.ADD_DIR, + ClaudeFlags.TOOLS, + } +) + +_CODEX_VALUE_BEARING_FLAGS: frozenset[str] = frozenset( + { + CodexFlags.MODEL, + CodexFlags.MODEL_SHORT, + CodexFlags.ADD_DIR, + CodexFlags.ASK_FOR_APPROVAL, + CodexFlags.ASK_FOR_APPROVAL_SHORT, + CodexFlags.SANDBOX, + CodexFlags.CONFIG_OVERRIDE, + } +) + +_ALL_VALUE_BEARING_FLAGS: frozenset[str] = _CLAUDE_VALUE_BEARING_FLAGS | _CODEX_VALUE_BEARING_FLAGS + if TYPE_CHECKING: from autoskillit.config import AutomationConfig @@ -49,6 +79,36 @@ def assert_headless_cmd(spec: CmdSpec) -> None: ) +def assert_interactive_ordering( + spec: CmdSpec, + *, + variadic_flags: frozenset[str] = VARIADIC_CLAUDE_FLAGS, + value_bearing_flags: frozenset[str] | None = None, +) -> None: + """Validate that positional arguments precede variadic flags in an interactive CmdSpec. + + Always scans the raw cmd tuple — never trusts origin metadata alone, since + CmdOrigin is a public dataclass and callers could set it on a misordered cmd. + """ + if value_bearing_flags is None: + value_bearing_flags = _ALL_VALUE_BEARING_FLAGS + cmd = spec.cmd + positional_indices = [ + i + for i, tok in enumerate(cmd) + if not tok.startswith("-") and i > 0 and cmd[i - 1] not in value_bearing_flags + ] + for flag in variadic_flags: + if flag in cmd: + flag_idx = cmd.index(flag) + for pi in positional_indices: + if pi > flag_idx: + raise ValueError( + f"Positional arg at index {pi} ({cmd[pi]!r}) must precede " + f"variadic flag {flag!r} at index {flag_idx}" + ) + + def _resolve_session_log_dir(cwd: str, backend: CodingAgentBackend) -> Path | None: if not backend.capabilities.channel_b_capable: return None From ce96b45eed28c66a0f1c1da6399eef7fc49e84d5 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 14:08:16 -0700 Subject: [PATCH 3/8] fix: re-export assert_interactive_ordering through public API and fix test assertions - Re-export assert_interactive_ordering via execution.headless and execution __init__ - Update imports in _cook.py and _session_launch.py to use public API - Fix error message casing for consistent test matching - Update test_backend_dataclasses to expect origin field and CmdOrigin export - Update headless helpers size budget for new assert_interactive_ordering function - Sort NON_VARIADIC_CLAUDE_FLAGS re-export alphabetically in core/__init__.pyi Co-Authored-By: Claude Opus 4.6 (1M context) --- src/autoskillit/cli/session/_cook.py | 4 ++-- src/autoskillit/cli/session/_session_launch.py | 4 ++-- src/autoskillit/core/__init__.pyi | 2 +- src/autoskillit/execution/__init__.py | 2 ++ src/autoskillit/execution/headless/__init__.py | 2 ++ src/autoskillit/execution/headless/_headless_helpers.py | 4 ++-- tests/arch/test_execution_source_split.py | 2 +- tests/core/test_backend_dataclasses.py | 3 ++- tests/execution/backends/test_cmd_builder.py | 2 +- 9 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/autoskillit/cli/session/_cook.py b/src/autoskillit/cli/session/_cook.py index aae8af728e..2386951b6c 100644 --- a/src/autoskillit/cli/session/_cook.py +++ b/src/autoskillit/cli/session/_cook.py @@ -228,7 +228,7 @@ def cook( _max_reloads = 10 seen_reload_ids: set[str] = set() - from autoskillit.execution.headless._headless_helpers import assert_interactive_ordering + from autoskillit.execution import assert_interactive_ordering while True: spec = backend.build_interactive_cmd( @@ -238,7 +238,7 @@ def cook( resume_spec=current_resume_spec, env_extras=_cook_env_extras, ) - assert_interactive_ordering(spec) + assert_interactive_ordering(spec=spec) reload_session_id = _run_cook_session( cmd=[*spec.cmd], env=spec.env, diff --git a/src/autoskillit/cli/session/_session_launch.py b/src/autoskillit/cli/session/_session_launch.py index d4f4798a70..cce3bc2be3 100644 --- a/src/autoskillit/cli/session/_session_launch.py +++ b/src/autoskillit/cli/session/_session_launch.py @@ -104,7 +104,7 @@ def _run_interactive_session( plugin_source = None tools_arg = () - from autoskillit.execution.headless._headless_helpers import assert_interactive_ordering + from autoskillit.execution import assert_interactive_ordering spec = backend.build_interactive_cmd( initial_prompt=initial_message, @@ -115,7 +115,7 @@ def _run_interactive_session( plugin_source=plugin_source, tools=tools_arg, ) - assert_interactive_ordering(spec) + assert_interactive_ordering(spec=spec) cmd = [*spec.cmd] with terminal_guard(): result = subprocess.run(cmd, env=spec.env) diff --git a/src/autoskillit/core/__init__.pyi b/src/autoskillit/core/__init__.pyi index 64d5876d44..d4098b76ca 100644 --- a/src/autoskillit/core/__init__.pyi +++ b/src/autoskillit/core/__init__.pyi @@ -154,6 +154,7 @@ from .types import KNOWN_CI_EVENTS as KNOWN_CI_EVENTS from .types import LABEL_LIFECYCLE_REGISTRY as LABEL_LIFECYCLE_REGISTRY from .types import LABEL_TRANSITIONS as LABEL_TRANSITIONS from .types import LAUNCH_ID_ENV_VAR as LAUNCH_ID_ENV_VAR +from .types import NON_VARIADIC_CLAUDE_FLAGS as NON_VARIADIC_CLAUDE_FLAGS from .types import PACK_REGISTRY as PACK_REGISTRY from .types import PIPELINE_FORBIDDEN_TOOLS as PIPELINE_FORBIDDEN_TOOLS from .types import PR_TELEMETRY_SECTIONS as PR_TELEMETRY_SECTIONS @@ -186,7 +187,6 @@ from .types import SOUS_CHEF_MANDATORY_SECTIONS as SOUS_CHEF_MANDATORY_SECTIONS from .types import TOOL_SUBSET_TAGS as TOOL_SUBSET_TAGS from .types import UNGATED_TOOLS as UNGATED_TOOLS from .types import VARIADIC_CLAUDE_FLAGS as VARIADIC_CLAUDE_FLAGS -from .types import NON_VARIADIC_CLAUDE_FLAGS as NON_VARIADIC_CLAUDE_FLAGS from .types import WORKTREE_SKILLS as WORKTREE_SKILLS from .types import AgentPackDef as AgentPackDef from .types import AgentSessionResult as AgentSessionResult diff --git a/src/autoskillit/execution/__init__.py b/src/autoskillit/execution/__init__.py index cd5f0ee578..074905b18b 100644 --- a/src/autoskillit/execution/__init__.py +++ b/src/autoskillit/execution/__init__.py @@ -60,6 +60,7 @@ ) from autoskillit.execution.headless import ( DefaultHeadlessExecutor, + assert_interactive_ordering, run_headless_core, ) from autoskillit.execution.linux_tracing import ( @@ -181,6 +182,7 @@ # headless "run_headless_core", "DefaultHeadlessExecutor", + "assert_interactive_ordering", # testing "parse_pytest_summary", "check_test_passed", diff --git a/src/autoskillit/execution/headless/__init__.py b/src/autoskillit/execution/headless/__init__.py index 8f9ce788ce..5891bd4638 100644 --- a/src/autoskillit/execution/headless/__init__.py +++ b/src/autoskillit/execution/headless/__init__.py @@ -53,6 +53,7 @@ _resolve_session_log_dir, _session_log_dir, # noqa: F401 _stat_snapshot, # noqa: F401 + assert_interactive_ordering, ) from autoskillit.execution.headless._headless_path_tokens import ( # noqa: F401 _INTENTIONALLY_EXCLUDED_PATH_TOKENS, @@ -91,6 +92,7 @@ __all__ = [ "DefaultHeadlessExecutor", "PostSessionMetrics", + "assert_interactive_ordering", "run_headless_core", ] diff --git a/src/autoskillit/execution/headless/_headless_helpers.py b/src/autoskillit/execution/headless/_headless_helpers.py index e119adc135..c8d63ad528 100644 --- a/src/autoskillit/execution/headless/_headless_helpers.py +++ b/src/autoskillit/execution/headless/_headless_helpers.py @@ -104,8 +104,8 @@ def assert_interactive_ordering( for pi in positional_indices: if pi > flag_idx: raise ValueError( - f"Positional arg at index {pi} ({cmd[pi]!r}) must precede " - f"variadic flag {flag!r} at index {flag_idx}" + f"positional arg at index {pi} ({cmd[pi]!r}) must precede " + f"variadic flag {str(flag)!r} at index {flag_idx}" ) diff --git a/tests/arch/test_execution_source_split.py b/tests/arch/test_execution_source_split.py index eaf41e0a03..8037854206 100644 --- a/tests/arch/test_execution_source_split.py +++ b/tests/arch/test_execution_source_split.py @@ -17,7 +17,7 @@ ] HEADLESS_SIZE_BUDGETS = { "headless/__init__.py": 460, - "headless/_headless_helpers.py": 175, + "headless/_headless_helpers.py": 220, "headless/_headless_execute.py": 595, "headless/_headless_recovery.py": 366, "headless/_headless_path_tokens.py": 175, diff --git a/tests/core/test_backend_dataclasses.py b/tests/core/test_backend_dataclasses.py index 3b5d29e3b0..3b4ccda12d 100644 --- a/tests/core/test_backend_dataclasses.py +++ b/tests/core/test_backend_dataclasses.py @@ -23,7 +23,7 @@ def test_cmd_spec_fields(): from autoskillit.core import CmdSpec fields = {f.name for f in dataclasses.fields(CmdSpec)} - assert fields == {"cmd", "env", "cwd"} + assert fields == {"cmd", "env", "cwd", "origin"} def test_cmd_spec_env_accepts_mapping(): @@ -177,6 +177,7 @@ def test_backend_module_all_exhaustive(): assert set(__all__) == { "BackendCapabilities", "CLAUDE_CODE_CAPABILITIES", + "CmdOrigin", "CmdSpec", "SkillSessionConfig", "ClaudeEventData", diff --git a/tests/execution/backends/test_cmd_builder.py b/tests/execution/backends/test_cmd_builder.py index 2834b4c44f..283436086a 100644 --- a/tests/execution/backends/test_cmd_builder.py +++ b/tests/execution/backends/test_cmd_builder.py @@ -3,9 +3,9 @@ from __future__ import annotations import pytest -from autoskillit.execution.backends._cmd_builder import CmdBuilder, CmdOrderingError from autoskillit.core import VARIADIC_CLAUDE_FLAGS +from autoskillit.execution.backends._cmd_builder import CmdBuilder, CmdOrderingError pytestmark = [pytest.mark.layer("execution"), pytest.mark.small] From d911eb8d2734b84efb247f451d9e825e32b84c5d Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 14:46:43 -0700 Subject: [PATCH 4/8] fix(review): use builder.positional(sid) for NamedResume session_id and remove redundant comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NamedResume session_id is a positional value, not a flag — mode_flag(sid) incorrectly placed it in CmdOrigin.mode_flags. Also removes the slop comment describing absence of plugin_source handling. Co-Authored-By: Claude Opus 4.6 --- src/autoskillit/execution/backends/codex.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/autoskillit/execution/backends/codex.py b/src/autoskillit/execution/backends/codex.py index 8dd3e04210..935f8f9c52 100644 --- a/src/autoskillit/execution/backends/codex.py +++ b/src/autoskillit/execution/backends/codex.py @@ -568,13 +568,12 @@ def build_interactive_cmd( extra={"tools": list(tools)}, ) builder = CmdBuilder("codex") - # plugin_source: explicit no-op for Codex match resume_spec: case NoResume(): builder.mode_flag(CodexFlags.DANGEROUSLY_BYPASS) case NamedResume(session_id=sid): builder.mode_flag(CodexFlags.RESUME_SUBCOMMAND) - builder.mode_flag(sid) + builder.positional(sid) builder.mode_flag(CodexFlags.DANGEROUSLY_BYPASS) case BareResume(): builder.mode_flag(CodexFlags.RESUME_SUBCOMMAND) From 4f62e1781fe6608d0e34b422789e09f5ae211903 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 14:46:57 -0700 Subject: [PATCH 5/8] fix(review): add empty binary validation guard to CmdBuilder.__init__ Prevents CmdSpec with empty binary that would fail at subprocess invocation with no diagnostic context. Co-Authored-By: Claude Opus 4.6 --- src/autoskillit/execution/backends/_cmd_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/autoskillit/execution/backends/_cmd_builder.py b/src/autoskillit/execution/backends/_cmd_builder.py index 30276732aa..79908ebbef 100644 --- a/src/autoskillit/execution/backends/_cmd_builder.py +++ b/src/autoskillit/execution/backends/_cmd_builder.py @@ -24,6 +24,8 @@ class CmdBuilder: """ def __init__(self, binary: str) -> None: + if not binary: + raise ValueError("binary must be a non-empty string") self._binary = binary self._mode_flags: list[str] = [] self._kv_flags: list[tuple[str, str]] = [] From 0177f0b408efcff14a45103d5c42c891b85e2e90 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 14:46:58 -0700 Subject: [PATCH 6/8] fix(review): use backend-specific flag constant and assert presence in ordering test Uses CodexFlags.ADD_DIR when testing CodexBackend. Adds explicit assertion that the variadic flag is present before checking ordering, converting silent skips into visible failures. Co-Authored-By: Claude Opus 4.6 --- tests/arch/test_variadic_ordering.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/arch/test_variadic_ordering.py b/tests/arch/test_variadic_ordering.py index 2639dc67f0..b2e2912d11 100644 --- a/tests/arch/test_variadic_ordering.py +++ b/tests/arch/test_variadic_ordering.py @@ -8,6 +8,7 @@ from autoskillit.core import VARIADIC_CLAUDE_FLAGS, ClaudeFlags from autoskillit.execution.backends import ClaudeCodeBackend, CodexBackend +from autoskillit.execution.backends.codex import CodexFlags from autoskillit.execution.commands import build_interactive_cmd pytestmark = [pytest.mark.layer("arch"), pytest.mark.small] @@ -43,10 +44,12 @@ def test_positional_precedes_all_variadic_flags(variadic_kwargs): def test_all_backends_positional_precedes_variadic(backend_cls, variadic_kwargs): result = backend_cls().build_interactive_cmd(initial_prompt="test prompt", **variadic_kwargs) prompt_idx = list(result.cmd).index("test prompt") - flag_val = ClaudeFlags.ADD_DIR - if flag_val in result.cmd: - flag_idx = list(result.cmd).index(flag_val) - assert prompt_idx < flag_idx, ( - f"{backend_cls.__name__}: positional arg at index {prompt_idx} must precede " - f"variadic flag {flag_val!r} at index {flag_idx}" - ) + flag_val = CodexFlags.ADD_DIR if backend_cls is CodexBackend else ClaudeFlags.ADD_DIR + assert flag_val in result.cmd, ( + f"{backend_cls.__name__}: expected variadic flag {flag_val!r} in cmd but not found" + ) + flag_idx = list(result.cmd).index(flag_val) + assert prompt_idx < flag_idx, ( + f"{backend_cls.__name__}: positional arg at index {prompt_idx} must precede " + f"variadic flag {flag_val!r} at index {flag_idx}" + ) From f08b7336f44ac92a2e29ae9e655cf0d0e46ec261 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 14:52:23 -0700 Subject: [PATCH 7/8] fix(review): update resume tests to check origin.positional instead of cmd index The NamedResume session_id is now tracked in origin.positional (not mode_flags), so tests verify presence and semantic correctness rather than position-dependent indexing. Co-Authored-By: Claude Opus 4.6 --- tests/execution/backends/test_codex_backend.py | 4 +++- tests/execution/backends/test_codex_interactive.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/execution/backends/test_codex_backend.py b/tests/execution/backends/test_codex_backend.py index 56d3546eda..bcbda28b90 100644 --- a/tests/execution/backends/test_codex_backend.py +++ b/tests/execution/backends/test_codex_backend.py @@ -678,7 +678,9 @@ def test_named_resume_produces_resume_subcommand_with_session_id(self) -> None: spec = CodexBackend().build_interactive_cmd(resume_spec=NamedResume(session_id="abc")) assert spec.cmd[0] == "codex" assert spec.cmd[1] == CodexFlags.RESUME_SUBCOMMAND - assert spec.cmd[2] == "abc" + assert "abc" in spec.cmd + assert spec.origin is not None + assert "abc" in spec.origin.positional assert CodexFlags.DANGEROUSLY_BYPASS in spec.cmd def test_bare_resume_produces_resume_subcommand_without_session_id(self) -> None: diff --git a/tests/execution/backends/test_codex_interactive.py b/tests/execution/backends/test_codex_interactive.py index 1c73960c9f..6fb4fa8160 100644 --- a/tests/execution/backends/test_codex_interactive.py +++ b/tests/execution/backends/test_codex_interactive.py @@ -38,8 +38,9 @@ def test_named_resume_includes_resume_with_session_id(self) -> None: resume_spec=NamedResume(session_id="abc123"), ) assert CodexFlags.RESUME_SUBCOMMAND in spec.cmd - idx = list(spec.cmd).index(CodexFlags.RESUME_SUBCOMMAND) - assert spec.cmd[idx + 1] == "abc123" + assert "abc123" in spec.cmd + assert spec.origin is not None + assert "abc123" in spec.origin.positional def test_bare_resume_includes_resume_without_session_id(self) -> None: spec = CodexBackend().build_interactive_cmd(resume_spec=BareResume()) From 81ab78e3280760b205cc1d1cc5cfb78cab0c6223 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 15:32:54 -0700 Subject: [PATCH 8/8] fix(review): extract _collect_imported_names helper to deduplicate AST import-collection logic Co-Authored-By: Claude Opus 4.6 --- tests/arch/test_interactive_ordering_gate.py | 25 ++++++++------------ 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/tests/arch/test_interactive_ordering_gate.py b/tests/arch/test_interactive_ordering_gate.py index 21da4d4d0f..9912f8c994 100644 --- a/tests/arch/test_interactive_ordering_gate.py +++ b/tests/arch/test_interactive_ordering_gate.py @@ -57,17 +57,21 @@ def test_cook_calls_assert_interactive_ordering(): ) -def test_session_launch_imports_assert_interactive_ordering(): - source = _session_launch_source() +def _collect_imported_names(source: str) -> set[str]: tree = ast.parse(source) - imported_names: set[str] = set() + names: set[str] = set() for node in ast.walk(tree): if isinstance(node, ast.ImportFrom): for alias in node.names: - imported_names.add(alias.asname or alias.name) + names.add(alias.asname or alias.name) elif isinstance(node, ast.Import): for alias in node.names: - imported_names.add(alias.asname or alias.name) + names.add(alias.asname or alias.name) + return names + + +def test_session_launch_imports_assert_interactive_ordering(): + imported_names = _collect_imported_names(_session_launch_source()) assert _GATE_NAME in imported_names, ( f"_session_launch.py does not import {_GATE_NAME}. " "It must be imported and called before subprocess invocation." @@ -75,16 +79,7 @@ def test_session_launch_imports_assert_interactive_ordering(): def test_cook_imports_assert_interactive_ordering(): - source = _cook_source() - tree = ast.parse(source) - imported_names: set[str] = set() - for node in ast.walk(tree): - if isinstance(node, ast.ImportFrom): - for alias in node.names: - imported_names.add(alias.asname or alias.name) - elif isinstance(node, ast.Import): - for alias in node.names: - imported_names.add(alias.asname or alias.name) + imported_names = _collect_imported_names(_cook_source()) assert _GATE_NAME in imported_names, ( f"_cook.py does not import {_GATE_NAME}. " "It must be imported and called before subprocess invocation."