From e580dc7704fed74c0cbf01b1de0f90423ced0ada Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 19:01:54 -0700 Subject: [PATCH 1/5] fix: auto-fill step_provider from RecipeStep.provider before _resolve_provider_profile When run_skill is called with step_provider="" and a step_name that maps to a RecipeStep with provider="minimax", the server now resolves step_provider server-side BEFORE calling _resolve_provider_profile. This closes the 66-line ordering gap where the late auto-fill block ran after provider resolution and never included step_provider anyway. Defense-in-depth: also updated sous-chef SKILL.md to instruct the LLM to forward step_provider from recipe step provider: fields to run_skill. Tests: 3 new tests in test_tools_execution_step_resolution.py covering auto-fill, LLM-override precedence, and observability logging. 1 new contract test in test_sous_chef_parameter_forwarding.py. Closes #3366. Co-Authored-By: Claude Opus 4.7 --- .../server/tools/tools_execution.py | 10 ++ src/autoskillit/skills/sous-chef/SKILL.md | 11 +- tests/contracts/CLAUDE.md | 2 +- .../test_sous_chef_parameter_forwarding.py | 10 ++ tests/server/CLAUDE.md | 2 +- .../test_tools_execution_step_resolution.py | 105 ++++++++++++++++++ 6 files changed, 136 insertions(+), 4 deletions(-) diff --git a/src/autoskillit/server/tools/tools_execution.py b/src/autoskillit/server/tools/tools_execution.py index 6a0e7224a2..7054487e88 100644 --- a/src/autoskillit/server/tools/tools_execution.py +++ b/src/autoskillit/server/tools/tools_execution.py @@ -371,6 +371,16 @@ async def run_skill( _resolve_provider_profile, ) + if not step_provider and step_name and tool_ctx.active_recipe_steps is not None: + _recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name) + if _recipe_step_pre is not None and _recipe_step_pre.provider: + step_provider = _recipe_step_pre.provider + logger.warning( + "step_provider_resolved_from_recipe", + step=step_name, + provider=step_provider, + ) + _profile, _env_dict = _resolve_provider_profile( step_name or "", tool_ctx.recipe_name or "", diff --git a/src/autoskillit/skills/sous-chef/SKILL.md b/src/autoskillit/skills/sous-chef/SKILL.md index 89195eebf8..4e623aefbf 100644 --- a/src/autoskillit/skills/sous-chef/SKILL.md +++ b/src/autoskillit/skills/sous-chef/SKILL.md @@ -467,7 +467,7 @@ parallel run of the same step reports under the same canonical step name. --- -## PARAMETER FORWARDING — output_dir, stale_threshold, idle_output_timeout +## PARAMETER FORWARDING — output_dir, stale_threshold, idle_output_timeout, step_provider When a recipe step's `with:` block contains `output_dir`, you MUST pass it as the `output_dir` parameter of `run_skill`. This controls the write guard — omitting it @@ -476,11 +476,18 @@ causes the session to write to the wrong directory. When a recipe step has a top-level `stale_threshold` or `idle_output_timeout` field, pass it as the corresponding `run_skill` parameter. These control session kill thresholds. +When a recipe step has a top-level `provider` field, pass the value as the +`step_provider` parameter of `run_skill`. This controls which LLM provider +(e.g., Minimax, Bedrock) the session uses. Omitting it causes the session to +fall back to the default Anthropic provider, silently ignoring the recipe's +declared provider. + **Example:** ```yaml implement: tool: run_skill stale_threshold: 2400 + provider: minimax with: skill_command: "/implement ..." cwd: "${{ context.work_dir }}" @@ -488,7 +495,7 @@ implement: output_dir: "${{ context.work_dir }}/${{ context.autoskillit_temp }}" ``` -Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400)` +Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400, step_provider="minimax")` This provides defense-in-depth: the server resolves parameters server-side, AND the LLM is instructed to forward them. diff --git a/tests/contracts/CLAUDE.md b/tests/contracts/CLAUDE.md index 282a02dd28..90a1ad92c9 100644 --- a/tests/contracts/CLAUDE.md +++ b/tests/contracts/CLAUDE.md @@ -66,7 +66,7 @@ Protocol satisfaction, package gateway, and skill contract compliance tests. | `test_skill_transition_boundaries.py` | Contract tests for anti-confirmation instructions at SKILL.md transition boundaries | | `test_skill_yaml_validation.py` | Contract: YAML workflow examples embedded in SKILL.md files must be valid recipes | | `test_skill_format_compliance.py` | Universal contract: all bundled SKILL.md files have valid frontmatter | -| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout) to run_skill | +| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout, step_provider) to run_skill | | `test_sous_chef_quota_protocol.py` | Contract test: sous-chef SKILL.md must contain QUOTA WAIT PROTOCOL section | | `test_sous_chef_routing.py` | Contract tests for the CONTEXT LIMIT ROUTING section in sous-chef SKILL.md | | `test_sous_chef_scheduling.py` | Contract tests for the PARALLEL STEP SCHEDULING section in sous-chef SKILL.md | diff --git a/tests/contracts/test_sous_chef_parameter_forwarding.py b/tests/contracts/test_sous_chef_parameter_forwarding.py index a8d8f83caf..285d1995e6 100644 --- a/tests/contracts/test_sous_chef_parameter_forwarding.py +++ b/tests/contracts/test_sous_chef_parameter_forwarding.py @@ -17,3 +17,13 @@ def test_sous_chef_skill_md_mentions_output_dir(): "The LLM must be instructed to forward output_dir from recipe " "step with: blocks to run_skill." ) + + +def test_sous_chef_skill_md_mentions_step_provider(): + """Sous-chef SKILL.md must instruct the LLM to forward step_provider.""" + skill_md = Path("src/autoskillit/skills/sous-chef/SKILL.md").read_text() + assert "step_provider" in skill_md, ( + "sous-chef SKILL.md does not mention step_provider. " + "The LLM must be instructed to forward step_provider from recipe " + "step provider: fields to run_skill." + ) diff --git a/tests/server/CLAUDE.md b/tests/server/CLAUDE.md index 1366d621bd..142fe4e85d 100644 --- a/tests/server/CLAUDE.md +++ b/tests/server/CLAUDE.md @@ -67,7 +67,7 @@ Server tool handler unit tests — kitchen, execution, CI, clone, workspace tool | `test_tools_execution_response.py` | Contract tests: MCP tool response fields use correct enum types | | `test_tools_execution_results.py` | Tests for run_skill result shapes, failure paths, timing, flush telemetry, and gate checks | | `test_tools_execution_routing.py` | Tests for run_skill routing, executor delegation, and session skill management | -| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout auto-filled from cached recipe step definitions) | +| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout, step_provider auto-filled from cached recipe step definitions) | | `test_tools_execution_backend_mixing.py` | Integration tests for per-step backend mixing in run_skill() — Codex backend + ANTHROPIC_BASE_URL derives backend_override='claude-code' | | `test_tools_execution_write_prefix.py` | Tests for allowed_write_prefix computation decoupled from read_only | | `test_tools_git.py` | Tests for merge_worktree core flow: happy path, test gate, rebase abort, bypass prevention | diff --git a/tests/server/test_tools_execution_step_resolution.py b/tests/server/test_tools_execution_step_resolution.py index 065fee4503..ef616cb9e7 100644 --- a/tests/server/test_tools_execution_step_resolution.py +++ b/tests/server/test_tools_execution_step_resolution.py @@ -211,3 +211,108 @@ async def test_run_skill_auto_fills_relative_output_dir_from_recipe( expected = str(tmp_path / ".autoskillit" / "temp") + "/" assert executor.calls[0].allowed_write_prefix == expected assert executor.calls[0].allowed_write_prefixes == (expected,) + + +@pytest.mark.anyio +async def test_run_skill_resolves_step_provider_from_recipe_step( + tool_ctx_kitchen_open, monkeypatch, tmp_path +) -> None: + """step_provider auto-filled from RecipeStep.provider when caller omits it.""" + from autoskillit.recipe.schema import RecipeStep + from tests.fakes import InMemoryHeadlessExecutor + + executor = InMemoryHeadlessExecutor() + tool_ctx_kitchen_open.executor = executor + + step = RecipeStep(name="run_canaries", provider="minimax") + tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step} + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open) + monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True) + + captured_kwargs: dict = {} + + def spy(*args, **kwargs): + captured_kwargs.update(kwargs) + return ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"}) + + monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy) + + await run_skill( + "/eval-agent --agent-name test", + str(tmp_path), + step_name="run_canaries", + step_provider="", + ) + + assert captured_kwargs["step_provider"] == "minimax" + assert executor.calls[0].provider_name == "minimax" + + +@pytest.mark.anyio +async def test_run_skill_llm_step_provider_overrides_recipe_step( + tool_ctx_kitchen_open, monkeypatch, tmp_path +) -> None: + """Explicit caller step_provider must not be overridden by recipe step.""" + from autoskillit.recipe.schema import RecipeStep + from tests.fakes import InMemoryHeadlessExecutor + + executor = InMemoryHeadlessExecutor() + tool_ctx_kitchen_open.executor = executor + + step = RecipeStep(name="run_canaries", provider="minimax") + tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step} + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open) + monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True) + + captured_kwargs: dict = {} + + def spy(*args, **kwargs): + captured_kwargs.update(kwargs) + return ("bedrock", {"AWS_REGION": "us-east-1"}) + + monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy) + + await run_skill( + "/eval-agent --agent-name test", + str(tmp_path), + step_name="run_canaries", + step_provider="bedrock", + ) + + assert captured_kwargs["step_provider"] == "bedrock" + + +@pytest.mark.anyio +async def test_run_skill_logs_warning_when_step_provider_resolved_from_recipe( + tool_ctx_kitchen_open, monkeypatch, tmp_path +) -> None: + """Server-side step_provider resolution must log for observability.""" + import structlog.testing + + from autoskillit.recipe.schema import RecipeStep + from tests.fakes import InMemoryHeadlessExecutor + + executor = InMemoryHeadlessExecutor() + tool_ctx_kitchen_open.executor = executor + + step = RecipeStep(name="run_canaries", provider="minimax") + tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step} + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open) + monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True) + monkeypatch.setattr( + "autoskillit.server._guards._resolve_provider_profile", + lambda *a, **kw: ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"}), + ) + + with structlog.testing.capture_logs() as cap: + await run_skill( + "/eval-agent --agent-name test", + str(tmp_path), + step_name="run_canaries", + step_provider="", + ) + + resolved_events = [e for e in cap if e.get("event") == "step_provider_resolved_from_recipe"] + assert len(resolved_events) == 1 + assert resolved_events[0]["step"] == "run_canaries" + assert resolved_events[0]["provider"] == "minimax" From cf062e1f0b5891809a23d499856ee6a724ebdffa Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 19:26:51 -0700 Subject: [PATCH 2/5] fix(review): remove misleading executor.provider_name assertion in step_provider test The spy always returns a hardcoded ('minimax', ...) tuple, so the assertion executor.calls[0].provider_name == 'minimax' cannot distinguish whether auto-fill logic ran vs. the spy's hardcoded return. The meaningful check captured_kwargs['step_provider'] == 'minimax' is already present and sufficient. Co-Authored-By: Claude Sonnet 4.6 --- tests/server/test_tools_execution_step_resolution.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/server/test_tools_execution_step_resolution.py b/tests/server/test_tools_execution_step_resolution.py index ef616cb9e7..dac46599b3 100644 --- a/tests/server/test_tools_execution_step_resolution.py +++ b/tests/server/test_tools_execution_step_resolution.py @@ -245,7 +245,6 @@ def spy(*args, **kwargs): ) assert captured_kwargs["step_provider"] == "minimax" - assert executor.calls[0].provider_name == "minimax" @pytest.mark.anyio From c04d2f40e375da79988f09a65a3c4a3cc7ab9abf Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 19:45:27 -0700 Subject: [PATCH 3/5] fix(review): move step_provider auto-fill outside providers feature gate step_provider was the only recipe-step parameter whose server-side auto-fill was gated on is_feature_enabled('providers'). The analogous auto-fills for output_dir, stale_threshold, and idle_output_timeout live in the unconditional block at the bottom of run_skill. Move step_provider there for consistency, reusing the already-fetched _recipe_step variable. Co-Authored-By: Claude Sonnet 4.6 --- .../server/tools/tools_execution.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/autoskillit/server/tools/tools_execution.py b/src/autoskillit/server/tools/tools_execution.py index 7054487e88..684a921d29 100644 --- a/src/autoskillit/server/tools/tools_execution.py +++ b/src/autoskillit/server/tools/tools_execution.py @@ -371,16 +371,6 @@ async def run_skill( _resolve_provider_profile, ) - if not step_provider and step_name and tool_ctx.active_recipe_steps is not None: - _recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name) - if _recipe_step_pre is not None and _recipe_step_pre.provider: - step_provider = _recipe_step_pre.provider - logger.warning( - "step_provider_resolved_from_recipe", - step=step_name, - provider=step_provider, - ) - _profile, _env_dict = _resolve_provider_profile( step_name or "", tool_ctx.recipe_name or "", @@ -485,6 +475,14 @@ async def run_skill( value=idle_output_timeout, ) + if not step_provider and _recipe_step.provider: + step_provider = _recipe_step.provider + logger.warning( + "step_provider_resolved_from_recipe", + step=step_name, + provider=step_provider, + ) + write_watch_dirs: list[Path] = [] if output_dir: resolved_dir = Path(output_dir) From 8e8edc6a01141b637f4c60e9c6327d9f9fe72a7b Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 19:45:39 -0700 Subject: [PATCH 4/5] fix(review): use __file__-anchored path in test_sous_chef_skill_md_mentions_step_provider Path("src/...").read_text() is cwd-relative and raises FileNotFoundError when pytest is invoked from a non-root directory. Use Path(__file__).parents[2] to anchor the path to the test file's location. Co-Authored-By: Claude Sonnet 4.6 --- tests/contracts/test_sous_chef_parameter_forwarding.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/contracts/test_sous_chef_parameter_forwarding.py b/tests/contracts/test_sous_chef_parameter_forwarding.py index 285d1995e6..c1049f2701 100644 --- a/tests/contracts/test_sous_chef_parameter_forwarding.py +++ b/tests/contracts/test_sous_chef_parameter_forwarding.py @@ -21,7 +21,9 @@ def test_sous_chef_skill_md_mentions_output_dir(): def test_sous_chef_skill_md_mentions_step_provider(): """Sous-chef SKILL.md must instruct the LLM to forward step_provider.""" - skill_md = Path("src/autoskillit/skills/sous-chef/SKILL.md").read_text() + skill_md = ( + Path(__file__).parents[2] / "src/autoskillit/skills/sous-chef/SKILL.md" + ).read_text() assert "step_provider" in skill_md, ( "sous-chef SKILL.md does not mention step_provider. " "The LLM must be instructed to forward step_provider from recipe " From 77f693ab1c3bc65de818e783abb10bfa6c7e5fe2 Mon Sep 17 00:00:00 2001 From: Trecek Date: Sat, 30 May 2026 19:51:04 -0700 Subject: [PATCH 5/5] fix(review): correct step_provider auto-fill placement to pre-providers-gate The first fix placed the auto-fill in the post-gate unconditional block, but _resolve_provider_profile is called inside the gate, so step_provider must be filled before the gate check. Move it to the pre-gate position (after _cfg but before is_feature_enabled) so it runs unconditionally and the filled value reaches _resolve_provider_profile on the same execution path. Co-Authored-By: Claude Sonnet 4.6 --- .../server/tools/tools_execution.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/autoskillit/server/tools/tools_execution.py b/src/autoskillit/server/tools/tools_execution.py index 684a921d29..cac03cb03a 100644 --- a/src/autoskillit/server/tools/tools_execution.py +++ b/src/autoskillit/server/tools/tools_execution.py @@ -363,6 +363,16 @@ async def run_skill( ) _cfg = _get_config() + if not step_provider and step_name and tool_ctx.active_recipe_steps is not None: + _recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name) + if _recipe_step_pre is not None and _recipe_step_pre.provider: + step_provider = _recipe_step_pre.provider + logger.warning( + "step_provider_resolved_from_recipe", + step=step_name, + provider=step_provider, + ) + if is_feature_enabled( "providers", _cfg.features, experimental_enabled=_cfg.experimental_enabled ): @@ -475,14 +485,6 @@ async def run_skill( value=idle_output_timeout, ) - if not step_provider and _recipe_step.provider: - step_provider = _recipe_step.provider - logger.warning( - "step_provider_resolved_from_recipe", - step=step_name, - provider=step_provider, - ) - write_watch_dirs: list[Path] = [] if output_dir: resolved_dir = Path(output_dir)