From e580dc7704fed74c0cbf01b1de0f90423ced0ada Mon Sep 17 00:00:00 2001
From: Trecek <talontrecek@gmail.com>
Date: Sat, 30 May 2026 19:01:54 -0700
Subject: [PATCH 1/5] fix: auto-fill step_provider from RecipeStep.provider
 before _resolve_provider_profile

When run_skill is called with step_provider="" and a step_name that maps to a
RecipeStep with provider="minimax", the server now resolves step_provider
server-side BEFORE calling _resolve_provider_profile. This closes the 66-line
ordering gap where the late auto-fill block ran after provider resolution and
never included step_provider anyway.

Defense-in-depth: also updated sous-chef SKILL.md to instruct the LLM to
forward step_provider from recipe step provider: fields to run_skill.

Tests: 3 new tests in test_tools_execution_step_resolution.py covering
auto-fill, LLM-override precedence, and observability logging. 1 new
contract test in test_sous_chef_parameter_forwarding.py.

Closes #3366.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../server/tools/tools_execution.py           |  10 ++
 src/autoskillit/skills/sous-chef/SKILL.md     |  11 +-
 tests/contracts/CLAUDE.md                     |   2 +-
 .../test_sous_chef_parameter_forwarding.py    |  10 ++
 tests/server/CLAUDE.md                        |   2 +-
 .../test_tools_execution_step_resolution.py   | 105 ++++++++++++++++++
 6 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/src/autoskillit/server/tools/tools_execution.py b/src/autoskillit/server/tools/tools_execution.py
index 6a0e7224a2..7054487e88 100644
--- a/src/autoskillit/server/tools/tools_execution.py
+++ b/src/autoskillit/server/tools/tools_execution.py
@@ -371,6 +371,16 @@ async def run_skill(
                     _resolve_provider_profile,
                 )
 
+                if not step_provider and step_name and tool_ctx.active_recipe_steps is not None:
+                    _recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name)
+                    if _recipe_step_pre is not None and _recipe_step_pre.provider:
+                        step_provider = _recipe_step_pre.provider
+                        logger.warning(
+                            "step_provider_resolved_from_recipe",
+                            step=step_name,
+                            provider=step_provider,
+                        )
+
                 _profile, _env_dict = _resolve_provider_profile(
                     step_name or "",
                     tool_ctx.recipe_name or "",
diff --git a/src/autoskillit/skills/sous-chef/SKILL.md b/src/autoskillit/skills/sous-chef/SKILL.md
index 89195eebf8..4e623aefbf 100644
--- a/src/autoskillit/skills/sous-chef/SKILL.md
+++ b/src/autoskillit/skills/sous-chef/SKILL.md
@@ -467,7 +467,7 @@ parallel run of the same step reports under the same canonical step name.
 
 ---
 
-## PARAMETER FORWARDING — output_dir, stale_threshold, idle_output_timeout
+## PARAMETER FORWARDING — output_dir, stale_threshold, idle_output_timeout, step_provider
 
 When a recipe step's `with:` block contains `output_dir`, you MUST pass it as the
 `output_dir` parameter of `run_skill`. This controls the write guard — omitting it
@@ -476,11 +476,18 @@ causes the session to write to the wrong directory.
 When a recipe step has a top-level `stale_threshold` or `idle_output_timeout` field,
 pass it as the corresponding `run_skill` parameter. These control session kill thresholds.
 
+When a recipe step has a top-level `provider` field, pass the value as the
+`step_provider` parameter of `run_skill`. This controls which LLM provider
+(e.g., Minimax, Bedrock) the session uses. Omitting it causes the session to
+fall back to the default Anthropic provider, silently ignoring the recipe's
+declared provider.
+
 **Example:**
 ```yaml
 implement:
   tool: run_skill
   stale_threshold: 2400
+  provider: minimax
   with:
     skill_command: "/implement ..."
     cwd: "${{ context.work_dir }}"
@@ -488,7 +495,7 @@ implement:
     output_dir: "${{ context.work_dir }}/${{ context.autoskillit_temp }}"
 ```
 
-Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400)`
+Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400, step_provider="minimax")`
 
 This provides defense-in-depth: the server resolves parameters server-side, AND the LLM is instructed to forward them.
 
diff --git a/tests/contracts/CLAUDE.md b/tests/contracts/CLAUDE.md
index 282a02dd28..90a1ad92c9 100644
--- a/tests/contracts/CLAUDE.md
+++ b/tests/contracts/CLAUDE.md
@@ -66,7 +66,7 @@ Protocol satisfaction, package gateway, and skill contract compliance tests.
 | `test_skill_transition_boundaries.py` | Contract tests for anti-confirmation instructions at SKILL.md transition boundaries |
 | `test_skill_yaml_validation.py` | Contract: YAML workflow examples embedded in SKILL.md files must be valid recipes |
 | `test_skill_format_compliance.py` | Universal contract: all bundled SKILL.md files have valid frontmatter |
-| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout) to run_skill |
+| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout, step_provider) to run_skill |
 | `test_sous_chef_quota_protocol.py` | Contract test: sous-chef SKILL.md must contain QUOTA WAIT PROTOCOL section |
 | `test_sous_chef_routing.py` | Contract tests for the CONTEXT LIMIT ROUTING section in sous-chef SKILL.md |
 | `test_sous_chef_scheduling.py` | Contract tests for the PARALLEL STEP SCHEDULING section in sous-chef SKILL.md |
diff --git a/tests/contracts/test_sous_chef_parameter_forwarding.py b/tests/contracts/test_sous_chef_parameter_forwarding.py
index a8d8f83caf..285d1995e6 100644
--- a/tests/contracts/test_sous_chef_parameter_forwarding.py
+++ b/tests/contracts/test_sous_chef_parameter_forwarding.py
@@ -17,3 +17,13 @@ def test_sous_chef_skill_md_mentions_output_dir():
         "The LLM must be instructed to forward output_dir from recipe "
         "step with: blocks to run_skill."
     )
+
+
+def test_sous_chef_skill_md_mentions_step_provider():
+    """Sous-chef SKILL.md must instruct the LLM to forward step_provider."""
+    skill_md = Path("src/autoskillit/skills/sous-chef/SKILL.md").read_text()
+    assert "step_provider" in skill_md, (
+        "sous-chef SKILL.md does not mention step_provider. "
+        "The LLM must be instructed to forward step_provider from recipe "
+        "step provider: fields to run_skill."
+    )
diff --git a/tests/server/CLAUDE.md b/tests/server/CLAUDE.md
index 1366d621bd..142fe4e85d 100644
--- a/tests/server/CLAUDE.md
+++ b/tests/server/CLAUDE.md
@@ -67,7 +67,7 @@ Server tool handler unit tests — kitchen, execution, CI, clone, workspace tool
 | `test_tools_execution_response.py` | Contract tests: MCP tool response fields use correct enum types |
 | `test_tools_execution_results.py` | Tests for run_skill result shapes, failure paths, timing, flush telemetry, and gate checks |
 | `test_tools_execution_routing.py` | Tests for run_skill routing, executor delegation, and session skill management |
-| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout auto-filled from cached recipe step definitions) |
+| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout, step_provider auto-filled from cached recipe step definitions) |
 | `test_tools_execution_backend_mixing.py` | Integration tests for per-step backend mixing in run_skill() — Codex backend + ANTHROPIC_BASE_URL derives backend_override='claude-code' |
 | `test_tools_execution_write_prefix.py` | Tests for allowed_write_prefix computation decoupled from read_only |
 | `test_tools_git.py` | Tests for merge_worktree core flow: happy path, test gate, rebase abort, bypass prevention |
diff --git a/tests/server/test_tools_execution_step_resolution.py b/tests/server/test_tools_execution_step_resolution.py
index 065fee4503..ef616cb9e7 100644
--- a/tests/server/test_tools_execution_step_resolution.py
+++ b/tests/server/test_tools_execution_step_resolution.py
@@ -211,3 +211,108 @@ async def test_run_skill_auto_fills_relative_output_dir_from_recipe(
     expected = str(tmp_path / ".autoskillit" / "temp") + "/"
     assert executor.calls[0].allowed_write_prefix == expected
     assert executor.calls[0].allowed_write_prefixes == (expected,)
+
+
+@pytest.mark.anyio
+async def test_run_skill_resolves_step_provider_from_recipe_step(
+    tool_ctx_kitchen_open, monkeypatch, tmp_path
+) -> None:
+    """step_provider auto-filled from RecipeStep.provider when caller omits it."""
+    from autoskillit.recipe.schema import RecipeStep
+    from tests.fakes import InMemoryHeadlessExecutor
+
+    executor = InMemoryHeadlessExecutor()
+    tool_ctx_kitchen_open.executor = executor
+
+    step = RecipeStep(name="run_canaries", provider="minimax")
+    tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step}
+    monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open)
+    monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True)
+
+    captured_kwargs: dict = {}
+
+    def spy(*args, **kwargs):
+        captured_kwargs.update(kwargs)
+        return ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"})
+
+    monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy)
+
+    await run_skill(
+        "/eval-agent --agent-name test",
+        str(tmp_path),
+        step_name="run_canaries",
+        step_provider="",
+    )
+
+    assert captured_kwargs["step_provider"] == "minimax"
+    assert executor.calls[0].provider_name == "minimax"
+
+
+@pytest.mark.anyio
+async def test_run_skill_llm_step_provider_overrides_recipe_step(
+    tool_ctx_kitchen_open, monkeypatch, tmp_path
+) -> None:
+    """Explicit caller step_provider must not be overridden by recipe step."""
+    from autoskillit.recipe.schema import RecipeStep
+    from tests.fakes import InMemoryHeadlessExecutor
+
+    executor = InMemoryHeadlessExecutor()
+    tool_ctx_kitchen_open.executor = executor
+
+    step = RecipeStep(name="run_canaries", provider="minimax")
+    tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step}
+    monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open)
+    monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True)
+
+    captured_kwargs: dict = {}
+
+    def spy(*args, **kwargs):
+        captured_kwargs.update(kwargs)
+        return ("bedrock", {"AWS_REGION": "us-east-1"})
+
+    monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy)
+
+    await run_skill(
+        "/eval-agent --agent-name test",
+        str(tmp_path),
+        step_name="run_canaries",
+        step_provider="bedrock",
+    )
+
+    assert captured_kwargs["step_provider"] == "bedrock"
+
+
+@pytest.mark.anyio
+async def test_run_skill_logs_warning_when_step_provider_resolved_from_recipe(
+    tool_ctx_kitchen_open, monkeypatch, tmp_path
+) -> None:
+    """Server-side step_provider resolution must log for observability."""
+    import structlog.testing
+
+    from autoskillit.recipe.schema import RecipeStep
+    from tests.fakes import InMemoryHeadlessExecutor
+
+    executor = InMemoryHeadlessExecutor()
+    tool_ctx_kitchen_open.executor = executor
+
+    step = RecipeStep(name="run_canaries", provider="minimax")
+    tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step}
+    monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open)
+    monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True)
+    monkeypatch.setattr(
+        "autoskillit.server._guards._resolve_provider_profile",
+        lambda *a, **kw: ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"}),
+    )
+
+    with structlog.testing.capture_logs() as cap:
+        await run_skill(
+            "/eval-agent --agent-name test",
+            str(tmp_path),
+            step_name="run_canaries",
+            step_provider="",
+        )
+
+    resolved_events = [e for e in cap if e.get("event") == "step_provider_resolved_from_recipe"]
+    assert len(resolved_events) == 1
+    assert resolved_events[0]["step"] == "run_canaries"
+    assert resolved_events[0]["provider"] == "minimax"

From cf062e1f0b5891809a23d499856ee6a724ebdffa Mon Sep 17 00:00:00 2001
From: Trecek <talontrecek@gmail.com>
Date: Sat, 30 May 2026 19:26:51 -0700
Subject: [PATCH 2/5] fix(review): remove misleading executor.provider_name
 assertion in step_provider test

The spy always returns a hardcoded ('minimax', ...) tuple, so the assertion
executor.calls[0].provider_name == 'minimax' cannot distinguish whether
auto-fill logic ran vs. the spy's hardcoded return.  The meaningful check
captured_kwargs['step_provider'] == 'minimax' is already present and sufficient.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/server/test_tools_execution_step_resolution.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/server/test_tools_execution_step_resolution.py b/tests/server/test_tools_execution_step_resolution.py
index ef616cb9e7..dac46599b3 100644
--- a/tests/server/test_tools_execution_step_resolution.py
+++ b/tests/server/test_tools_execution_step_resolution.py
@@ -245,7 +245,6 @@ def spy(*args, **kwargs):
     )
 
     assert captured_kwargs["step_provider"] == "minimax"
-    assert executor.calls[0].provider_name == "minimax"
 
 
 @pytest.mark.anyio

From c04d2f40e375da79988f09a65a3c4a3cc7ab9abf Mon Sep 17 00:00:00 2001
From: Trecek <talontrecek@gmail.com>
Date: Sat, 30 May 2026 19:45:27 -0700
Subject: [PATCH 3/5] fix(review): move step_provider auto-fill outside
 providers feature gate

step_provider was the only recipe-step parameter whose server-side auto-fill
was gated on is_feature_enabled('providers'). The analogous auto-fills for
output_dir, stale_threshold, and idle_output_timeout live in the unconditional
block at the bottom of run_skill. Move step_provider there for consistency,
reusing the already-fetched _recipe_step variable.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../server/tools/tools_execution.py            | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/autoskillit/server/tools/tools_execution.py b/src/autoskillit/server/tools/tools_execution.py
index 7054487e88..684a921d29 100644
--- a/src/autoskillit/server/tools/tools_execution.py
+++ b/src/autoskillit/server/tools/tools_execution.py
@@ -371,16 +371,6 @@ async def run_skill(
                     _resolve_provider_profile,
                 )
 
-                if not step_provider and step_name and tool_ctx.active_recipe_steps is not None:
-                    _recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name)
-                    if _recipe_step_pre is not None and _recipe_step_pre.provider:
-                        step_provider = _recipe_step_pre.provider
-                        logger.warning(
-                            "step_provider_resolved_from_recipe",
-                            step=step_name,
-                            provider=step_provider,
-                        )
-
                 _profile, _env_dict = _resolve_provider_profile(
                     step_name or "",
                     tool_ctx.recipe_name or "",
@@ -485,6 +475,14 @@ async def run_skill(
                             value=idle_output_timeout,
                         )
 
+                    if not step_provider and _recipe_step.provider:
+                        step_provider = _recipe_step.provider
+                        logger.warning(
+                            "step_provider_resolved_from_recipe",
+                            step=step_name,
+                            provider=step_provider,
+                        )
+
             write_watch_dirs: list[Path] = []
             if output_dir:
                 resolved_dir = Path(output_dir)

From 8e8edc6a01141b637f4c60e9c6327d9f9fe72a7b Mon Sep 17 00:00:00 2001
From: Trecek <talontrecek@gmail.com>
Date: Sat, 30 May 2026 19:45:39 -0700
Subject: [PATCH 4/5] fix(review): use __file__-anchored path in
 test_sous_chef_skill_md_mentions_step_provider

Path("src/...").read_text() is cwd-relative and raises FileNotFoundError when
pytest is invoked from a non-root directory. Use Path(__file__).parents[2] to
anchor the path to the test file's location.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/contracts/test_sous_chef_parameter_forwarding.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/contracts/test_sous_chef_parameter_forwarding.py b/tests/contracts/test_sous_chef_parameter_forwarding.py
index 285d1995e6..c1049f2701 100644
--- a/tests/contracts/test_sous_chef_parameter_forwarding.py
+++ b/tests/contracts/test_sous_chef_parameter_forwarding.py
@@ -21,7 +21,9 @@ def test_sous_chef_skill_md_mentions_output_dir():
 
 def test_sous_chef_skill_md_mentions_step_provider():
     """Sous-chef SKILL.md must instruct the LLM to forward step_provider."""
-    skill_md = Path("src/autoskillit/skills/sous-chef/SKILL.md").read_text()
+    skill_md = (
+        Path(__file__).parents[2] / "src/autoskillit/skills/sous-chef/SKILL.md"
+    ).read_text()
     assert "step_provider" in skill_md, (
         "sous-chef SKILL.md does not mention step_provider. "
         "The LLM must be instructed to forward step_provider from recipe "

From 77f693ab1c3bc65de818e783abb10bfa6c7e5fe2 Mon Sep 17 00:00:00 2001
From: Trecek <talontrecek@gmail.com>
Date: Sat, 30 May 2026 19:51:04 -0700
Subject: [PATCH 5/5] fix(review): correct step_provider auto-fill placement to
 pre-providers-gate

The first fix placed the auto-fill in the post-gate unconditional block, but
_resolve_provider_profile is called inside the gate, so step_provider must be
filled before the gate check. Move it to the pre-gate position (after _cfg but
before is_feature_enabled) so it runs unconditionally and the filled value
reaches _resolve_provider_profile on the same execution path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../server/tools/tools_execution.py            | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/autoskillit/server/tools/tools_execution.py b/src/autoskillit/server/tools/tools_execution.py
index 684a921d29..cac03cb03a 100644
--- a/src/autoskillit/server/tools/tools_execution.py
+++ b/src/autoskillit/server/tools/tools_execution.py
@@ -363,6 +363,16 @@ async def run_skill(
             )
 
             _cfg = _get_config()
+            if not step_provider and step_name and tool_ctx.active_recipe_steps is not None:
+                _recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name)
+                if _recipe_step_pre is not None and _recipe_step_pre.provider:
+                    step_provider = _recipe_step_pre.provider
+                    logger.warning(
+                        "step_provider_resolved_from_recipe",
+                        step=step_name,
+                        provider=step_provider,
+                    )
+
             if is_feature_enabled(
                 "providers", _cfg.features, experimental_enabled=_cfg.experimental_enabled
             ):
@@ -475,14 +485,6 @@ async def run_skill(
                             value=idle_output_timeout,
                         )
 
-                    if not step_provider and _recipe_step.provider:
-                        step_provider = _recipe_step.provider
-                        logger.warning(
-                            "step_provider_resolved_from_recipe",
-                            step=step_name,
-                            provider=step_provider,
-                        )
-
             write_watch_dirs: list[Path] = []
             if output_dir:
                 resolved_dir = Path(output_dir)