diff --git a/src/autoskillit/server/tools/tools_execution.py b/src/autoskillit/server/tools/tools_execution.py index 6a0e7224a2..cac03cb03a 100644 --- a/src/autoskillit/server/tools/tools_execution.py +++ b/src/autoskillit/server/tools/tools_execution.py @@ -363,6 +363,16 @@ async def run_skill( ) _cfg = _get_config() + if not step_provider and step_name and tool_ctx.active_recipe_steps is not None: + _recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name) + if _recipe_step_pre is not None and _recipe_step_pre.provider: + step_provider = _recipe_step_pre.provider + logger.warning( + "step_provider_resolved_from_recipe", + step=step_name, + provider=step_provider, + ) + if is_feature_enabled( "providers", _cfg.features, experimental_enabled=_cfg.experimental_enabled ): diff --git a/src/autoskillit/skills/sous-chef/SKILL.md b/src/autoskillit/skills/sous-chef/SKILL.md index 89195eebf8..4e623aefbf 100644 --- a/src/autoskillit/skills/sous-chef/SKILL.md +++ b/src/autoskillit/skills/sous-chef/SKILL.md @@ -467,7 +467,7 @@ parallel run of the same step reports under the same canonical step name. --- -## PARAMETER FORWARDING — output_dir, stale_threshold, idle_output_timeout +## PARAMETER FORWARDING — output_dir, stale_threshold, idle_output_timeout, step_provider When a recipe step's `with:` block contains `output_dir`, you MUST pass it as the `output_dir` parameter of `run_skill`. This controls the write guard — omitting it @@ -476,11 +476,18 @@ causes the session to write to the wrong directory. When a recipe step has a top-level `stale_threshold` or `idle_output_timeout` field, pass it as the corresponding `run_skill` parameter. These control session kill thresholds. +When a recipe step has a top-level `provider` field, pass the value as the +`step_provider` parameter of `run_skill`. This controls which LLM provider +(e.g., Minimax, Bedrock) the session uses. Omitting it causes the session to +fall back to the default Anthropic provider, silently ignoring the recipe's +declared provider. + **Example:** ```yaml implement: tool: run_skill stale_threshold: 2400 + provider: minimax with: skill_command: "/implement ..." cwd: "${{ context.work_dir }}" @@ -488,7 +495,7 @@ implement: output_dir: "${{ context.work_dir }}/${{ context.autoskillit_temp }}" ``` -Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400)` +Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400, step_provider="minimax")` This provides defense-in-depth: the server resolves parameters server-side, AND the LLM is instructed to forward them. diff --git a/tests/contracts/CLAUDE.md b/tests/contracts/CLAUDE.md index 282a02dd28..90a1ad92c9 100644 --- a/tests/contracts/CLAUDE.md +++ b/tests/contracts/CLAUDE.md @@ -66,7 +66,7 @@ Protocol satisfaction, package gateway, and skill contract compliance tests. | `test_skill_transition_boundaries.py` | Contract tests for anti-confirmation instructions at SKILL.md transition boundaries | | `test_skill_yaml_validation.py` | Contract: YAML workflow examples embedded in SKILL.md files must be valid recipes | | `test_skill_format_compliance.py` | Universal contract: all bundled SKILL.md files have valid frontmatter | -| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout) to run_skill | +| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout, step_provider) to run_skill | | `test_sous_chef_quota_protocol.py` | Contract test: sous-chef SKILL.md must contain QUOTA WAIT PROTOCOL section | | `test_sous_chef_routing.py` | Contract tests for the CONTEXT LIMIT ROUTING section in sous-chef SKILL.md | | `test_sous_chef_scheduling.py` | Contract tests for the PARALLEL STEP SCHEDULING section in sous-chef SKILL.md | diff --git a/tests/contracts/test_sous_chef_parameter_forwarding.py b/tests/contracts/test_sous_chef_parameter_forwarding.py index a8d8f83caf..c1049f2701 100644 --- a/tests/contracts/test_sous_chef_parameter_forwarding.py +++ b/tests/contracts/test_sous_chef_parameter_forwarding.py @@ -17,3 +17,15 @@ def test_sous_chef_skill_md_mentions_output_dir(): "The LLM must be instructed to forward output_dir from recipe " "step with: blocks to run_skill." ) + + +def test_sous_chef_skill_md_mentions_step_provider(): + """Sous-chef SKILL.md must instruct the LLM to forward step_provider.""" + skill_md = ( + Path(__file__).parents[2] / "src/autoskillit/skills/sous-chef/SKILL.md" + ).read_text() + assert "step_provider" in skill_md, ( + "sous-chef SKILL.md does not mention step_provider. " + "The LLM must be instructed to forward step_provider from recipe " + "step provider: fields to run_skill." + ) diff --git a/tests/server/CLAUDE.md b/tests/server/CLAUDE.md index 1366d621bd..142fe4e85d 100644 --- a/tests/server/CLAUDE.md +++ b/tests/server/CLAUDE.md @@ -67,7 +67,7 @@ Server tool handler unit tests — kitchen, execution, CI, clone, workspace tool | `test_tools_execution_response.py` | Contract tests: MCP tool response fields use correct enum types | | `test_tools_execution_results.py` | Tests for run_skill result shapes, failure paths, timing, flush telemetry, and gate checks | | `test_tools_execution_routing.py` | Tests for run_skill routing, executor delegation, and session skill management | -| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout auto-filled from cached recipe step definitions) | +| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout, step_provider auto-filled from cached recipe step definitions) | | `test_tools_execution_backend_mixing.py` | Integration tests for per-step backend mixing in run_skill() — Codex backend + ANTHROPIC_BASE_URL derives backend_override='claude-code' | | `test_tools_execution_write_prefix.py` | Tests for allowed_write_prefix computation decoupled from read_only | | `test_tools_git.py` | Tests for merge_worktree core flow: happy path, test gate, rebase abort, bypass prevention | diff --git a/tests/server/test_tools_execution_step_resolution.py b/tests/server/test_tools_execution_step_resolution.py index 065fee4503..dac46599b3 100644 --- a/tests/server/test_tools_execution_step_resolution.py +++ b/tests/server/test_tools_execution_step_resolution.py @@ -211,3 +211,107 @@ async def test_run_skill_auto_fills_relative_output_dir_from_recipe( expected = str(tmp_path / ".autoskillit" / "temp") + "/" assert executor.calls[0].allowed_write_prefix == expected assert executor.calls[0].allowed_write_prefixes == (expected,) + + +@pytest.mark.anyio +async def test_run_skill_resolves_step_provider_from_recipe_step( + tool_ctx_kitchen_open, monkeypatch, tmp_path +) -> None: + """step_provider auto-filled from RecipeStep.provider when caller omits it.""" + from autoskillit.recipe.schema import RecipeStep + from tests.fakes import InMemoryHeadlessExecutor + + executor = InMemoryHeadlessExecutor() + tool_ctx_kitchen_open.executor = executor + + step = RecipeStep(name="run_canaries", provider="minimax") + tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step} + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open) + monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True) + + captured_kwargs: dict = {} + + def spy(*args, **kwargs): + captured_kwargs.update(kwargs) + return ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"}) + + monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy) + + await run_skill( + "/eval-agent --agent-name test", + str(tmp_path), + step_name="run_canaries", + step_provider="", + ) + + assert captured_kwargs["step_provider"] == "minimax" + + +@pytest.mark.anyio +async def test_run_skill_llm_step_provider_overrides_recipe_step( + tool_ctx_kitchen_open, monkeypatch, tmp_path +) -> None: + """Explicit caller step_provider must not be overridden by recipe step.""" + from autoskillit.recipe.schema import RecipeStep + from tests.fakes import InMemoryHeadlessExecutor + + executor = InMemoryHeadlessExecutor() + tool_ctx_kitchen_open.executor = executor + + step = RecipeStep(name="run_canaries", provider="minimax") + tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step} + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open) + monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True) + + captured_kwargs: dict = {} + + def spy(*args, **kwargs): + captured_kwargs.update(kwargs) + return ("bedrock", {"AWS_REGION": "us-east-1"}) + + monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy) + + await run_skill( + "/eval-agent --agent-name test", + str(tmp_path), + step_name="run_canaries", + step_provider="bedrock", + ) + + assert captured_kwargs["step_provider"] == "bedrock" + + +@pytest.mark.anyio +async def test_run_skill_logs_warning_when_step_provider_resolved_from_recipe( + tool_ctx_kitchen_open, monkeypatch, tmp_path +) -> None: + """Server-side step_provider resolution must log for observability.""" + import structlog.testing + + from autoskillit.recipe.schema import RecipeStep + from tests.fakes import InMemoryHeadlessExecutor + + executor = InMemoryHeadlessExecutor() + tool_ctx_kitchen_open.executor = executor + + step = RecipeStep(name="run_canaries", provider="minimax") + tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step} + monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open) + monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True) + monkeypatch.setattr( + "autoskillit.server._guards._resolve_provider_profile", + lambda *a, **kw: ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"}), + ) + + with structlog.testing.capture_logs() as cap: + await run_skill( + "/eval-agent --agent-name test", + str(tmp_path), + step_name="run_canaries", + step_provider="", + ) + + resolved_events = [e for e in cap if e.get("event") == "step_provider_resolved_from_recipe"] + assert len(resolved_events) == 1 + assert resolved_events[0]["step"] == "run_canaries" + assert resolved_events[0]["provider"] == "minimax"