Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/autoskillit/server/tools/tools_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,16 @@ async def run_skill(
)

_cfg = _get_config()
if not step_provider and step_name and tool_ctx.active_recipe_steps is not None:
_recipe_step_pre = tool_ctx.active_recipe_steps.get(step_name)
if _recipe_step_pre is not None and _recipe_step_pre.provider:
step_provider = _recipe_step_pre.provider
logger.warning(
"step_provider_resolved_from_recipe",
step=step_name,
provider=step_provider,
)

if is_feature_enabled(
"providers", _cfg.features, experimental_enabled=_cfg.experimental_enabled
):
Expand Down
11 changes: 9 additions & 2 deletions src/autoskillit/skills/sous-chef/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ parallel run of the same step reports under the same canonical step name.

---

## PARAMETER FORWARDING β€” output_dir, stale_threshold, idle_output_timeout
## PARAMETER FORWARDING β€” output_dir, stale_threshold, idle_output_timeout, step_provider

When a recipe step's `with:` block contains `output_dir`, you MUST pass it as the
`output_dir` parameter of `run_skill`. This controls the write guard β€” omitting it
Expand All @@ -476,19 +476,26 @@ causes the session to write to the wrong directory.
When a recipe step has a top-level `stale_threshold` or `idle_output_timeout` field,
pass it as the corresponding `run_skill` parameter. These control session kill thresholds.

When a recipe step has a top-level `provider` field, pass the value as the
`step_provider` parameter of `run_skill`. This controls which LLM provider
(e.g., Minimax, Bedrock) the session uses. Omitting it causes the session to
fall back to the default Anthropic provider, silently ignoring the recipe's
declared provider.

**Example:**
```yaml
implement:
tool: run_skill
stale_threshold: 2400
provider: minimax
with:
skill_command: "/implement ..."
cwd: "${{ context.work_dir }}"
step_name: implement
output_dir: "${{ context.work_dir }}/${{ context.autoskillit_temp }}"
```

Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400)`
Call: `run_skill(skill_command=..., cwd=..., step_name="implement", output_dir="...", stale_threshold=2400, step_provider="minimax")`

This provides defense-in-depth: the server resolves parameters server-side, AND the LLM is instructed to forward them.

Expand Down
2 changes: 1 addition & 1 deletion tests/contracts/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ Protocol satisfaction, package gateway, and skill contract compliance tests.
| `test_skill_transition_boundaries.py` | Contract tests for anti-confirmation instructions at SKILL.md transition boundaries |
| `test_skill_yaml_validation.py` | Contract: YAML workflow examples embedded in SKILL.md files must be valid recipes |
| `test_skill_format_compliance.py` | Universal contract: all bundled SKILL.md files have valid frontmatter |
| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout) to run_skill |
| `test_sous_chef_parameter_forwarding.py` | Architectural contract: sous-chef SKILL.md must instruct the LLM to forward recipe step parameters (output_dir, stale_threshold, idle_output_timeout, step_provider) to run_skill |
| `test_sous_chef_quota_protocol.py` | Contract test: sous-chef SKILL.md must contain QUOTA WAIT PROTOCOL section |
| `test_sous_chef_routing.py` | Contract tests for the CONTEXT LIMIT ROUTING section in sous-chef SKILL.md |
| `test_sous_chef_scheduling.py` | Contract tests for the PARALLEL STEP SCHEDULING section in sous-chef SKILL.md |
Expand Down
12 changes: 12 additions & 0 deletions tests/contracts/test_sous_chef_parameter_forwarding.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,15 @@ def test_sous_chef_skill_md_mentions_output_dir():
"The LLM must be instructed to forward output_dir from recipe "
"step with: blocks to run_skill."
)


def test_sous_chef_skill_md_mentions_step_provider():
"""Sous-chef SKILL.md must instruct the LLM to forward step_provider."""
skill_md = (
Path(__file__).parents[2] / "src/autoskillit/skills/sous-chef/SKILL.md"
).read_text()
assert "step_provider" in skill_md, (
"sous-chef SKILL.md does not mention step_provider. "
"The LLM must be instructed to forward step_provider from recipe "
"step provider: fields to run_skill."
)
2 changes: 1 addition & 1 deletion tests/server/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Server tool handler unit tests β€” kitchen, execution, CI, clone, workspace tool
| `test_tools_execution_response.py` | Contract tests: MCP tool response fields use correct enum types |
| `test_tools_execution_results.py` | Tests for run_skill result shapes, failure paths, timing, flush telemetry, and gate checks |
| `test_tools_execution_routing.py` | Tests for run_skill routing, executor delegation, and session skill management |
| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout auto-filled from cached recipe step definitions) |
| `test_tools_execution_step_resolution.py` | Tests for server-side recipe step parameter resolution in run_skill (output_dir, stale_threshold, idle_output_timeout, step_provider auto-filled from cached recipe step definitions) |
| `test_tools_execution_backend_mixing.py` | Integration tests for per-step backend mixing in run_skill() β€” Codex backend + ANTHROPIC_BASE_URL derives backend_override='claude-code' |
| `test_tools_execution_write_prefix.py` | Tests for allowed_write_prefix computation decoupled from read_only |
| `test_tools_git.py` | Tests for merge_worktree core flow: happy path, test gate, rebase abort, bypass prevention |
Expand Down
104 changes: 104 additions & 0 deletions tests/server/test_tools_execution_step_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,107 @@ async def test_run_skill_auto_fills_relative_output_dir_from_recipe(
expected = str(tmp_path / ".autoskillit" / "temp") + "/"
assert executor.calls[0].allowed_write_prefix == expected
assert executor.calls[0].allowed_write_prefixes == (expected,)


@pytest.mark.anyio
async def test_run_skill_resolves_step_provider_from_recipe_step(
tool_ctx_kitchen_open, monkeypatch, tmp_path
) -> None:
"""step_provider auto-filled from RecipeStep.provider when caller omits it."""
from autoskillit.recipe.schema import RecipeStep
from tests.fakes import InMemoryHeadlessExecutor

executor = InMemoryHeadlessExecutor()
tool_ctx_kitchen_open.executor = executor

step = RecipeStep(name="run_canaries", provider="minimax")
tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step}
monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open)
monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True)

captured_kwargs: dict = {}

def spy(*args, **kwargs):
captured_kwargs.update(kwargs)
return ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"})

monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy)

await run_skill(
"/eval-agent --agent-name test",
str(tmp_path),
step_name="run_canaries",
step_provider="",
)

assert captured_kwargs["step_provider"] == "minimax"


@pytest.mark.anyio
async def test_run_skill_llm_step_provider_overrides_recipe_step(
tool_ctx_kitchen_open, monkeypatch, tmp_path
) -> None:
"""Explicit caller step_provider must not be overridden by recipe step."""
from autoskillit.recipe.schema import RecipeStep
from tests.fakes import InMemoryHeadlessExecutor

executor = InMemoryHeadlessExecutor()
tool_ctx_kitchen_open.executor = executor

step = RecipeStep(name="run_canaries", provider="minimax")
tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step}
monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open)
monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True)

captured_kwargs: dict = {}

def spy(*args, **kwargs):
captured_kwargs.update(kwargs)
return ("bedrock", {"AWS_REGION": "us-east-1"})

monkeypatch.setattr("autoskillit.server._guards._resolve_provider_profile", spy)

await run_skill(
"/eval-agent --agent-name test",
str(tmp_path),
step_name="run_canaries",
step_provider="bedrock",
)

assert captured_kwargs["step_provider"] == "bedrock"


@pytest.mark.anyio
async def test_run_skill_logs_warning_when_step_provider_resolved_from_recipe(
tool_ctx_kitchen_open, monkeypatch, tmp_path
) -> None:
"""Server-side step_provider resolution must log for observability."""
import structlog.testing

from autoskillit.recipe.schema import RecipeStep
from tests.fakes import InMemoryHeadlessExecutor

executor = InMemoryHeadlessExecutor()
tool_ctx_kitchen_open.executor = executor

step = RecipeStep(name="run_canaries", provider="minimax")
tool_ctx_kitchen_open.active_recipe_steps = {"run_canaries": step}
monkeypatch.setattr("autoskillit.server._ctx", tool_ctx_kitchen_open)
monkeypatch.setattr("autoskillit.core.is_feature_enabled", lambda *a, **kw: True)
monkeypatch.setattr(
"autoskillit.server._guards._resolve_provider_profile",
lambda *a, **kw: ("minimax", {"ANTHROPIC_BASE_URL": "https://api.minimax.chat/v1"}),
)

with structlog.testing.capture_logs() as cap:
await run_skill(
"/eval-agent --agent-name test",
str(tmp_path),
step_name="run_canaries",
step_provider="",
)

resolved_events = [e for e in cap if e.get("event") == "step_provider_resolved_from_recipe"]
assert len(resolved_events) == 1
assert resolved_events[0]["step"] == "run_canaries"
assert resolved_events[0]["provider"] == "minimax"
Loading