diff --git a/config.example.yaml b/config.example.yaml index df519d0..f494f8d 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -20,6 +20,15 @@ agent: title_model: claude-haiku-4-5-20251001 # Session title generation max_turns: 50 # Max agentic turns per request max_concurrent: 4 # Max concurrent agent sessions + thinking: max # Thinking budget for the main model + effort: max # Reasoning effort for the main model + # Cron / hook sessions use the cron_model (Sonnet). Under Claude OAuth + # (subscription) Sonnet caps thinking budget and rejects "max" with: + # level "max" not supported, valid levels: low, medium, high. + # Keep these at "high" unless your provider accepts "max" for the + # cron model. + cron_thinking: high + cron_effort: high # First-prompt rewrite — the web UI can refine the opening message of a # new chat with a fast model, preview it, and send only after approval. prompt_rewrite: diff --git a/docs/config.md b/docs/config.md index f24c5b3..aa92843 100644 --- a/docs/config.md +++ b/docs/config.md @@ -40,6 +40,10 @@ from any working directory: | `agent.cron_model` | string | `claude-sonnet-4-6` | Model for cron jobs (cheaper) | | `agent.max_turns` | int | `50` | Max agentic turns per request | | `agent.max_concurrent` | int | `4` | Max concurrent agent sessions | +| `agent.thinking` | string | `max` | Thinking budget for the main model: `max` / `high` / `medium` / `low` / `disabled` / `adaptive` / explicit token count | +| `agent.effort` | string | `max` | Reasoning effort for the main model: `max` / `xhigh` / `high` / `medium` / `low` | +| `agent.cron_thinking` | string | `high` | Thinking budget for cron and hook sessions (which run on `cron_model`). Claude OAuth caps non-flagship models at `high` and rejects `max` with `level "max" not supported` — keep this at `high` unless you're using a provider that accepts `max` for the cron model. | +| `agent.cron_effort` | string | `high` | Reasoning effort for cron and hook sessions. 
Same caveat as `cron_thinking`. | | `agent.prompt_rewrite.enabled` | bool | `true` | Offer the first-prompt rewrite feature in the web UI (per-user toggle lives in the composer) | | `agent.prompt_rewrite.model` | string | `""` | Model for prompt rewriting (empty = `agent.model`, the chat model) | | `agent.prompt_rewrite.max_tokens` | int | `1024` | Max tokens for the rewritten prompt | diff --git a/nerve/agent/engine.py b/nerve/agent/engine.py index 0ae31e5..aa34088 100644 --- a/nerve/agent/engine.py +++ b/nerve/agent/engine.py @@ -70,6 +70,29 @@ _SURROGATE_RE = re.compile(r"[\ud800-\udfff]") +def _select_thinking_effort(agent_config: Any, source: str) -> tuple[str, str]: + """Pick (thinking, effort) settings for a session based on its source. + + Cron and hook sessions run on `cron_model` (typically Sonnet), which + under Claude OAuth (subscription) does not accept `level=max` for + thinking/effort and rejects requests with + `level "max" not supported, valid levels: low, medium, high`. Use + the dedicated `cron_*` overrides for those sources so cron jobs + don't get blocked while interactive sessions keep their full + thinking budget. 
+ """ + is_cron_like = source in ("cron", "hook") + thinking_value = ( + agent_config.cron_thinking if is_cron_like + else agent_config.thinking + ) + effort_value = ( + agent_config.cron_effort if is_cron_like + else agent_config.effort + ) + return thinking_value, effort_value + + # Anthropic API image limits _MAX_IMAGE_BYTES = 5 * 1024 * 1024 # 5 MB _IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp"} @@ -965,12 +988,15 @@ def _build_options( else: system_prompt = system_prompt_str + thinking_value, effort_value = _select_thinking_effort( + self.config.agent, source, + ) thinking_config = self._parse_thinking_config( - self.config.agent.thinking, + thinking_value, model or self.config.agent.model, ) effort = self._effective_effort( - self.config.agent.effort, + effort_value, model or self.config.agent.model, ) betas = ( diff --git a/nerve/bootstrap.py b/nerve/bootstrap.py index 387c168..733e500 100644 --- a/nerve/bootstrap.py +++ b/nerve/bootstrap.py @@ -1962,6 +1962,8 @@ def _write_config_yaml(self) -> None: "max_concurrent": 4, "thinking": "max", "effort": "max", + "cron_thinking": "high", + "cron_effort": "high", "context_1m": True, }, "gateway": { diff --git a/nerve/config.py b/nerve/config.py index b9a9bcb..d870b0b 100644 --- a/nerve/config.py +++ b/nerve/config.py @@ -138,6 +138,14 @@ class AgentConfig: max_concurrent: int = 4 thinking: str = "max" # max, high, medium, low, disabled, adaptive, or number (budget_tokens) effort: str = "max" # max, xhigh, high, medium, low + # Cron / hook overrides — Claude OAuth (subscription) caps thinking budget + # for non-flagship models like Sonnet, rejecting effort/thinking="max" with + # `level "max" not supported, valid levels: low, medium, high`. Use a + # separate, lower setting for cron sessions (which run on `cron_model`) + # so cron jobs don't get blocked while keeping `effort=max` for the main + # interactive model. 
+ cron_thinking: str = "high" + cron_effort: str = "high" context_1m: bool = True # Enable 1M context window beta # Hung-CLI detection: max idle time between SDK messages on a single # turn before the engine treats the subprocess as dead and falls into @@ -157,6 +165,8 @@ def from_dict(cls, d: dict) -> AgentConfig: max_concurrent=d.get("max_concurrent", 4), thinking=str(d.get("thinking", "max")), effort=str(d.get("effort", "max")), + cron_thinking=str(d.get("cron_thinking", "high")), + cron_effort=str(d.get("cron_effort", "high")), context_1m=d.get("context_1m", True), cli_idle_timeout_seconds=int(d.get("cli_idle_timeout_seconds", 900)), prompt_rewrite=PromptRewriteConfig.from_dict(d.get("prompt_rewrite") or {}), diff --git a/tests/test_engine_options.py b/tests/test_engine_options.py new file mode 100644 index 0000000..c448602 --- /dev/null +++ b/tests/test_engine_options.py @@ -0,0 +1,112 @@ +"""Tests for engine option helpers — thinking/effort selection per source. + +Regression for the issue where every cron run failed with +``API Error: 400 level "max" not supported, valid levels: low, medium, high`` +because the global ``effort=max`` / ``thinking=max`` settings were applied +to cron sessions that run on ``cron_model`` (Sonnet) under Claude OAuth, +which caps non-flagship models at ``high``. + +The fix introduces dedicated ``agent.cron_thinking`` / ``agent.cron_effort`` +fields and a ``_select_thinking_effort`` helper that picks the right pair +based on ``source`` (``cron`` / ``hook`` get the cron overrides, everything +else keeps the main settings). 
+""" + +from __future__ import annotations + +import pytest + +from nerve.agent.engine import _select_thinking_effort +from nerve.config import AgentConfig + + +@pytest.fixture +def agent_config() -> AgentConfig: + """Default AgentConfig — main = max, cron = high.""" + return AgentConfig() + + +class TestSelectThinkingEffort: + """``_select_thinking_effort`` routes settings by session source.""" + + def test_defaults_match_documented_values(self, agent_config: AgentConfig): + """Sanity check: the defaults are the ones the docs promise.""" + assert agent_config.thinking == "max" + assert agent_config.effort == "max" + assert agent_config.cron_thinking == "high" + assert agent_config.cron_effort == "high" + + @pytest.mark.parametrize("source", ["web", "telegram", "discord", "api", ""]) + def test_interactive_sources_use_main_settings( + self, agent_config: AgentConfig, source: str, + ): + """Anything that isn't cron/hook keeps the main thinking/effort.""" + thinking, effort = _select_thinking_effort(agent_config, source) + assert thinking == "max" + assert effort == "max" + + @pytest.mark.parametrize("source", ["cron", "hook"]) + def test_cron_and_hook_use_cron_overrides( + self, agent_config: AgentConfig, source: str, + ): + """Cron and hook sessions must use ``cron_thinking`` / ``cron_effort``. + + This is the regression — the original code always passed + ``effort=max`` regardless of source, blocking every cron run. 
+ """ + thinking, effort = _select_thinking_effort(agent_config, source) + assert thinking == "high" + assert effort == "high" + + def test_overrides_propagate_from_config(self): + """Custom values in AgentConfig are honored, not overwritten.""" + config = AgentConfig( + thinking="medium", + effort="medium", + cron_thinking="low", + cron_effort="low", + ) + assert _select_thinking_effort(config, "web") == ("medium", "medium") + assert _select_thinking_effort(config, "cron") == ("low", "low") + assert _select_thinking_effort(config, "hook") == ("low", "low") + + def test_main_and_cron_can_differ_independently(self): + """Cron settings don't have to mirror main settings.""" + config = AgentConfig( + thinking="max", effort="max", + cron_thinking="medium", cron_effort="low", + ) + thinking_main, effort_main = _select_thinking_effort(config, "web") + thinking_cron, effort_cron = _select_thinking_effort(config, "cron") + assert (thinking_main, effort_main) == ("max", "max") + assert (thinking_cron, effort_cron) == ("medium", "low") + + +class TestAgentConfigFromDict: + """``AgentConfig.from_dict`` accepts the new cron_* fields.""" + + def test_omitted_cron_fields_default_to_high(self): + """Configs predating this fix still load — defaults kick in.""" + config = AgentConfig.from_dict({}) + assert config.cron_thinking == "high" + assert config.cron_effort == "high" + + def test_cron_fields_are_loaded_from_dict(self): + config = AgentConfig.from_dict({ + "cron_thinking": "medium", + "cron_effort": "low", + }) + assert config.cron_thinking == "medium" + assert config.cron_effort == "low" + + def test_main_settings_unaffected_by_cron_fields(self): + config = AgentConfig.from_dict({ + "thinking": "max", + "effort": "max", + "cron_thinking": "low", + "cron_effort": "low", + }) + assert config.thinking == "max" + assert config.effort == "max" + assert config.cron_thinking == "low" + assert config.cron_effort == "low"