ClickHouse · constkolesnyak · Apr 25, 2026
diff --git a/config.example.yaml b/config.example.yaml
@@ -20,6 +20,15 @@ agent:
   title_model: claude-haiku-4-5-20251001  # Session title generation
   max_turns: 50                  # Max agentic turns per request
   max_concurrent: 4              # Max concurrent agent sessions
+  thinking: max                  # Thinking budget for the main model
+  effort: max                    # Reasoning effort for the main model
+  # Cron / hook sessions run on the cron_model (Sonnet). Under Claude OAuth
+  # (subscription), Sonnet caps the thinking budget and rejects "max" with:
+  #   level "max" not supported, valid levels: low, medium, high
+  # Keep these at "high" unless your provider accepts "max" for the
+  # cron model.
+  cron_thinking: high
+  cron_effort: high
   # First-prompt rewrite — the web UI can refine the opening message of a
   # new chat with a fast model, preview it, and send only after approval.
   prompt_rewrite:

diff --git a/docs/config.md b/docs/config.md
@@ -40,6 +40,10 @@ from any working directory:
 | `agent.cron_model` | string | `claude-sonnet-4-6` | Model for cron jobs (cheaper) |
 | `agent.max_turns` | int | `50` | Max agentic turns per request |
 | `agent.max_concurrent` | int | `4` | Max concurrent agent sessions |
+| `agent.thinking` | string | `max` | Thinking budget for the main model: `max` / `high` / `medium` / `low` / `disabled` / `adaptive` / explicit token count |
+| `agent.effort` | string | `max` | Reasoning effort for the main model: `max` / `xhigh` / `high` / `medium` / `low` |
+| `agent.cron_thinking` | string | `high` | Thinking budget for cron and hook sessions (which run on `cron_model`). Claude OAuth caps non-flagship models at `high` and rejects `max` with `level "max" not supported` — keep this at `high` unless you're using a provider that accepts `max` for the cron model. |
+| `agent.cron_effort` | string | `high` | Reasoning effort for cron and hook sessions. Same caveat as `cron_thinking`. |
 | `agent.prompt_rewrite.enabled` | bool | `true` | Offer the first-prompt rewrite feature in the web UI (per-user toggle lives in the composer) |
 | `agent.prompt_rewrite.model` | string | `""` | Model for prompt rewriting (empty = `agent.model`, the chat model) |
 | `agent.prompt_rewrite.max_tokens` | int | `1024` | Max tokens for the rewritten prompt |

diff --git a/nerve/agent/engine.py b/nerve/agent/engine.py
@@ -70,6 +70,29 @@
 
 _SURROGATE_RE = re.compile(r"[\ud800-\udfff]")
 
+def _select_thinking_effort(agent_config: Any, source: str) -> tuple[str, str]:
+    """Return the `(thinking, effort)` pair that applies to `source`.
+
+    Sessions whose source is `"cron"` or `"hook"` get the dedicated
+    `cron_thinking` / `cron_effort` values; every other source gets
+    the main `thinking` / `effort` values.
+
+    The split exists because cron-like sessions run on `cron_model`,
+    which under Claude OAuth (subscription) rejects the `max` level
+    with `level "max" not supported, valid levels: low, medium, high`.
+
+    Args:
+        agent_config: Object exposing the four attributes named above.
+        source: Session source identifier, e.g. `"cron"` or `"web"`.
+
+    Returns:
+        The selected values, unmodified, as `(thinking, effort)`.
+    """
+    if source == "cron" or source == "hook":
+        return agent_config.cron_thinking, agent_config.cron_effort
+    return agent_config.thinking, agent_config.effort
+
+
 # Anthropic API image limits
 _MAX_IMAGE_BYTES = 5 * 1024 * 1024  # 5 MB
 _IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
@@ -965,12 +988,15 @@ def _build_options(
         else:
             system_prompt = system_prompt_str
 
+        thinking_value, effort_value = _select_thinking_effort(
+            self.config.agent, source,
+        )
         thinking_config = self._parse_thinking_config(
-            self.config.agent.thinking,
+            thinking_value,
             model or self.config.agent.model,
         )
         effort = self._effective_effort(
-            self.config.agent.effort,
+            effort_value,
             model or self.config.agent.model,
         )
         betas = (

diff --git a/nerve/bootstrap.py b/nerve/bootstrap.py
@@ -1962,6 +1962,8 @@ def _write_config_yaml(self) -> None:
                 "max_concurrent": 4,
                 "thinking": "max",
                 "effort": "max",
+                "cron_thinking": "high",
+                "cron_effort": "high",
                 "context_1m": True,
             },
             "gateway": {

diff --git a/nerve/config.py b/nerve/config.py
@@ -138,6 +138,14 @@ class AgentConfig:
     max_concurrent: int = 4
     thinking: str = "max"       # max, high, medium, low, disabled, adaptive, or number (budget_tokens)
     effort: str = "max"         # max, xhigh, high, medium, low
+    # Cron / hook overrides — Claude OAuth (subscription) caps thinking budget
+    # for non-flagship models like Sonnet, rejecting effort/thinking="max" with
+    # `level "max" not supported, valid levels: low, medium, high`. Use a
+    # separate, lower setting for cron sessions (which run on `cron_model`)
+    # so cron jobs don't get blocked while keeping `effort=max` for the main
+    # interactive model.
+    cron_thinking: str = "high"
+    cron_effort: str = "high"
     context_1m: bool = True     # Enable 1M context window beta
     # Hung-CLI detection: max idle time between SDK messages on a single
     # turn before the engine treats the subprocess as dead and falls into
@@ -157,6 +165,8 @@ def from_dict(cls, d: dict) -> AgentConfig:
             max_concurrent=d.get("max_concurrent", 4),
             thinking=str(d.get("thinking", "max")),
             effort=str(d.get("effort", "max")),
+            cron_thinking=str(d.get("cron_thinking", "high")),
+            cron_effort=str(d.get("cron_effort", "high")),
             context_1m=d.get("context_1m", True),
             cli_idle_timeout_seconds=int(d.get("cli_idle_timeout_seconds", 900)),
             prompt_rewrite=PromptRewriteConfig.from_dict(d.get("prompt_rewrite") or {}),

diff --git a/tests/test_engine_options.py b/tests/test_engine_options.py
@@ -0,0 +1,112 @@
+"""Tests for engine option helpers — thinking/effort selection per source.
+
+Regression for the issue where every cron run failed with
+``API Error: 400 level "max" not supported, valid levels: low, medium, high``
+because the global ``effort=max`` / ``thinking=max`` settings were applied
+to cron sessions that run on ``cron_model`` (Sonnet) under Claude OAuth,
+which caps non-flagship models at ``high``.
+
+The fix introduces dedicated ``agent.cron_thinking`` / ``agent.cron_effort``
+fields and a ``_select_thinking_effort`` helper that picks the right pair
+based on ``source`` (``cron`` / ``hook`` get the cron overrides, everything
+else keeps the main settings).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from nerve.agent.engine import _select_thinking_effort
+from nerve.config import AgentConfig
+
+
+@pytest.fixture
+def agent_config() -> AgentConfig:
+    """AgentConfig with every field at its default (main = max, cron = high)."""
+    return AgentConfig()
+
+
+class TestSelectThinkingEffort:
+    """Source-based routing performed by ``_select_thinking_effort``."""
+
+    def test_defaults_match_documented_values(self, agent_config: AgentConfig):
+        """A no-argument config carries the documented default levels."""
+        main_pair = (agent_config.thinking, agent_config.effort)
+        cron_pair = (agent_config.cron_thinking, agent_config.cron_effort)
+        assert main_pair == ("max", "max")
+        assert cron_pair == ("high", "high")
+
+    @pytest.mark.parametrize("source", ["web", "telegram", "discord", "api", ""])
+    def test_interactive_sources_use_main_settings(
+        self, agent_config: AgentConfig, source: str,
+    ):
+        """Sources other than cron/hook resolve to the main pair."""
+        selected = _select_thinking_effort(agent_config, source)
+        # The default main settings are both "max".
+        assert selected == ("max", "max")
+
+    @pytest.mark.parametrize("source", ["cron", "hook"])
+    def test_cron_and_hook_use_cron_overrides(
+        self, agent_config: AgentConfig, source: str,
+    ):
+        """``cron`` and ``hook`` resolve to the cron override pair.
+
+        Regression guard: before the fix, ``effort=max`` was applied to
+        every source, which blocked every cron run.
+        """
+        selected = _select_thinking_effort(agent_config, source)
+        # The default cron overrides are both "high".
+        assert selected == ("high", "high")
+
+    def test_overrides_propagate_from_config(self):
+        """Explicitly configured values are returned, not replaced."""
+        custom = AgentConfig(
+            thinking="medium", effort="medium",
+            cron_thinking="low", cron_effort="low",
+        )
+        main_pair, cron_pair = ("medium", "medium"), ("low", "low")
+        assert _select_thinking_effort(custom, "web") == main_pair
+        # Both cron-like sources must resolve to the same override pair.
+        for cron_like in ("cron", "hook"):
+            assert _select_thinking_effort(custom, cron_like) == cron_pair
+
+    def test_main_and_cron_can_differ_independently(self):
+        """The cron pair is chosen independently of the main pair."""
+        mixed = AgentConfig(
+            thinking="max",
+            effort="max",
+            cron_thinking="medium",
+            cron_effort="low",
+        )
+        assert _select_thinking_effort(mixed, "web") == ("max", "max")
+        assert _select_thinking_effort(mixed, "cron") == ("medium", "low")
+
+
+class TestAgentConfigFromDict:
+    """Loading of the ``cron_*`` fields by ``AgentConfig.from_dict``."""
+
+    def test_omitted_cron_fields_default_to_high(self):
+        """An empty mapping yields ``high`` for both cron fields."""
+        loaded = AgentConfig.from_dict({})
+        cron_pair = (loaded.cron_thinking, loaded.cron_effort)
+        assert cron_pair == ("high", "high")
+
+    def test_cron_fields_are_loaded_from_dict(self):
+        """Cron values present in the mapping show up on the config."""
+        loaded = AgentConfig.from_dict(
+            {"cron_thinking": "medium", "cron_effort": "low"},
+        )
+        cron_pair = (loaded.cron_thinking, loaded.cron_effort)
+        assert cron_pair == ("medium", "low")
+
+    def test_main_settings_unaffected_by_cron_fields(self):
+        """Main and cron values load side by side without mixing."""
+        loaded = AgentConfig.from_dict({
+            "thinking": "max", "effort": "max",
+            "cron_thinking": "low", "cron_effort": "low",
+        })
+        main_pair = (loaded.thinking, loaded.effort)
+        cron_pair = (loaded.cron_thinking, loaded.cron_effort)
+        # Main fields keep their own values; cron fields keep theirs.
+        assert main_pair == ("max", "max")
+        assert cron_pair == ("low", "low")