Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ agent:
title_model: claude-haiku-4-5-20251001 # Session title generation
max_turns: 50 # Max agentic turns per request
max_concurrent: 4 # Max concurrent agent sessions
thinking: max # Thinking budget for the main model
effort: max # Reasoning effort for the main model
# Cron / hook sessions use the cron_model (Sonnet). Under Claude OAuth
# (subscription), Sonnet caps the thinking budget and rejects "max" with
# the error:
#   level "max" not supported, valid levels: low, medium, high
# Keep these at "high" unless your provider accepts "max" for the
# cron model.
cron_thinking: high
cron_effort: high
# First-prompt rewrite — the web UI can refine the opening message of a
# new chat with a fast model, preview it, and send only after approval.
prompt_rewrite:
Expand Down
4 changes: 4 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ from any working directory:
| `agent.cron_model` | string | `claude-sonnet-4-6` | Model for cron jobs (cheaper) |
| `agent.max_turns` | int | `50` | Max agentic turns per request |
| `agent.max_concurrent` | int | `4` | Max concurrent agent sessions |
| `agent.thinking` | string | `max` | Thinking budget for the main model: `max` / `high` / `medium` / `low` / `disabled` / `adaptive` / explicit token count |
| `agent.effort` | string | `max` | Reasoning effort for the main model: `max` / `xhigh` / `high` / `medium` / `low` |
| `agent.cron_thinking` | string | `high` | Thinking budget for cron and hook sessions (which run on `cron_model`). Claude OAuth caps non-flagship models at `high` and rejects `max` with `level "max" not supported` — keep this at `high` unless you're using a provider that accepts `max` for the cron model. |
| `agent.cron_effort` | string | `high` | Reasoning effort for cron and hook sessions. Same caveat as `cron_thinking`. |
| `agent.prompt_rewrite.enabled` | bool | `true` | Offer the first-prompt rewrite feature in the web UI (per-user toggle lives in the composer) |
| `agent.prompt_rewrite.model` | string | `""` | Model for prompt rewriting (empty = `agent.model`, the chat model) |
| `agent.prompt_rewrite.max_tokens` | int | `1024` | Max tokens for the rewritten prompt |
Expand Down
30 changes: 28 additions & 2 deletions nerve/agent/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,29 @@

_SURROGATE_RE = re.compile(r"[\ud800-\udfff]")

def _select_thinking_effort(agent_config: Any, source: str) -> tuple[str, str]:
"""Pick (thinking, effort) settings for a session based on its source.

Cron and hook sessions run on `cron_model` (typically Sonnet), which
under Claude OAuth (subscription) does not accept `level=max` for
thinking/effort and rejects requests with
`level "max" not supported, valid levels: low, medium, high`. Use
the dedicated `cron_*` overrides for those sources so cron jobs
don't get blocked while interactive sessions keep their full
thinking budget.
"""
is_cron_like = source in ("cron", "hook")
thinking_value = (
agent_config.cron_thinking if is_cron_like
else agent_config.thinking
)
effort_value = (
agent_config.cron_effort if is_cron_like
else agent_config.effort
)
return thinking_value, effort_value


# Anthropic API image limits
_MAX_IMAGE_BYTES = 5 * 1024 * 1024 # 5 MB
_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
Expand Down Expand Up @@ -965,12 +988,15 @@ def _build_options(
else:
system_prompt = system_prompt_str

thinking_value, effort_value = _select_thinking_effort(
self.config.agent, source,
)
thinking_config = self._parse_thinking_config(
self.config.agent.thinking,
thinking_value,
model or self.config.agent.model,
)
effort = self._effective_effort(
self.config.agent.effort,
effort_value,
model or self.config.agent.model,
)
betas = (
Expand Down
2 changes: 2 additions & 0 deletions nerve/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1962,6 +1962,8 @@ def _write_config_yaml(self) -> None:
"max_concurrent": 4,
"thinking": "max",
"effort": "max",
"cron_thinking": "high",
"cron_effort": "high",
"context_1m": True,
},
"gateway": {
Expand Down
10 changes: 10 additions & 0 deletions nerve/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ class AgentConfig:
max_concurrent: int = 4
thinking: str = "max" # max, high, medium, low, disabled, adaptive, or number (budget_tokens)
effort: str = "max" # max, xhigh, high, medium, low
# Cron / hook overrides — Claude OAuth (subscription) caps thinking budget
# for non-flagship models like Sonnet, rejecting effort/thinking="max" with
# `level "max" not supported, valid levels: low, medium, high`. Use a
# separate, lower setting for cron sessions (which run on `cron_model`)
# so cron jobs don't get blocked while keeping `effort=max` for the main
# interactive model.
cron_thinking: str = "high"
cron_effort: str = "high"
context_1m: bool = True # Enable 1M context window beta
# Hung-CLI detection: max idle time between SDK messages on a single
# turn before the engine treats the subprocess as dead and falls into
Expand All @@ -157,6 +165,8 @@ def from_dict(cls, d: dict) -> AgentConfig:
max_concurrent=d.get("max_concurrent", 4),
thinking=str(d.get("thinking", "max")),
effort=str(d.get("effort", "max")),
cron_thinking=str(d.get("cron_thinking", "high")),
cron_effort=str(d.get("cron_effort", "high")),
context_1m=d.get("context_1m", True),
cli_idle_timeout_seconds=int(d.get("cli_idle_timeout_seconds", 900)),
prompt_rewrite=PromptRewriteConfig.from_dict(d.get("prompt_rewrite") or {}),
Expand Down
112 changes: 112 additions & 0 deletions tests/test_engine_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Tests for engine option helpers — thinking/effort selection per source.

Regression for the issue where every cron run failed with
``API Error: 400 level "max" not supported, valid levels: low, medium, high``
because the global ``effort=max`` / ``thinking=max`` settings were applied
to cron sessions that run on ``cron_model`` (Sonnet) under Claude OAuth,
which caps non-flagship models at ``high``.

The fix introduces dedicated ``agent.cron_thinking`` / ``agent.cron_effort``
fields and a ``_select_thinking_effort`` helper that picks the right pair
based on ``source`` (``cron`` / ``hook`` get the cron overrides, everything
else keeps the main settings).
"""

from __future__ import annotations

import pytest

from nerve.agent.engine import _select_thinking_effort
from nerve.config import AgentConfig


@pytest.fixture
def agent_config() -> AgentConfig:
    """Provide an ``AgentConfig`` built with no arguments (main = max, cron = high)."""
    defaults = AgentConfig()
    return defaults


class TestSelectThinkingEffort:
    """Check that ``_select_thinking_effort`` picks settings by session source."""

    def test_defaults_match_documented_values(self, agent_config: AgentConfig):
        """A default config carries main = ``max`` and cron = ``high``."""
        main_pair = (agent_config.thinking, agent_config.effort)
        cron_pair = (agent_config.cron_thinking, agent_config.cron_effort)
        assert main_pair == ("max", "max")
        assert cron_pair == ("high", "high")

    @pytest.mark.parametrize("source", ["web", "telegram", "discord", "api", ""])
    def test_interactive_sources_use_main_settings(
        self, agent_config: AgentConfig, source: str,
    ):
        """Every source other than cron/hook resolves to the main pair."""
        picked_thinking, picked_effort = _select_thinking_effort(
            agent_config, source,
        )
        assert (picked_thinking, picked_effort) == ("max", "max")

    @pytest.mark.parametrize("source", ["cron", "hook"])
    def test_cron_and_hook_use_cron_overrides(
        self, agent_config: AgentConfig, source: str,
    ):
        """Cron and hook sources resolve to ``cron_thinking`` / ``cron_effort``.

        Regression guard: the original code always passed ``effort=max``
        regardless of source, which blocked every cron run.
        """
        picked_thinking, picked_effort = _select_thinking_effort(
            agent_config, source,
        )
        assert (picked_thinking, picked_effort) == ("high", "high")

    def test_overrides_propagate_from_config(self):
        """Non-default values set on AgentConfig are returned unchanged."""
        config = AgentConfig(
            thinking="medium",
            effort="medium",
            cron_thinking="low",
            cron_effort="low",
        )
        expected_by_source = {
            "web": ("medium", "medium"),
            "cron": ("low", "low"),
            "hook": ("low", "low"),
        }
        for source, expected in expected_by_source.items():
            assert _select_thinking_effort(config, source) == expected

    def test_main_and_cron_can_differ_independently(self):
        """The cron pair need not mirror the main pair, nor itself."""
        config = AgentConfig(
            thinking="max",
            effort="max",
            cron_thinking="medium",
            cron_effort="low",
        )
        web_thinking, web_effort = _select_thinking_effort(config, "web")
        cron_thinking, cron_effort = _select_thinking_effort(config, "cron")
        assert (web_thinking, web_effort) == ("max", "max")
        assert (cron_thinking, cron_effort) == ("medium", "low")


class TestAgentConfigFromDict:
    """Verify ``AgentConfig.from_dict`` handles the ``cron_*`` keys."""

    def test_omitted_cron_fields_default_to_high(self):
        """A dict without the cron keys still loads, with both set to ``high``."""
        loaded = AgentConfig.from_dict({})
        assert loaded.cron_thinking == "high"
        assert loaded.cron_effort == "high"

    def test_cron_fields_are_loaded_from_dict(self):
        """Values supplied for the cron keys end up on the config."""
        raw = {"cron_thinking": "medium", "cron_effort": "low"}
        loaded = AgentConfig.from_dict(raw)
        assert loaded.cron_thinking == "medium"
        assert loaded.cron_effort == "low"

    def test_main_settings_unaffected_by_cron_fields(self):
        """Main and cron keys given together are each stored as given."""
        raw = {
            "thinking": "max",
            "effort": "max",
            "cron_thinking": "low",
            "cron_effort": "low",
        }
        loaded = AgentConfig.from_dict(raw)
        # Same four checks, in the same order, driven by the input dict.
        for field_name, expected in raw.items():
            assert getattr(loaded, field_name) == expected