From dee1a25bc6381db59e199e08c40ad5ffbf3580db Mon Sep 17 00:00:00 2001 From: Payne Date: Wed, 3 Jun 2026 20:22:47 +0300 Subject: [PATCH 1/2] Add routing directives ("pins") to force a model from the prompt Adds an opt-in routing layer that lets a request's prompt force which backend serves it, overriding the orchestrator/worker selection AND the Auto Router. The motivating use case: an automated multi-agent workflow where each spawned sub-agent must land on a specific model by role (e.g. plan->opus, code->composer, review->codex, fix->claude). The workflow script bakes a role tag into each agent() prompt; the proxy hard-pins that request deterministically. Marker tiers (most explicit first; a tier wins only if it resolves to exactly one configured backend, so naming two models is ambiguous -> ignored): 1. [[route:NAME]] sentinel (stripped before forwarding) 2. @NAME / use:NAME / route:NAME / model:NAME tag (stripped) 3. natural language ("have codex review it") fallback (UC_DIRECTIVES_NL) NAME resolves via an alias table auto-derived from configured model ids + display names (composer/codex/opus/minimax/mimo... work with no setup), plus optional directives.aliases overrides. An optional planner routes plan-mode turns (detected structurally via ExitPlanMode) to a chosen model. Also adds a first-class "claude-opus" route to config.example.json: a real-Claude Anthropic-passthrough pick with a clean id. The id is deliberately NOT "claude-opus-4-8" (which the workflow engine hardcodes for background traffic that the orchestrator/worker layer remaps), so "claude-opus" is recognized as a deliberate pick and a directive target ([[route:opus]]) without colliding with stock traffic. This makes the role-pipeline example runnable on a fresh config and gives real Opus as a distinct orchestrator alongside a different worker model - which include_stock_models alone can't provide. Backward compatibility: - OPT-IN, OFF by default. 
With no directives block (or enabled:false) and no UC_DIRECTIVES env, behavior is byte-for-byte unchanged -- a no-op for existing setups until explicitly enabled. - Safe-degrading: _directive_pin is wrapped in try/except and returns None on any error or when no marker is present; unknown/ambiguous/auto markers are ignored. A request is never broken. - Core pipeline untouched (envelope, Auto Router, orchestrator/worker, openai_compat/codex/passthrough, image forwarding, /uc/select). The only pre-existing function changed is _last_user_text, refactored to delegate to _latest_user_turn with identical behavior (covered by existing tests). Knobs: UC_DIRECTIVES=1/0 (force on/off), UC_DIRECTIVES_NL=0 (tags only), UC_DIRECTIVES_LOG=1 (log decisions). Includes unit + dispatch tests (incl. the opt-in default-off guarantee), a docs/DIRECTIVES.md guide, a runnable plan->code->review->fix workflow at examples/role_pipeline_workflow.js, and a documented (disabled) config.example.json block. Full suite + doctor pass. Co-Authored-By: Claude Opus 4.8 --- README.md | 1 + config.example.json | 17 ++ docs/DIRECTIVES.md | 143 +++++++++++++++ examples/role_pipeline_workflow.js | 121 +++++++++++++ proxy.py | 269 ++++++++++++++++++++++++++--- test_proxy.py | 63 +++++++ 6 files changed, 594 insertions(+), 20 deletions(-) create mode 100644 docs/DIRECTIVES.md create mode 100644 examples/role_pipeline_workflow.js diff --git a/README.md b/README.md index 25dc708..8ffc1c9 100644 --- a/README.md +++ b/README.md @@ -321,6 +321,7 @@ test → troubleshoot) written for an AI to follow. 
| [docs/SETUP.md](docs/SETUP.md) | Human setup guide (Windows + macOS/Linux) | | [docs/HOW_IT_WORKS.md](docs/HOW_IT_WORKS.md) | The mechanism + reverse-engineering evidence | | [docs/AUTO_ROUTER.md](docs/AUTO_ROUTER.md) | The Auto Router — pick the right model per task automatically | +| [docs/DIRECTIVES.md](docs/DIRECTIVES.md) | Routing directives — pin a request to a model from the prompt (per-role multi-agent workflows) | | [docs/ADD_A_MODEL.md](docs/ADD_A_MODEL.md) | Add any backend to the `/model` menu | | [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) | Symptom → cause → fix | diff --git a/config.example.json b/config.example.json index 6cbf3ec..b49fbc8 100644 --- a/config.example.json +++ b/config.example.json @@ -18,6 +18,7 @@ "_models": "What shows in Claude Code's /model picker. id MUST start with 'claude' or 'anthropic' (others are silently dropped). display_name is the label you see. 'claude-auto' is the Auto Router (see the 'router' section below): pick it and the proxy chooses the cheapest configured backend that can handle each task.", "models": [ { "id": "claude-auto", "display_name": "Auto (smart routing)" }, + { "id": "claude-opus", "display_name": "Claude Opus 4.8 (real)" }, { "id": "claude-gpt-5.5-codex", "display_name": "GPT-5.5 (Codex OAuth)" }, { "id": "claude-minimax-m3", "display_name": "MiniMax-M3" }, { "id": "claude-mimo", "display_name": "MiMo v2.5 Pro" }, @@ -37,6 +38,10 @@ "_": "The Auto Router. type:auto means this is NOT a real backend - the proxy asks the cheap 'classifier' model (configured in the 'router' block below) to score the candidates and routes each task to the cheapest one that clears the quality bar. Pick 'Auto (smart routing)' in /model or the selector.", "type": "auto" }, + "claude-opus": { + "_": "Real Anthropic Claude Opus 4.8, as a first-class pick. 
Anthropic passthrough: no 'type' (forwards unchanged), no 'upstream' (defaults to api.anthropic.com), no 'auth' (reuses your existing Claude OAuth login - no API key needed). The id is 'claude-opus' ON PURPOSE: it must NOT be 'claude-opus-4-8', because the dynamic-workflow engine hardcodes that exact id for its background traffic and the orchestrator/worker layer remaps it onto your pick - so a route named 'claude-opus-4-8' would be ambiguous with stock traffic. A distinct id ('claude-opus') is recognized as a deliberate orchestrator/worker pick and as a routing-directive target ([[route:opus]]). The 'model' below is what's actually sent upstream. (This is also what include_stock_models can't give you on its own: real Opus as a DISTINCT orchestrator while a different/cheaper model runs the workers.)", + "model": "claude-opus-4-8" + }, "claude-gpt-5.5-codex": { "_": "Needs `codex login` once; no API key. effort/tier via UC_CODEX_EFFORT/UC_CODEX_SERVICE_TIER.", "type": "codex_oauth", @@ -152,5 +157,17 @@ "card": "Highest cost here; frontier reasoning and agentic coding. Best for the hardest work: large multi-file refactors, subtle debugging, architecture and design, long autonomous/dynamic workflows, and anything requiring images. Reserve for tasks the cheaper models would likely fail." } ] + }, + + "_directives": "Routing directives ('pins') - optional, OPT-IN (OFF until you set enabled:true below, or UC_DIRECTIVES=1). A request's PROMPT can FORCE a specific backend, overriding the orchestrator/worker pick AND the Auto Router. This is how an automated multi-agent workflow lands each spawned sub-agent on the right model BY ROLE: tag each agent()'s prompt with [[route:NAME]] (or @NAME / use:NAME). Names auto-derive from your model ids + display names (composer/codex/minimax/mimo... already work). No tag, two names, or an unknown name -> normal routing decides. 
Full guide: docs/DIRECTIVES.md; runnable plan->code->review->fix pipeline: examples/role_pipeline_workflow.js.", + "directives": { + "_enabled": "OFF by default so this never changes existing behavior. Set true to turn directives on (or UC_DIRECTIVES=1).", + "enabled": false, + "_aliases": "Optional name -> route id overrides ON TOP of the auto-derived table. Right side MUST be a route id from 'routes'. Add entries only to introduce a new name or disambiguate one (e.g. bare 'deepseek' maps to two routes and is dropped unless pinned here).", + "aliases": {}, + "_planner": "Optional. When set to a route id, interactive plan-mode turns with NO explicit pin auto-route there (e.g. let your strongest model write every plan). null = disabled.", + "planner": null, + "_strip": "Remove the [[route:...]] / @name / use:name marker from the prompt before forwarding (recommended).", + "strip": true } } diff --git a/docs/DIRECTIVES.md b/docs/DIRECTIVES.md new file mode 100644 index 0000000..a08dae7 --- /dev/null +++ b/docs/DIRECTIVES.md @@ -0,0 +1,143 @@ +# Routing directives — pin any request to a specific model + +Routing directives ("pins") let a request's **prompt** force which backend serves +it, overriding the orchestrator/worker selection **and** the [Auto Router](AUTO_ROUTER.md). +They exist for one job in particular: making an **automated multi-agent workflow** +land each spawned sub-agent on the right model **by role** — e.g. + +> **opus** writes the plan → **composer** writes the code → **codex** adversarially +> reviews it → **claude** fixes what the review got right. + +You don't drive that turn-by-turn. The workflow script bakes a role tag into each +`agent()` prompt; the proxy reads the tag, hard-pins that request, strips the tag, +and forwards the rest. No tag → nothing changes, the normal routing flow decides. 
+ +It's **opt-in — OFF by default** (enable with `"directives": {"enabled": true}` +in `config.json`, or `UC_DIRECTIVES=1`), so pulling this feature never changes an +existing setup until you ask for it. Once on, it's fully local and degrades +safely: an unknown name, two names in one message, or a name that maps to the +synthetic `auto` route are all ignored, so a request is never broken. + +--- + +## How a request gets pinned (30 seconds) + +On every request the proxy looks at the **latest real user turn** (tool-result-only +turns are skipped, so a sub-agent's tag stays sticky across its tool calls) and +scans for a marker, most-explicit tier first. A tier wins only if it resolves to +**exactly one** configured backend: + +| Tier | Form | Example | Stripped before forwarding? | +|------|------|---------|------------------------------| +| 1. Sentinel | `[[route:NAME]]` | `[[route:codex]] review this diff` | yes | +| 2. Tag | `@NAME` · `use:NAME` · `route:NAME` · `model:NAME` | `@composer implement the parser` | yes | +| 3. Natural language | `use/using/have/ask/let/with/via/route to NAME` | `please have codex review it` | no (it's prose) | + +`NAME` is resolved through an **alias table** (below). If it resolves to one +backend, that request is pinned there — skipping both the worker/orchestrator pick +and the Auto Router. If it resolves to **two or more** distinct backends (e.g. you +wrote `@opus then @composer`), that's ambiguous → ignored. If it resolves to +nothing → ignored. + +> Design summary: this implements **"explicit tag wins"** + **hard pin**. The tag +> is authoritative; natural language is only a fallback when no tag is present. 
+ +Watch decisions with `UC_DIRECTIVES_LOG=1`: + +``` +directive pin: tier=fast claude-deepseek-v4-flash -> claude-composer +[directive] ambiguous (claude-composer, claude-gpt-5.5-codex named); ignored +``` + +--- + +## Names (the alias table) + +Names **auto-derive** from your configured model ids and display names, so the +obvious ones already work with no setup: + +| You type | Resolves to (in the shipped config) | +|----------|--------------------------------------| +| `opus`, `claude` | `claude-opus` | +| `composer` | `claude-composer` | +| `codex`, `gpt` | `claude-gpt-5.5-codex` | +| `minimax` | `claude-minimax-m3` | +| `mimo` | `claude-mimo` | +| `deepseek-v4-pro`, `deepseek-v4-flash` | the matching route | + +Matching is case- and punctuation-insensitive (`GPT-5.5`, `gpt5.5`, `gpt_5_5` all +collapse to the same key). A name that would map to **two** routes (e.g. bare +`deepseek`) is dropped as ambiguous — use the specific id, or pin it explicitly in +config. + +### Configure it (optional) + +The `directives` block in `config.json` (already present in the shipped config): + +```jsonc +"directives": { + "enabled": true, // OFF by default — set true to turn the feature on + // Friendly name -> route id. The common names (opus, claude, composer, codex, + // minimax, mimo, ...) AUTO-DERIVE from your models, so list entries here only to + // ADD a custom name or disambiguate one. RHS must be a route id. + "aliases": { + "fixer": "claude-opus", // a custom role name -> a route + "deepseek": "claude-deepseek-v4-pro" // disambiguate a name that maps to two routes + }, + // Optional: interactive plan-mode turns with NO explicit pin auto-route here. + "planner": "claude-opus", + // Strip the marker from the prompt before forwarding (recommended). + "strip": true +} +``` + +| Field | Meaning | +|-------|---------| +| `enabled` | Turn directives on/off. **Defaults to off.** An explicit `UC_DIRECTIVES` env var (1/0) overrides this either way. 
| +| `aliases` | `name → route id` overrides on top of the auto-derived table. The right side must be a route id in `routes`. | +| `planner` | If set, **plan-mode** turns (the interactive planning loop, detected structurally via the `ExitPlanMode` tool) with no explicit pin route here. Set `null` to disable. | +| `strip` | Remove the matched marker from the prompt before forwarding so the backend never sees it. | + +### Knobs + +| Env var | Default | Effect | +|---------|---------|--------| +| `UC_DIRECTIVES` | unset | `1` force-enables, `0` force-disables — overrides `directives.enabled`. Unset → follow config (default off). | +| `UC_DIRECTIVES_NL` | `1` | `0` disables the natural-language tier; only sentinel/tag pins count. | +| `UC_DIRECTIVES_LOG` | `0` | `1` logs every pin / ambiguity / ignore decision. | + +--- + +## The point: automated multi-agent workflows + +Because a workflow script is something you (or the orchestrator) author, you don't +need any fuzzy "which phase is this?" inference — you state the role **in the +prompt**, deterministically: + +```js +const plan = await agent(`[[route:opus]] Write a plan for: ${task}`) +const code = await agent(`[[route:composer]] Implement this plan:\n${plan}`) +const review = await agent(`[[route:codex]] Adversarially review:\n${code}`, { schema: REVIEW }) +const fixed = await agent(`[[route:claude]] Fix the valid issues:\n${JSON.stringify(review)}`) +``` + +Each `agent()` is a separate request; the proxy pins each one independently, so +every spawned sub-agent lands exactly where you declared — regardless of which +single worker model is selected in `/model`. + +A complete, runnable version (with a structured review verdict and a conditional +fix step) ships at [`examples/role_pipeline_workflow.js`](../examples/role_pipeline_workflow.js). +Save it as `.claude/workflows/role-pipeline.js` to invoke by name, and pass the +task via `args`. 
+ +--- + +## Failure behavior (never breaks a request) + +| Situation | What happens | +|-----------|--------------| +| No marker in the prompt | Normal routing (tier/worker selection, then Auto Router). | +| Name resolves to nothing | Ignored; normal routing. | +| Two+ distinct names in one turn | Ambiguous → ignored; normal routing. | +| Name maps to the `auto` route | Not a real backend → ignored. | +| Pinned backend errors at dispatch | Same handling as any other route (the pin only chooses *where*, not *how*). | diff --git a/examples/role_pipeline_workflow.js b/examples/role_pipeline_workflow.js new file mode 100644 index 0000000..7483e4c --- /dev/null +++ b/examples/role_pipeline_workflow.js @@ -0,0 +1,121 @@ +// Role pipeline: plan (opus) -> code (composer) -> adversarial review (codex) -> fix (claude) +// --------------------------------------------------------------------------------------------- +// A ready-to-run multi-agent Workflow that lands each spawned sub-agent on a +// SPECIFIC model by role, with no turn-by-turn driving. It does this purely by +// baking a routing directive ("pin") into each agent()'s prompt: +// +// [[route:opus]] -> claude-opus (the planner) +// [[route:composer]] -> claude-composer (the implementer) +// [[route:codex]] -> claude-gpt-5.5-codex (the adversarial reviewer) +// [[route:claude]] -> claude-opus (the fixer) +// +// UltraCode-Shim's proxy sees that tag on each sub-agent request, HARD-PINS that +// request to the named backend (overriding the worker/orchestrator pick AND the +// Auto Router), strips the tag, and forwards the rest. The names resolve through +// the alias table in config.json ("directives" block) -- auto-derived from your +// model ids + display names, so composer/codex/opus already work out of the box. +// See docs/DIRECTIVES.md. 
+// +// Run it (from a project where you've launched Claude Code through the shim): +// - Save this as .claude/workflows/role-pipeline.js and invoke it by name, OR +// - paste it into the Workflow tool's `script` field. +// Pass the task via args, e.g. args: "Add a --json flag to the export command". + +export const meta = { + name: 'role-pipeline', + description: 'plan (opus) -> code (composer) -> adversarial review (codex) -> fix (claude)', + phases: [ + { title: 'Plan', detail: 'opus drafts the implementation plan' }, + { title: 'Code', detail: 'composer implements the plan' }, + { title: 'Review', detail: 'codex adversarially reviews the implementation' }, + { title: 'Fix', detail: 'claude fixes the issues the review deems valid' }, + ], +} + +// Accept the task as a plain string (args: "...") or {task: "..."}. +const task = (typeof args === 'string' && args.trim()) + ? args.trim() + : (args && typeof args.task === 'string' && args.task.trim()) + ? args.task.trim() + : null + +if (!task) { + log('No task provided. Pass it via Workflow args, e.g. args: "Add a --json flag".') + return { error: 'no task provided' } +} + +// 1) PLAN -- pinned to opus. +phase('Plan') +const plan = await agent( + `[[route:opus]] You are the PLANNER. Write a precise, step-by-step implementation ` + + `plan for the task below: the files to touch, the approach, data/flow changes, and ` + + `the edge cases that matter. Do NOT write the final code yet.\n\nTASK:\n${task}`, + { label: 'plan:opus', phase: 'Plan' }, +) + +// 2) CODE -- pinned to composer. +phase('Code') +const code = await agent( + `[[route:composer]] You are the IMPLEMENTER. Implement the plan below in full and ` + + `produce the actual code (diffs or complete files). 
Follow the plan; where it is ` + + `underspecified, make the smallest reasonable choice and note it inline.\n\n` + + `PLAN:\n${plan}\n\nORIGINAL TASK:\n${task}`, + { label: 'code:composer', phase: 'Code' }, +) + +// 3) REVIEW -- pinned to codex, structured so we can branch on the verdict. +phase('Review') +const REVIEW_SCHEMA = { + type: 'object', + additionalProperties: false, + properties: { + verdict: { type: 'string', enum: ['ship', 'fix', 'reject'] }, + issues: { + type: 'array', + items: { + type: 'object', + additionalProperties: false, + properties: { + severity: { type: 'string', enum: ['high', 'medium', 'low'] }, + title: { type: 'string' }, + detail: { type: 'string' }, + }, + required: ['severity', 'title', 'detail'], + }, + }, + summary: { type: 'string' }, + }, + required: ['verdict', 'issues', 'summary'], +} +const review = await agent( + `[[route:codex]] You are an ADVERSARIAL, SKEPTICAL reviewer. Actively try to BREAK ` + + `the implementation below: correctness bugs, missed requirements, unhandled edge ` + + `cases, race conditions, and security issues. Be concrete and cite specifics. Then ` + + `return your structured verdict (ship = no real problems; fix = real issues to ` + + `address; reject = fundamentally wrong).\n\nTASK:\n${task}\n\nIMPLEMENTATION:\n${code}`, + { label: 'review:codex', phase: 'Review', schema: REVIEW_SCHEMA }, +) + +// 4) FIX -- pinned to claude, only if the review found issues worth fixing. +let fixed = null +const actionable = (review.issues || []).filter(i => i.severity === 'high' || i.severity === 'medium') +if (review.verdict !== 'ship' && actionable.length) { + phase('Fix') + fixed = await agent( + `[[route:claude]] You are the FIXER. The adversarial review below flagged issues. ` + + `Fix ONLY the ones that are genuinely correct; for any you judge a false positive, ` + + `leave the code as-is and briefly explain why. 
Return the corrected implementation.\n\n` + + `IMPLEMENTATION:\n${code}\n\nREVIEW:\n${JSON.stringify(review, null, 2)}`, + { label: 'fix:claude', phase: 'Fix' }, + ) +} else { + log(`review verdict=${review.verdict}; no high/medium issues -> skipping fix`) +} + +return { + task, + plan, + implementation: fixed || code, + review, + fixed: Boolean(fixed), +} diff --git a/proxy.py b/proxy.py index 72aa099..63ff59d 100644 --- a/proxy.py +++ b/proxy.py @@ -132,6 +132,20 @@ ROUTER_MAX_TOKENS = int(os.environ.get("UC_ROUTER_MAX_TOKENS", "600")) ROUTER_LOG = os.environ.get("UC_ROUTER_LOG", "0") == "1" +# Routing directives ("pins"): a prompt tag like [[route:codex]] / @codex forces a +# single request onto a specific backend, overriding orchestrator/worker selection +# AND the Auto Router. This is what lets an automated multi-agent workflow land each +# spawned sub-agent on the right model by role (plan->opus, code->composer, ...). +# OPT-IN: OFF unless turned on via "directives": {"enabled": true} in config.json +# (or UC_DIRECTIVES=1). Default => exact prior behavior, so this never disrupts an +# existing setup that hasn't asked for it. Final value is resolved in +# _configure_directives(); this is only the pre-config default. See docs/DIRECTIVES.md. 
+DIRECTIVES_ENABLED = os.environ.get("UC_DIRECTIVES") == "1" +DIRECTIVES_NL = os.environ.get("UC_DIRECTIVES_NL", "1") != "0" # natural-language fallback +DIRECTIVES_LOG = os.environ.get("UC_DIRECTIVES_LOG", "0") == "1" +DIRECTIVES = {"planner": None, "strip": True} # filled from config in main() +_ROUTE_ALIASES = {} # normalized token -> concrete route id + try: UC_MODEL_MAP = json.loads(os.environ.get("UC_MODEL_MAP", "") or "{}") if not isinstance(UC_MODEL_MAP, dict): @@ -613,6 +627,215 @@ def _wire_orchestrator_worker(): % (len(_ORCH_PICK_IDS), ", ".join(sorted(_WORKER_MAP)))) +# -------------------------------------------------------------------------- +# Routing directives ("pins") -- force a request onto a specific backend +# -------------------------------------------------------------------------- +# A workflow (or a human) can tag a request's prompt to pin it to ONE backend, +# overriding the orchestrator/worker selection AND the Auto Router. This is how an +# automated multi-agent workflow lands each spawned sub-agent on the right model by +# role -- e.g. plan->opus, code->composer, review->codex, fix->claude -- with no +# turn-by-turn driving: the workflow script bakes a role tag into each agent() +# prompt and the proxy hard-pins that request. +# +# Marker tiers (case-insensitive), most explicit first; a tier wins only if it +# resolves to EXACTLY ONE configured backend (naming two models is ambiguous -> +# ignored, normal routing decides): +# 1. [[route:codex]] sentinel (stripped before forwarding) +# 2. @codex use:codex route:codex model:codex tag (stripped) +# 3. "...have codex review...", "ask codex to ..." natural language (UC_DIRECTIVES_NL) +# +# The token after a marker is resolved through an alias table auto-derived from +# your model ids + display names (plus router.aliases / directives.aliases +# overrides). A pin to an unconfigured or "auto" route is ignored so a request is +# never broken. 
+_DIRECTIVE_SENTINEL = re.compile(r"\[\[\s*(?:route|model|use)\s*:\s*([A-Za-z0-9._\-]+)\s*\]\]", re.I) +_DIRECTIVE_TAG = re.compile(r"(?:^|[\s(])(?:@|(?:route|model|use)\s*:\s*)([A-Za-z0-9._\-]+)", re.I) +_DIRECTIVE_NL = re.compile(r"\b(?:use|using|have|ask|let|route\s+to|via|with)\s+([A-Za-z0-9._\-]+)", re.I) + + +def _norm_alias(s): + """Lowercase + strip non-alphanumerics so 'GPT-5.5', 'gpt5.5', 'gpt_5_5' all + collapse to one matchable key.""" + return re.sub(r"[^a-z0-9]+", "", str(s).lower()) + + +def _resolve_alias(token): + return _ROUTE_ALIASES.get(_norm_alias(token)) + + +def _latest_user_turn(anth_body): + """(message_dict, plain_text) of the newest user turn carrying real + instruction text. Pure tool_result turns (tool round-trips) are skipped so a + sub-agent's task tag stays sticky across its tool calls. (None, "") if none.""" + for m in reversed(anth_body.get("messages") or []): + if not isinstance(m, dict) or m.get("role") != "user": + continue + content = m.get("content") + if isinstance(content, list): + non_tool = [b for b in content + if not (isinstance(b, dict) and b.get("type") == "tool_result")] + if not non_tool: + continue + txt = _text_from_anthropic_content(non_tool) + else: + txt = content if isinstance(content, str) else _text_from_anthropic_content(content) + txt = (txt or "").strip() + if txt: + return m, txt + return None, "" + + +def _detect_directive(text): + """(route_ids, spans, tier) for the most explicit marker tier that resolves to + one or more configured backends. 
`spans` are the literal marker substrings to + strip (empty for the natural-language tier -- that's prose, left intact).""" + def scan(pattern): + ids, spans, seen = [], [], set() + for m in pattern.finditer(text): + rid = _resolve_alias(m.group(1)) + if not rid: + continue + spans.append(m.group(0)) + if rid not in seen: + seen.add(rid) + ids.append(rid) + return ids, spans + ids, spans = scan(_DIRECTIVE_SENTINEL) + if ids: + return ids, spans, "sentinel" + ids, spans = scan(_DIRECTIVE_TAG) + if ids: + return ids, spans, "tag" + if DIRECTIVES_NL: + ids, _ = scan(_DIRECTIVE_NL) + if ids: + return ids, [], "nl" + return [], [], None + + +def _strip_spans_in_msg(msg, spans): + """Remove matched marker substrings from a user turn's text in-place so the + backend model never sees the routing tag.""" + if not spans or not isinstance(msg, dict): + return + def clean(s): + for sp in spans: + s = s.replace(sp, " ") + return re.sub(r"[ \t]{2,}", " ", s).strip() + content = msg.get("content") + if isinstance(content, str): + msg["content"] = clean(content) + elif isinstance(content, list): + for b in content: + if isinstance(b, dict) and b.get("type") == "text" and isinstance(b.get("text"), str): + b["text"] = clean(b["text"]) + + +def _directive_pin(body): + """Route id this request is pinned to by a prompt directive, or None. Strips + the marker text in-place when a pin is found. 
Never raises.""" + if not DIRECTIVES_ENABLED: + return None + try: + msg, text = _latest_user_turn(body) + if not text: + return None + ids, spans, tier = _detect_directive(text) + if len(ids) != 1: + if len(ids) > 1 and DIRECTIVES_LOG: + log("[directive] ambiguous (%s named); ignored" % ", ".join(ids)) + return None + rid = ids[0] + slot = UC_SLOT_MAP.get(rid) + if not isinstance(slot, dict) or slot.get("type") == "auto": + if DIRECTIVES_LOG: + log("[directive] '%s' (%s) not a usable backend; ignored" % (rid, tier)) + return None + if DIRECTIVES.get("strip", True) and spans: + _strip_spans_in_msg(msg, spans) + return rid + except Exception as e: + if DIRECTIVES_LOG: + log("[directive] error: %s" % e) + return None + + +def _is_plan_mode(body): + """True when the request is the interactive planning loop (the harness offers + ExitPlanMode only while in plan mode).""" + for t in body.get("tools") or []: + if isinstance(t, dict) and t.get("name") == "ExitPlanMode": + return True + return False + + +def _configure_directives(cfg): + """Build the alias table for prompt routing directives from configured + models/routes, plus optional overrides. 
Idempotent; called from main().""" + global _ROUTE_ALIASES, DIRECTIVES_ENABLED + if not isinstance(cfg, dict): + cfg = {} + aliases = {} + STOP = {"the", "real", "auto", "smart", "routing", "router", "worker", "experimental", + "cursor", "oauth", "fast", "flash", "pro", "plus", "max", "mini", "via", "pay", + "you", "model", "plan", "code", "chat", "api", "beta", "preview"} + + def add(token, rid): + key = _norm_alias(token) + if not key or key in STOP: + return + if key in aliases: + if aliases[key] != rid: + aliases[key] = None # collision -> ambiguous, disable + else: + aliases[key] = rid + + display = {m.get("id"): m.get("display_name", "") for m in (UC_MODELS or [])} + for rid, slot in (UC_SLOT_MAP or {}).items(): + if not isinstance(slot, dict) or slot.get("type") == "auto": + continue + if rid.startswith(WORKER_ID_PREFIX): + continue + add(rid, rid) + if rid.startswith("claude-"): + add(rid[len("claude-"):], rid) + for w in re.findall(r"[A-Za-z][A-Za-z0-9.]+", display.get(rid, "")): + if w.lower() not in STOP and len(w) >= 3: + add(w.lower(), rid) + mv = slot.get("model") + if isinstance(mv, str) and mv: + seg = mv.split("/")[-1] + head = re.split(r"[^A-Za-z]", seg)[0] + if head and head.lower() not in STOP and len(head) >= 3: + add(head.lower(), rid) + aliases = {k: v for k, v in aliases.items() if v} # drop ambiguous + + # Explicit overrides always win (directives.aliases preferred over router.aliases). 
+ rcfg = cfg.get("router") if isinstance(cfg.get("router"), dict) else {} + dcfg = cfg.get("directives") if isinstance(cfg.get("directives"), dict) else {} + for src in (rcfg.get("aliases"), dcfg.get("aliases")): + if isinstance(src, dict): + for tok, rid in src.items(): + if isinstance(rid, str) and rid in UC_SLOT_MAP: + aliases[_norm_alias(tok)] = rid + _ROUTE_ALIASES = aliases + + planner = dcfg.get("planner") or rcfg.get("planner") + DIRECTIVES["planner"] = planner if planner in UC_SLOT_MAP else None + DIRECTIVES["strip"] = bool(dcfg.get("strip", True)) + # Opt-in resolution: an explicit env var wins (UC_DIRECTIVES=1 on, =0 off); + # otherwise follow config, which defaults to OFF so a fresh upgrade is a no-op. + env = os.environ.get("UC_DIRECTIVES") + if env is not None: + DIRECTIVES_ENABLED = env != "0" + else: + DIRECTIVES_ENABLED = bool(dcfg.get("enabled", False)) + if DIRECTIVES_ENABLED and aliases: + log("directives: %d alias(es) over %s%s" + % (len(aliases), ", ".join(sorted(set(aliases.values()))), + ("; planner=%s" % DIRECTIVES["planner"]) if DIRECTIVES["planner"] else "")) + + # -------------------------------------------------------------------------- # UltraCode envelope (the heart of the proxy) # -------------------------------------------------------------------------- @@ -678,6 +901,27 @@ def transform_messages_body(raw: bytes): remap = ("%s->%s" % (model_before, routed_id)) if routed_id != model_before else (model_before or "-") log("tier=%s model=%s" % (tier, remap)) + # Routing directive ("pin"): a prompt tag forces THIS request onto a specific + # backend, overriding the worker/orchestrator selection above AND the Auto + # Router below (the pin sets a concrete model id, so the type=="auto" branch + # never fires). This is how an automated multi-agent workflow lands each + # spawned sub-agent on the right model by role. Falls back silently to normal + # routing when no (or an ambiguous/unknown) directive is present. 
+ pin_id = _directive_pin(body) + if pin_id and pin_id != body.get("model"): + if DIRECTIVES_LOG or TIER_LOG or ROUTER_LOG: + log("directive pin: tier=%s %s -> %s" % (tier, body.get("model"), pin_id)) + body["model"] = pin_id + changed = True + elif (not pin_id and DIRECTIVES.get("planner") and _is_plan_mode(body) + and DIRECTIVES["planner"] != body.get("model")): + # No explicit pin, but this is the interactive planning loop -> planner. + planner = DIRECTIVES["planner"] + if DIRECTIVES_LOG or TIER_LOG or ROUTER_LOG: + log("directive plan-mode: tier=%s %s -> %s" % (tier, body.get("model"), planner)) + body["model"] = planner + changed = True + # Auto Router: a slot of type "auto" is not a real backend -- it asks a cheap # classifier model to score the configured candidates and routes this request # to the cheapest one that clears the quality bar. Resolve it to a concrete @@ -1300,26 +1544,10 @@ def _clamp01(x): def _last_user_text(anth_body): - """Sanitized text of the latest user turn -- the task to classify.""" - msgs = anth_body.get("messages") or [] - for m in reversed(msgs): - if not isinstance(m, dict) or m.get("role") != "user": - continue - content = m.get("content") - # A user turn that is ONLY tool_result blocks is a tool round-trip, not a - # fresh ask; skip it so the cache key stays on the real instruction. - if isinstance(content, list): - non_tool = [b for b in content - if not (isinstance(b, dict) and b.get("type") == "tool_result")] - if not non_tool: - continue - txt = _text_from_anthropic_content(non_tool) - else: - txt = content if isinstance(content, str) else _text_from_anthropic_content(content) - txt = (txt or "").strip() - if txt: - return txt - return "" + """Sanitized text of the latest user turn -- the task to classify. 
A turn that + is ONLY tool_result blocks is a tool round-trip, not a fresh ask, so it is + skipped (keeps the router cache key on the real instruction).""" + return _latest_user_turn(anth_body)[1] def _has_images(anth_body): @@ -2233,6 +2461,7 @@ def main(): log(" including %d stock Claude model(s) on GET /v1/models [%s] (real " "Claude stays visible): %s" % (len(stock), src, ", ".join(m["id"] for m in stock))) + _configure_directives(cfg) if UC_MODELS: log(" advertising %d configured model(s) on GET /v1/models:" % len(UC_MODELS)) for m in UC_MODELS: diff --git a/test_proxy.py b/test_proxy.py index 3df40b0..35b949d 100755 --- a/test_proxy.py +++ b/test_proxy.py @@ -375,6 +375,69 @@ def main(): {"role": "user", "content": [{"type": "image", "source": {}}]}]}) is True print("[ok] auto router unit: selection / image-reject / score-parse / signal") + # Routing directives ("pins"): a prompt tag forces one backend, overriding + # tier/worker selection AND the Auto Router. Aliases auto-derive from model + # ids + display names; ambiguous/unknown/auto markers are ignored so a + # request never breaks. This is how a multi-agent workflow lands each + # spawned sub-agent on the right model by role. 
+ _saved = (up.UC_SLOT_MAP, up.UC_MODELS, dict(up._ROUTE_ALIASES), dict(up.DIRECTIVES)) + up.UC_SLOT_MAP = { + "claude-opus": {"model": "claude-opus-4-8"}, + "claude-composer": {"type": "openai_compat", "model": "cursor/composer-2.5"}, + "claude-gpt-5.5-codex": {"type": "codex_oauth", "model": "gpt-5.5"}, + "claude-auto": {"type": "auto"}, + } + up.UC_MODELS = [ + {"id": "claude-opus", "display_name": "Claude Opus 4.8 (real)"}, + {"id": "claude-composer", "display_name": "Composer 2.5 (Cursor, experimental)"}, + {"id": "claude-gpt-5.5-codex", "display_name": "GPT-5.5 (Codex OAuth)"}, + {"id": "claude-auto", "display_name": "Auto (smart routing)"}, + ] + up._configure_directives({"directives": { + "enabled": True, + "aliases": {"claude": "claude-opus", "smart": "claude-auto"}, + "planner": "claude-opus"}}) + # aliases auto-derive from display names/ids; explicit override wins + assert up._resolve_alias("composer") == "claude-composer" + assert up._resolve_alias("codex") == "claude-gpt-5.5-codex" + assert up._resolve_alias("opus") == "claude-opus" + assert up._resolve_alias("claude") == "claude-opus" + + def _pin(text): + b = {"messages": [{"role": "user", "content": text}]} + return up._directive_pin(b), up._latest_user_turn(b)[1] + # sentinel / tag / natural-language tiers each resolve a single pin + assert _pin("[[route:codex]] review this diff") == ("claude-gpt-5.5-codex", "review this diff") + assert _pin("@composer implement the parser")[0] == "claude-composer" + assert _pin("please have codex review it")[0] == "claude-gpt-5.5-codex" # NL fallback + # no marker, ambiguous (two named in one tier), unknown, or auto -> ignored + assert _pin("just write some code")[0] is None + assert _pin("@opus then @composer")[0] is None # ambiguous (tag) + assert _pin("use opus and use composer")[0] is None # ambiguous (NL) + assert _pin("[[route:doesnotexist]] hi")[0] is None # unknown alias + assert _pin("@smart do it")[0] is None # resolves to auto route + # the pin reaches 
the dispatcher: a tagged worker request overrides tier + up._set_selection(orch="claude-opus", worker="claude-opus") + out, _ = up.transform_messages_body(json.dumps({ + "model": "claude-opus-4-8", "max_tokens": 16, + "messages": [{"role": "user", "content": "@composer write a haiku"}]}).encode()) + assert json.loads(out)["model"] == "cursor/composer-2.5", json.loads(out)["model"] + up._set_selection(orch=None, worker=None) + up._ACTIVE.update({"orch": None, "worker": None, "worker_explicit": False}) + # plan-mode detection drives the optional planner auto-route + assert up._is_plan_mode({"tools": [{"name": "ExitPlanMode"}]}) is True + assert up._is_plan_mode({"tools": [{"name": "Bash"}]}) is False + # OPT-IN: with no enable flag and no UC_DIRECTIVES env, the feature is OFF, + # so pulling this change is a no-op for existing setups -- a tag is left as-is + # and normal routing decides. (This is the backward-compat guarantee.) + os.environ.pop("UC_DIRECTIVES", None) + up._configure_directives({"directives": {"aliases": {"composer": "claude-composer"}}}) + assert up.DIRECTIVES_ENABLED is False + assert _pin("@composer do it")[0] is None + up.UC_SLOT_MAP, up.UC_MODELS, up._ROUTE_ALIASES, up.DIRECTIVES = ( + _saved[0], _saved[1], _saved[2], _saved[3]) + print("[ok] routing directives: opt-in default-off / alias-derive / sentinel+tag+NL pin / strip / ambiguous-ignore / dispatch") + # issue #3: a rejected tool call (with or without a comment) must not leave # an assistant tool_calls message unanswered, and tool replies must come # BEFORE the user's text — otherwise strict backends (DeepSeek) 400 with From 29547b519001fb9e5a98948caafbdcbeda9f1ec6 Mon Sep 17 00:00:00 2001 From: Payne Date: Wed, 3 Jun 2026 22:00:51 +0300 Subject: [PATCH 2/2] Address adversarial review: planner gating, surgical strip, NL opt-in, gpt docs, doctor Found via an independent gpt-5.5 review of the directives feature. - planner: gate the plan-mode auto-route on DIRECTIVES_ENABLED. 
It was applied whenever a planner was configured, even with directives.enabled:false / UC_DIRECTIVES=0 -- so "off" wasn't fully off. Now it's a true hard-off. - strip: (a) _DIRECTIVE_TAG used a leading capturing boundary, so stripping a tag like "(@composer)" swallowed the "(" and left an orphan ")"; switched to a fixed-width negative lookbehind so the boundary char is preserved. (b) the strip globally collapsed runs of spaces/tabs, flattening code indentation in a pinned prompt; now it removes only the marker and trims trailing/edge whitespace. - natural-language tier is now OPT-IN (UC_DIRECTIVES_NL defaults off). Prose that merely mentions a model after a trigger word ("does this work with Claude?") was silently rerouting; explicit sentinel/tag pins are unaffected. - docs: the alias table claimed `gpt` -> claude-gpt-5.5-codex, but in the shipped example `gpt` is dropped as ambiguous (collides with the Ollama gpt-oss model head). Corrected to `codex`, and documented the collision. - doctor.py: validate directive alias overrides + planner against real routes. - tests: cover planner-gated-when-disabled, surgical strip (paren + indentation), NL opt-in on/off, and the gpt collision (the reduced fixture previously missed it). Co-Authored-By: Claude Opus 4.8 --- docs/DIRECTIVES.md | 21 +++++++++++++++------ proxy.py | 17 +++++++++++------ scripts/doctor.py | 22 ++++++++++++++++++++++ test_proxy.py | 42 ++++++++++++++++++++++++++++++++++++++---- 4 files changed, 86 insertions(+), 16 deletions(-) diff --git a/docs/DIRECTIVES.md b/docs/DIRECTIVES.md index a08dae7..884043a 100644 --- a/docs/DIRECTIVES.md +++ b/docs/DIRECTIVES.md @@ -31,7 +31,13 @@ scans for a marker, most-explicit tier first. A tier wins only if it resolves to |------|------|---------|------------------------------| | 1. Sentinel | `[[route:NAME]]` | `[[route:codex]] review this diff` | yes | | 2. Tag | `@NAME` · `use:NAME` · `route:NAME` · `model:NAME` | `@composer implement the parser` | yes | -| 3. 
Natural language | `use/have/ask/let/with/via NAME` | `please have codex review it` | no (it's prose) | +| 3. Natural language **(opt-in, off by default)** | `use/have/ask/let/with/via NAME` | `please have codex review it` | no (it's prose) | + +> The natural-language tier is **off by default** — enable it with +> `UC_DIRECTIVES_NL=1`. It's deliberately opt-in because ordinary prose that merely +> mentions a model name after a trigger word (e.g. "*does this work **with Claude**?*") +> would otherwise silently reroute the request. With it off, only the explicit +> sentinel/tag forms pin. `NAME` is resolved through an **alias table** (below). If it resolves to one backend, that request is pinned there — skipping both the worker/orchestrator pick @@ -60,15 +66,18 @@ obvious ones already work with no setup: |----------|--------------------------------------| | `opus`, `claude` | `claude-opus` | | `composer` | `claude-composer` | -| `codex`, `gpt` | `claude-gpt-5.5-codex` | +| `codex` | `claude-gpt-5.5-codex` | | `minimax` | `claude-minimax-m3` | | `mimo` | `claude-mimo` | | `deepseek-v4-pro`, `deepseek-v4-flash` | the matching route | Matching is case- and punctuation-insensitive (`GPT-5.5`, `gpt5.5`, `gpt_5_5` all -collapse to the same key). A name that would map to **two** routes (e.g. bare -`deepseek`) is dropped as ambiguous — use the specific id, or pin it explicitly in -config. +collapse to the same key). A name that would map to **two** routes is dropped as +ambiguous — use the specific id, or pin it explicitly in `aliases`. In the shipped +example two such names are dropped: bare **`deepseek`** (matches both v4-pro and +v4-flash) and **`gpt`** (matches both `claude-gpt-5.5-codex` via `gpt-5.5` *and* +`claude-ollama-cloud` via its `gpt-oss` model) — so use `codex` for GPT-5.5, not +`gpt`. 
### Configure it (optional) @@ -103,7 +112,7 @@ The `directives` block in `config.json` (already present in the shipped config): | Env var | Default | Effect | |---------|---------|--------| | `UC_DIRECTIVES` | unset | `1` force-enables, `0` force-disables — overrides `directives.enabled`. Unset → follow config (default off). | -| `UC_DIRECTIVES_NL` | `1` | `0` disables the natural-language tier; only sentinel/tag pins count. | +| `UC_DIRECTIVES_NL` | `0` | `1` enables the natural-language tier. **Off by default** (avoids prose like "with Claude" silently rerouting); only sentinel/tag pins count unless enabled. | | `UC_DIRECTIVES_LOG` | `0` | `1` logs every pin / ambiguity / ignore decision. | --- diff --git a/proxy.py b/proxy.py index 63ff59d..d85a7f3 100644 --- a/proxy.py +++ b/proxy.py @@ -141,7 +141,7 @@ # existing setup that hasn't asked for it. Final value is resolved in # _configure_directives(); this is only the pre-config default. See docs/DIRECTIVES.md. DIRECTIVES_ENABLED = os.environ.get("UC_DIRECTIVES") == "1" -DIRECTIVES_NL = os.environ.get("UC_DIRECTIVES_NL", "1") != "0" # natural-language fallback +DIRECTIVES_NL = os.environ.get("UC_DIRECTIVES_NL", "0") == "1" # natural-language tier: opt-in (off by default) DIRECTIVES_LOG = os.environ.get("UC_DIRECTIVES_LOG", "0") == "1" DIRECTIVES = {"planner": None, "strip": True} # filled from config in main() _ROUTE_ALIASES = {} # normalized token -> concrete route id @@ -649,7 +649,7 @@ def _wire_orchestrator_worker(): # overrides). A pin to an unconfigured or "auto" route is ignored so a request is # never broken. _DIRECTIVE_SENTINEL = re.compile(r"\[\[\s*(?:route|model|use)\s*:\s*([A-Za-z0-9._\-]+)\s*\]\]", re.I) -_DIRECTIVE_TAG = re.compile(r"(?:^|[\s(])(?:@|(?:route|model|use)\s*:\s*)([A-Za-z0-9._\-]+)", re.I) +_DIRECTIVE_TAG = re.compile(r"(? 
%s" % (tier, body.get("model"), pin_id)) body["model"] = pin_id changed = True - elif (not pin_id and DIRECTIVES.get("planner") and _is_plan_mode(body) - and DIRECTIVES["planner"] != body.get("model")): + elif (DIRECTIVES_ENABLED and not pin_id and DIRECTIVES.get("planner") + and _is_plan_mode(body) and DIRECTIVES["planner"] != body.get("model")): # No explicit pin, but this is the interactive planning loop -> planner. + # Gated on DIRECTIVES_ENABLED so "enabled:false" / UC_DIRECTIVES=0 is a + # true hard-off (the planner is otherwise applied independently of pins). planner = DIRECTIVES["planner"] if DIRECTIVES_LOG or TIER_LOG or ROUTER_LOG: log("directive plan-mode: tier=%s %s -> %s" % (tier, body.get("model"), planner)) diff --git a/scripts/doctor.py b/scripts/doctor.py index 663b319..967bdcb 100755 --- a/scripts/doctor.py +++ b/scripts/doctor.py @@ -237,6 +237,28 @@ def main(): else: note("router: no classifier set - router will pick the cheapest candidate without scoring") + # 6b. routing directives (pins): validate alias overrides + planner when set + directives = cfg.get("directives") if isinstance(cfg.get("directives"), dict) else {} + if directives: + if directives.get("enabled"): + al = directives.get("aliases") if isinstance(directives.get("aliases"), dict) else {} + bad = [v for v in al.values() if not (isinstance(v, str) and v in routes)] + if bad: + (note if using_example else fail)( + "directives: alias override(s) point to missing route(s): %s" % ", ".join(map(str, bad))) + elif al: + ok("directives: %d alias override(s) map to real routes" % len(al)) + planner = directives.get("planner") + if not planner: + ok("directives: enabled (no planner)") + elif planner in routes: + ok("directives: planner '%s' is a configured route" % planner) + else: + (note if using_example else fail)( + "directives: planner '%s' has no matching route - it will be ignored" % planner) + else: + ok("directives: present but disabled (no-op)") + # 7. 
port free proxy_cfg = cfg.get("proxy") if isinstance(cfg.get("proxy"), dict) else {} port = int(os.environ.get("UC_LISTEN_PORT") or proxy_cfg.get("listen_port") or 8141) diff --git a/test_proxy.py b/test_proxy.py index 35b949d..5e1184d 100755 --- a/test_proxy.py +++ b/test_proxy.py @@ -406,16 +406,31 @@ def main(): def _pin(text): b = {"messages": [{"role": "user", "content": text}]} return up._directive_pin(b), up._latest_user_turn(b)[1] - # sentinel / tag / natural-language tiers each resolve a single pin + # sentinel + tag tiers resolve a single pin and strip the marker cleanly assert _pin("[[route:codex]] review this diff") == ("claude-gpt-5.5-codex", "review this diff") assert _pin("@composer implement the parser")[0] == "claude-composer" - assert _pin("please have codex review it")[0] == "claude-gpt-5.5-codex" # NL fallback + # strip is SURGICAL: a leading "(" is preserved (not swallowed), and code + # indentation is NOT flattened (regression test for the marker-strip bug) + pin_id, txt = _pin("Document the literal token (@composer) exactly.") + assert pin_id == "claude-composer" and txt == "Document the literal token () exactly.", (pin_id, txt) + _, code_txt = _pin("[[route:composer]] code:\ndef f():\n if x:\n return 1") + assert code_txt == "code:\ndef f():\n if x:\n return 1", repr(code_txt) # no marker, ambiguous (two named in one tier), unknown, or auto -> ignored assert _pin("just write some code")[0] is None assert _pin("@opus then @composer")[0] is None # ambiguous (tag) - assert _pin("use opus and use composer")[0] is None # ambiguous (NL) assert _pin("[[route:doesnotexist]] hi")[0] is None # unknown alias assert _pin("@smart do it")[0] is None # resolves to auto route + # natural-language tier is OPT-IN (off by default) -- ordinary prose like + # "have codex review it" must NOT pin until UC_DIRECTIVES_NL is on; this is + # the fix for the "with Claude"-style false-routing footgun + assert up.DIRECTIVES_NL is False + assert _pin("please have codex 
review it")[0] is None # NL off -> no pin + up.DIRECTIVES_NL = True + try: + assert _pin("please have codex review it")[0] == "claude-gpt-5.5-codex" + assert _pin("use opus and use composer")[0] is None # ambiguous (NL) + finally: + up.DIRECTIVES_NL = False # the pin reaches the dispatcher: a tagged worker request overrides tier up._set_selection(orch="claude-opus", worker="claude-opus") out, _ = up.transform_messages_body(json.dumps({ @@ -427,6 +442,25 @@ def _pin(text): # plan-mode detection drives the optional planner auto-route assert up._is_plan_mode({"tools": [{"name": "ExitPlanMode"}]}) is True assert up._is_plan_mode({"tools": [{"name": "Bash"}]}) is False + # a name that maps to TWO routes (gpt-5.5 head AND a gpt-oss model head) is + # dropped as ambiguous -> resolves to nothing (regression for the docs/gpt gap) + _slots0, _models0 = up.UC_SLOT_MAP, up.UC_MODELS + up.UC_SLOT_MAP = {"claude-gpt-5.5-codex": {"type": "codex_oauth", "model": "gpt-5.5"}, + "claude-ollama": {"type": "openai_compat", "model": "gpt-oss:120b"}} + up.UC_MODELS = [{"id": "claude-gpt-5.5-codex", "display_name": "GPT-5.5 (Codex OAuth)"}, + {"id": "claude-ollama", "display_name": "Ollama Cloud"}] + up._configure_directives({"directives": {"enabled": True}}) + assert up._resolve_alias("gpt") is None, up._resolve_alias("gpt") # ambiguous -> dropped + assert up._resolve_alias("codex") == "claude-gpt-5.5-codex" # unique -> resolves + up.UC_SLOT_MAP, up.UC_MODELS = _slots0, _models0 + # FIX: the planner must NOT fire when directives are disabled (hard-off). 
+ up._configure_directives({"directives": {"enabled": False, "planner": "claude-opus"}}) + assert up.DIRECTIVES_ENABLED is False and up.DIRECTIVES["planner"] == "claude-opus" + out_pm, _ = up.transform_messages_body(json.dumps({ + "model": "claude-composer", "max_tokens": 16, + "tools": [{"name": "ExitPlanMode"}], + "messages": [{"role": "user", "content": "make a plan"}]}).encode()) + assert json.loads(out_pm)["model"] == "cursor/composer-2.5", json.loads(out_pm)["model"] # OPT-IN: with no enable flag and no UC_DIRECTIVES env, the feature is OFF, # so pulling this change is a no-op for existing setups -- a tag is left as-is # and normal routing decides. (This is the backward-compat guarantee.) @@ -436,7 +470,7 @@ def _pin(text): assert _pin("@composer do it")[0] is None up.UC_SLOT_MAP, up.UC_MODELS, up._ROUTE_ALIASES, up.DIRECTIVES = ( _saved[0], _saved[1], _saved[2], _saved[3]) - print("[ok] routing directives: opt-in default-off / alias-derive / sentinel+tag+NL pin / strip / ambiguous-ignore / dispatch") + print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch") # issue #3: a rejected tool call (with or without a comment) must not leave # an assistant tool_calls message unanswered, and tool replies must come