From 278a3fa85d3d3ee20229a0dd1b15d807f3396c10 Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sun, 31 May 2026 19:29:24 -0700 Subject: [PATCH 1/3] feat: inference-time tool-call reliability (constrained decoding, retrieval, critic gate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three flag-gated, independent, default-off reliability features for the agent loop: - `provider/constrained.ts` — grammar/JSON-Schema constrained decoding so a local model (vLLM/LM Studio/llama.cpp) is forced to emit a parseable, schema-correct tool call. Pure (schema in → payload out). - `tool/retrieval.ts` — per-turn tool subset (always-on core + lexical top-k), never dropping a tool referenced mid-trajectory; trims the ~78-tool context flood. v1 lexical, dependency-free, deterministic. - `tool/critic.ts` — pre-execution gate for side-effecting tools via a pluggable `Verifier`; the default verifier allows everything (ungated), and a real verifier is injected by the caller. Retrieval and constrained decoding are wired, flag-gated, into `session/llm.ts` (marker-wrapped; flags default off → upstream path unchanged); the critic gate is not yet wired into the execute path (intended wiring point: the `session/prompt.ts` execute wrapper). 18 unit tests; typecheck clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/opencode/src/provider/constrained.ts | 96 +++++++++++++++++++ packages/opencode/src/session/llm.ts | 55 +++++++++++ packages/opencode/src/tool/critic.ts | 69 +++++++++++++ packages/opencode/src/tool/retrieval.ts | 77 +++++++++++++++ .../test/provider/constrained.test.ts | 73 ++++++++++++++ packages/opencode/test/tool/critic.test.ts | 40 ++++++++ packages/opencode/test/tool/retrieval.test.ts | 41 ++++++++ 7 files changed, 451 insertions(+) create mode 100644 packages/opencode/src/provider/constrained.ts create mode 100644 packages/opencode/src/tool/critic.ts create mode 100644 packages/opencode/src/tool/retrieval.ts create mode 100644 packages/opencode/test/provider/constrained.test.ts create mode 100644 packages/opencode/test/tool/critic.test.ts create mode 100644 packages/opencode/test/tool/retrieval.test.ts diff --git a/packages/opencode/src/provider/constrained.ts b/packages/opencode/src/provider/constrained.ts new file mode 100644 index 000000000..dfced7a9e --- /dev/null +++ b/packages/opencode/src/provider/constrained.ts @@ -0,0 +1,96 @@ +/** + * Constrained (grammar) decoding for tool calls. + * + * Builds a JSON-Schema "envelope" describing a VALID tool call for the current + * resolved tool set, so a local model (vLLM / LM Studio / llama.cpp) can be + * forced — at the token level — to emit a parseable, schema-correct call. A + * deterministic fix for the "model emits unparseable tool calls" failure; + * base-model-agnostic. + * + * This module is pure (JSON-Schema in → payload out); wiring into the request + * happens in ProviderTransform.providerOptions (a separate, marker-wrapped edit). + */ + +export namespace Constrained { + /** Minimal tool shape this module needs (parameters already converted to JSON Schema). 
*/ + export interface ToolSchema { + name: string + description?: string + parameters: Record // JSON Schema for the tool's arguments + } + + /** Only constrain when explicitly enabled AND the provider is a self-served / local + * OpenAI-compatible endpoint. Never constrain hosted models (Anthropic/OpenAI): + * we don't control their decoding and their tool-calls are already valid. */ + export function enabled(): boolean { + return process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] === "1" + } + + /** True for providers where WE control the inference engine and can pass guided + * decoding (vLLM, LM Studio, Ollama, llama.cpp via openai-compatible). */ + export function isLocalProvider(npm?: string, providerID?: string): boolean { + if (npm === "@ai-sdk/openai-compatible") return true + const local = new Set(["vllm", "lmstudio", "llamacpp", "ollama", "local"]) + return !!providerID && local.has(providerID) + } + + /** A normalized arguments schema: ensure it's an object schema (empty-arg tools + * constrain to `{}`), and forbid extra properties so the grammar is tight. */ + function argsSchema(t: ToolSchema): Record { + const p = t.parameters && typeof t.parameters === "object" ? t.parameters : {} + const props = (p as any).properties ?? {} + return { + type: "object", + properties: props, + required: (p as any).required ?? [], + additionalProperties: (p as any).additionalProperties ?? false, + } + } + + /** + * Discriminated-union envelope: a single tool call must be exactly one of the + * tools, with `name` pinned to that tool and `arguments` matching its schema. + * vLLM/XGrammar guided_json and llama.cpp GBNF both support oneOf. 
+ */ + export function toolCallEnvelope(tools: ToolSchema[]): Record { + if (!tools.length) throw new Error("constrained: no tools to build envelope from") + return { + $schema: "http://json-schema.org/draft-07/schema#", + title: "tool_call", + oneOf: tools.map((t) => ({ + type: "object", + properties: { + name: { const: t.name }, + arguments: argsSchema(t), + }, + required: ["name", "arguments"], + additionalProperties: false, + })), + } + } + + /** + * Provider options payload to attach (under the provider's SDK key) for guided + * decoding. Two styles cover the engines we serve: + * - "response_format": OpenAI-style json_schema (vLLM ≥0.6, LM Studio) — preferred. + * - "guided_json": vLLM extra_body fallback for older servers. + * Caller picks based on the endpoint; default response_format. + */ + export function guidedOptions( + tools: ToolSchema[], + style: "response_format" | "guided_json" = "response_format", + ): Record { + const schema = toolCallEnvelope(tools) + if (style === "guided_json") { + // vLLM reads this from extra_body; the openai-compatible SDK forwards + // unknown providerOptions as request body fields. 
+ return { guided_json: schema } + } + return { + response_format: { + type: "json_schema", + json_schema: { name: "tool_call", schema, strict: true }, + }, + } + } +} diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index b06112f2c..fe0e04cc8 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -13,6 +13,12 @@ import { } from "ai" import { mergeDeep, pipe } from "remeda" import { ProviderTransform } from "@/provider/transform" +// altimate_change start — constrained tool-call decoding +import { Constrained } from "@/provider/constrained" +// altimate_change end +// altimate_change start — tool retrieval +import { Retrieval } from "@/tool/retrieval" +// altimate_change end import { Config } from "@/config/config" import { Instance } from "@/project/instance" import type { Agent } from "@/agent/agent" @@ -179,6 +185,55 @@ export namespace LLM { } // altimate_change end + // altimate_change start — tool retrieval + // Expose only the relevant top-k tools this turn (flag-gated). Keeps the + // always-on core + any in-flight (referenced) tools; no-op for small sets. + if (Retrieval.enabled()) { + const lastUser = [...input.messages].reverse().find((m) => m.role === "user") + const c = lastUser?.content as any + const query = + typeof c === "string" + ? c + : Array.isArray(c) + ? c.map((p: any) => (typeof p === "string" ? p : (p?.text ?? ""))).join(" ") + : "" + const list = Object.entries(tools).map(([name, t]) => ({ name, description: (t as any)?.description })) + const keep = Retrieval.select(query, list, { keep: referencedTools }) + for (const name of Object.keys(tools)) { + if (name !== "invalid" && !keep.has(name)) delete tools[name] + } + } + // altimate_change end + + // altimate_change start — constrained tool-call decoding for local providers + // Force a tool-call grammar ONLY when a call is mandatory (toolChoice "required"). 
+ // A blanket guided constraint would forbid normal text turns; for "auto" mode, + // valid tool calls come from the vLLM serving config (--enable-auto-tool-choice + // --tool-call-parser), NOT a client constraint. Hosted models are excluded. + // + if ( + Constrained.enabled() && + Constrained.isLocalProvider(input.model.api.npm, input.model.providerID) && + input.toolChoice === "required" + ) { + const toolSchemas = Object.entries(tools) + .filter(([name]) => name !== "invalid") + .map(([name, t]) => ({ + name, + parameters: + ((t as any)?.inputSchema?.jsonSchema as Record) ?? { + type: "object", + properties: {}, + }, + })) + if (toolSchemas.length) { + // Routed under the provider SDK key by providerOptions(), then spread into + // the request body by @ai-sdk/openai-compatible -> vLLM reads guided_json. + params.options = { ...params.options, ...Constrained.guidedOptions(toolSchemas, "guided_json") } + } + } + // altimate_change end + return streamText({ onError(error) { l.error("stream error", { diff --git a/packages/opencode/src/tool/critic.ts b/packages/opencode/src/tool/critic.ts new file mode 100644 index 000000000..69e58ec01 --- /dev/null +++ b/packages/opencode/src/tool/critic.ts @@ -0,0 +1,69 @@ +/** + * Pre-execution critic gate. + * + * Before a SIDE-EFFECTING tool runs (bash, write, edit, sql_execute, dbt_*), a + * verifier checks the proposed args; on hard failure the call is denied and the + * reason is fed back so the model can retry — instead of executing a bad action. + * + * The judgment plugs in via the `Verifier` interface; the default verifier ALLOWS + * everything (ungated) and a real verifier is injected by the caller. + * + * Pure + testable. Wiring point: session/prompt.ts execute wrapper, just before + * `item.execute(args, ctx)`. + */ + +export namespace Critic { + /** Side-effecting tools worth gating. Reads (glob/grep/read) are never gated. 
*/ + export const DEFAULT_GATED = ["bash", "write", "edit", "sql_execute", "dbt_run", "patch"] + + export interface Verdict { + ok: boolean + reason?: string + } + + /** The judgment interface. Default impl allows all (open). Product plugs altimate-core. */ + export interface Verifier { + verify(toolName: string, args: Record): Verdict | Promise + } + + export const ALLOW_ALL: Verifier = { verify: () => ({ ok: true }) } + + export function enabled(): boolean { + return process.env["ALTIMATE_CRITIC_GATE"] === "1" + } + + export function isGated(toolName: string, gated: string[] = DEFAULT_GATED): boolean { + return gated.includes(toolName) + } + + export interface GateResult { + allow: boolean + /** when blocked, the message to feed back to the model in place of execution. */ + feedback?: string + } + + /** + * Decide whether a proposed tool call may execute. Non-gated tools always pass. + * Gated tools are checked by the verifier; a not-ok verdict blocks with feedback. + * NEVER throws — a critic failure must not break the agent (fail-open on error). + */ + export async function gate( + toolName: string, + args: Record, + verifier: Verifier = ALLOW_ALL, + gated: string[] = DEFAULT_GATED, + ): Promise { + if (!enabled() || !isGated(toolName, gated)) return { allow: true } + try { + const v = await verifier.verify(toolName, args) + if (v.ok) return { allow: true } + return { + allow: false, + feedback: `Blocked by altimate verifier before execution: ${v.reason ?? "failed validation"}. Fix and retry.`, + } + } catch { + // Fail-open: observability/governance must never break core functionality. + return { allow: true } + } + } +} diff --git a/packages/opencode/src/tool/retrieval.ts b/packages/opencode/src/tool/retrieval.ts new file mode 100644 index 000000000..d02a3d372 --- /dev/null +++ b/packages/opencode/src/tool/retrieval.ts @@ -0,0 +1,77 @@ +/** + * Tool retrieval — pick a relevant subset of tools per turn. 
+ * + * With ~78 tools, sending the full set every turn floods context and adds + * distractors that hurt tool SELECTION. This picks a relevant subset per turn: + * a fixed always-on CORE + lexically-ranked top-k of the rest, and NEVER drops a + * tool that's mid-trajectory (referenced by an in-flight tool call) — dropping + * those would corrupt the conversation. + * + * v1 is lexical (dependency-free, deterministic, testable). An embedding + + * cross-encoder rerank pass is a later enhancement; the `select` signature is + * stable so wiring doesn't change. + */ + +export namespace Retrieval { + /** Always-available agent essentials — never retrieved out. */ + export const CORE = [ + "bash", "read", "write", "edit", "glob", "grep", "ls", + "task", "todowrite", "skill", + ] + + export interface Tool { + name: string + description?: string + } + + export interface Options { + /** target number of tools to expose (incl. core). */ + topk?: number + /** names that MUST stay (e.g. tools referenced by in-flight tool calls). */ + keep?: Iterable + /** only retrieve when the tool count exceeds this (no-op for small sets). */ + minToolsToRetrieve?: number + } + + export function enabled(): boolean { + return process.env["ALTIMATE_TOOL_RETRIEVAL"] === "1" + } + + function score(query: string, t: Tool): number { + const words = new Set(query.toLowerCase().match(/[a-z_]+/g) ?? []) + const hay = (t.name + " " + (t.description ?? "")).toLowerCase() + let s = 0 + for (const w of words) if (w.length > 3 && hay.includes(w)) s += 1 + // small boost for a direct name mention + if (words.has(t.name.toLowerCase())) s += 3 + return s + } + + /** + * Return the SUBSET of tool names to expose this turn. Caller deletes the rest. + * Deterministic: core + forced-keep first, then highest-scoring others up to topk + * (ties broken by original order for stability). + */ + export function select(query: string, tools: Tool[], opts: Options = {}): Set { + const topk = opts.topk ?? 
12 + const minToRetrieve = opts.minToolsToRetrieve ?? topk + const all = new Set(tools.map((t) => t.name)) + // No-op for small tool sets — nothing to gain. + if (tools.length <= minToRetrieve) return all + + const keep = new Set() + for (const n of opts.keep ?? []) if (all.has(n)) keep.add(n) + for (const n of CORE) if (all.has(n)) keep.add(n) + + const rest = tools.filter((t) => !keep.has(t.name)) + const ranked = rest + .map((t, i) => ({ name: t.name, s: score(query, t), i })) + .sort((a, b) => b.s - a.s || a.i - b.i) + + for (const r of ranked) { + if (keep.size >= topk) break + keep.add(r.name) + } + return keep + } +} diff --git a/packages/opencode/test/provider/constrained.test.ts b/packages/opencode/test/provider/constrained.test.ts new file mode 100644 index 000000000..1f053af01 --- /dev/null +++ b/packages/opencode/test/provider/constrained.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, test } from "bun:test" +import { Constrained } from "../../src/provider/constrained" + +const TOOLS: Constrained.ToolSchema[] = [ + { + name: "bash", + description: "run a shell command", + parameters: { + type: "object", + properties: { command: { type: "string" }, timeout: { type: "number" } }, + required: ["command"], + }, + }, + { name: "list_databases", description: "no-arg tool", parameters: { type: "object", properties: {} } }, +] + +describe("Constrained.toolCallEnvelope", () => { + test("builds a oneOf discriminated union, name pinned per tool", () => { + const env = Constrained.toolCallEnvelope(TOOLS) + expect(Array.isArray(env.oneOf)).toBe(true) + expect(env.oneOf).toHaveLength(2) + const bash = env.oneOf[0] + expect(bash.properties.name.const).toBe("bash") + expect(bash.properties.arguments.required).toEqual(["command"]) + expect(bash.properties.arguments.additionalProperties).toBe(false) + expect(bash.additionalProperties).toBe(false) + }) + + test("no-arg tool constrains arguments to an empty object", () => { + const env = 
Constrained.toolCallEnvelope(TOOLS) + const noarg = env.oneOf[1] + expect(noarg.properties.name.const).toBe("list_databases") + expect(noarg.properties.arguments.properties).toEqual({}) + expect(noarg.properties.arguments.type).toBe("object") + }) + + test("throws on empty tool set", () => { + expect(() => Constrained.toolCallEnvelope([])).toThrow() + }) +}) + +describe("Constrained.guidedOptions", () => { + test("response_format json_schema (default)", () => { + const o = Constrained.guidedOptions(TOOLS) + expect(o.response_format.type).toBe("json_schema") + expect(o.response_format.json_schema.name).toBe("tool_call") + expect(o.response_format.json_schema.strict).toBe(true) + expect(o.response_format.json_schema.schema.oneOf).toHaveLength(2) + }) + + test("guided_json fallback for older vLLM", () => { + const o = Constrained.guidedOptions(TOOLS, "guided_json") + expect(o.guided_json.oneOf).toHaveLength(2) + }) +}) + +describe("Constrained gating", () => { + test("enabled() reads the env flag", () => { + const prev = process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] + process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = "1" + expect(Constrained.enabled()).toBe(true) + delete process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] + expect(Constrained.enabled()).toBe(false) + if (prev !== undefined) process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = prev + }) + + test("isLocalProvider: openai-compatible + known local ids, not hosted", () => { + expect(Constrained.isLocalProvider("@ai-sdk/openai-compatible", "vllm")).toBe(true) + expect(Constrained.isLocalProvider(undefined, "lmstudio")).toBe(true) + expect(Constrained.isLocalProvider("@ai-sdk/anthropic", "anthropic")).toBe(false) + expect(Constrained.isLocalProvider("@ai-sdk/openai", "openai")).toBe(false) + }) +}) diff --git a/packages/opencode/test/tool/critic.test.ts b/packages/opencode/test/tool/critic.test.ts new file mode 100644 index 000000000..9f611ed93 --- /dev/null +++ b/packages/opencode/test/tool/critic.test.ts @@ -0,0 +1,40 
@@ +import { afterEach, describe, expect, test } from "bun:test" +import { Critic } from "../../src/tool/critic" + +afterEach(() => delete process.env["ALTIMATE_CRITIC_GATE"]) + +describe("Critic.gate", () => { + test("disabled by default -> allow even a gated+denying call", async () => { + const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "x" }) } + expect((await Critic.gate("bash", {}, deny)).allow).toBe(true) + }) + + test("enabled: non-gated tool always allowed", async () => { + process.env["ALTIMATE_CRITIC_GATE"] = "1" + expect((await Critic.gate("read", {}, Critic.ALLOW_ALL)).allow).toBe(true) + }) + + test("enabled: gated + allow-all verifier -> allow", async () => { + process.env["ALTIMATE_CRITIC_GATE"] = "1" + expect((await Critic.gate("bash", { command: "ls" }, Critic.ALLOW_ALL)).allow).toBe(true) + }) + + test("enabled: gated + failing verifier -> block with feedback", async () => { + process.env["ALTIMATE_CRITIC_GATE"] = "1" + const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "unsafe SQL" }) } + const g = await Critic.gate("sql_execute", { q: "drop" }, deny) + expect(g.allow).toBe(false) + expect(g.feedback).toContain("unsafe SQL") + }) + + test("enabled: verifier throws -> fail-open (allow)", async () => { + process.env["ALTIMATE_CRITIC_GATE"] = "1" + const boom: Critic.Verifier = { verify: () => { throw new Error("down") } } + expect((await Critic.gate("bash", {}, boom)).allow).toBe(true) + }) + + test("isGated: side-effecting yes, reads no", () => { + expect(Critic.isGated("bash")).toBe(true) + expect(Critic.isGated("read")).toBe(false) + }) +}) diff --git a/packages/opencode/test/tool/retrieval.test.ts b/packages/opencode/test/tool/retrieval.test.ts new file mode 100644 index 000000000..cb1adb885 --- /dev/null +++ b/packages/opencode/test/tool/retrieval.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, test } from "bun:test" +import { Retrieval } from "../../src/tool/retrieval" + +const TOOLS = [ + 
...Retrieval.CORE.map((name) => ({ name })), + ...Array.from({ length: 20 }, (_, i) => ({ name: `warehouse_op${i}`, description: `warehouse operation ${i}` })), + { name: "dbt_run", description: "run dbt models build" }, + { name: "sql_execute", description: "execute SQL query against warehouse" }, +] + +describe("Retrieval.select", () => { + test("always keeps core tools", () => { + const sel = Retrieval.select("run the dbt models", TOOLS, { topk: 12 }) + expect(sel.has("bash")).toBe(true) + expect(sel.has("read")).toBe(true) + }) + + test("picks lexically relevant tools", () => { + expect(Retrieval.select("run the dbt models and build", TOOLS, { topk: 12 }).has("dbt_run")).toBe(true) + expect(Retrieval.select("execute a SQL query on the warehouse", TOOLS, { topk: 12 }).has("sql_execute")).toBe(true) + }) + + test("never drops in-flight (keep) tools, even if irrelevant", () => { + const sel = Retrieval.select("hello", TOOLS, { topk: 12, keep: ["warehouse_op19"] }) + expect(sel.has("warehouse_op19")).toBe(true) + }) + + test("no-op for small tool sets (returns all)", () => { + const small = [{ name: "a" }, { name: "b" }] + expect(Retrieval.select("x", small, { topk: 12 }).size).toBe(2) + }) + + test("enabled() reads the env flag", () => { + const prev = process.env["ALTIMATE_TOOL_RETRIEVAL"] + process.env["ALTIMATE_TOOL_RETRIEVAL"] = "1" + expect(Retrieval.enabled()).toBe(true) + delete process.env["ALTIMATE_TOOL_RETRIEVAL"] + expect(Retrieval.enabled()).toBe(false) + if (prev !== undefined) process.env["ALTIMATE_TOOL_RETRIEVAL"] = prev + }) +}) From e6b70c6cb024da3e3c5be926038774b2c34a5284 Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sun, 31 May 2026 20:38:25 -0700 Subject: [PATCH 2/3] refactor: drop unwired critic gate from inference-stack PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `ALTIMATE_CRITIC_GATE` flag was a no-op — `tool/critic.ts` was never imported into the execute path, so enabling the flag did 
nothing. Removing it from this PR so every shipped flag is actually wired: - `ALTIMATE_TOOL_RETRIEVAL` — wired in `session/llm.ts`, validated (-50% input tokens at equal resolve) - `ALTIMATE_CONSTRAINED_TOOLCALLS` — wired in `session/llm.ts` (local providers) The critic gate (pre-execution `Verifier` for side-effecting tools) moves to a follow-up that wires it into the `session/prompt.ts` execute wrapper with an integration test. Code preserved on `feat/critic-gate`. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/opencode/src/tool/critic.ts | 69 ---------------------- packages/opencode/test/tool/critic.test.ts | 40 ------------- 2 files changed, 109 deletions(-) delete mode 100644 packages/opencode/src/tool/critic.ts delete mode 100644 packages/opencode/test/tool/critic.test.ts diff --git a/packages/opencode/src/tool/critic.ts b/packages/opencode/src/tool/critic.ts deleted file mode 100644 index 69e58ec01..000000000 --- a/packages/opencode/src/tool/critic.ts +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Pre-execution critic gate. - * - * Before a SIDE-EFFECTING tool runs (bash, write, edit, sql_execute, dbt_*), a - * verifier checks the proposed args; on hard failure the call is denied and the - * reason is fed back so the model can retry — instead of executing a bad action. - * - * The judgment plugs in via the `Verifier` interface; the default verifier ALLOWS - * everything (ungated) and a real verifier is injected by the caller. - * - * Pure + testable. Wiring point: session/prompt.ts execute wrapper, just before - * `item.execute(args, ctx)`. - */ - -export namespace Critic { - /** Side-effecting tools worth gating. Reads (glob/grep/read) are never gated. */ - export const DEFAULT_GATED = ["bash", "write", "edit", "sql_execute", "dbt_run", "patch"] - - export interface Verdict { - ok: boolean - reason?: string - } - - /** The judgment interface. Default impl allows all (open). Product plugs altimate-core. 
*/ - export interface Verifier { - verify(toolName: string, args: Record): Verdict | Promise - } - - export const ALLOW_ALL: Verifier = { verify: () => ({ ok: true }) } - - export function enabled(): boolean { - return process.env["ALTIMATE_CRITIC_GATE"] === "1" - } - - export function isGated(toolName: string, gated: string[] = DEFAULT_GATED): boolean { - return gated.includes(toolName) - } - - export interface GateResult { - allow: boolean - /** when blocked, the message to feed back to the model in place of execution. */ - feedback?: string - } - - /** - * Decide whether a proposed tool call may execute. Non-gated tools always pass. - * Gated tools are checked by the verifier; a not-ok verdict blocks with feedback. - * NEVER throws — a critic failure must not break the agent (fail-open on error). - */ - export async function gate( - toolName: string, - args: Record, - verifier: Verifier = ALLOW_ALL, - gated: string[] = DEFAULT_GATED, - ): Promise { - if (!enabled() || !isGated(toolName, gated)) return { allow: true } - try { - const v = await verifier.verify(toolName, args) - if (v.ok) return { allow: true } - return { - allow: false, - feedback: `Blocked by altimate verifier before execution: ${v.reason ?? "failed validation"}. Fix and retry.`, - } - } catch { - // Fail-open: observability/governance must never break core functionality. 
- return { allow: true } - } - } -} diff --git a/packages/opencode/test/tool/critic.test.ts b/packages/opencode/test/tool/critic.test.ts deleted file mode 100644 index 9f611ed93..000000000 --- a/packages/opencode/test/tool/critic.test.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { afterEach, describe, expect, test } from "bun:test" -import { Critic } from "../../src/tool/critic" - -afterEach(() => delete process.env["ALTIMATE_CRITIC_GATE"]) - -describe("Critic.gate", () => { - test("disabled by default -> allow even a gated+denying call", async () => { - const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "x" }) } - expect((await Critic.gate("bash", {}, deny)).allow).toBe(true) - }) - - test("enabled: non-gated tool always allowed", async () => { - process.env["ALTIMATE_CRITIC_GATE"] = "1" - expect((await Critic.gate("read", {}, Critic.ALLOW_ALL)).allow).toBe(true) - }) - - test("enabled: gated + allow-all verifier -> allow", async () => { - process.env["ALTIMATE_CRITIC_GATE"] = "1" - expect((await Critic.gate("bash", { command: "ls" }, Critic.ALLOW_ALL)).allow).toBe(true) - }) - - test("enabled: gated + failing verifier -> block with feedback", async () => { - process.env["ALTIMATE_CRITIC_GATE"] = "1" - const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "unsafe SQL" }) } - const g = await Critic.gate("sql_execute", { q: "drop" }, deny) - expect(g.allow).toBe(false) - expect(g.feedback).toContain("unsafe SQL") - }) - - test("enabled: verifier throws -> fail-open (allow)", async () => { - process.env["ALTIMATE_CRITIC_GATE"] = "1" - const boom: Critic.Verifier = { verify: () => { throw new Error("down") } } - expect((await Critic.gate("bash", {}, boom)).allow).toBe(true) - }) - - test("isGated: side-effecting yes, reads no", () => { - expect(Critic.isGated("bash")).toBe(true) - expect(Critic.isGated("read")).toBe(false) - }) -}) From 38f873742c117bd9b0a3115e855b75856cd2c98f Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Mon, 1 Jun 
2026 10:18:53 -0700 Subject: [PATCH 3/3] refactor: split constrained decoding into a follow-up Constrained tool-call decoding is local-providers-only and has no validation run behind it yet (the A/B that justifies this PR measured tool retrieval). Removing it so the validated retrieval lever can land clean; constrained moves to its own branch/PR pending a local vLLM guided-decoding run. - remove `provider/constrained.ts` + its test - remove the constrained wiring + import from `session/llm.ts` (retrieval stays) #858 is now retrieval-only. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/opencode/src/provider/constrained.ts | 96 ------------------- packages/opencode/src/session/llm.ts | 32 ------- .../test/provider/constrained.test.ts | 73 -------------- 3 files changed, 201 deletions(-) delete mode 100644 packages/opencode/src/provider/constrained.ts delete mode 100644 packages/opencode/test/provider/constrained.test.ts diff --git a/packages/opencode/src/provider/constrained.ts b/packages/opencode/src/provider/constrained.ts deleted file mode 100644 index dfced7a9e..000000000 --- a/packages/opencode/src/provider/constrained.ts +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Constrained (grammar) decoding for tool calls. - * - * Builds a JSON-Schema "envelope" describing a VALID tool call for the current - * resolved tool set, so a local model (vLLM / LM Studio / llama.cpp) can be - * forced — at the token level — to emit a parseable, schema-correct call. A - * deterministic fix for the "model emits unparseable tool calls" failure; - * base-model-agnostic. - * - * This module is pure (JSON-Schema in → payload out); wiring into the request - * happens in ProviderTransform.providerOptions (a separate, marker-wrapped edit). - */ - -export namespace Constrained { - /** Minimal tool shape this module needs (parameters already converted to JSON Schema). 
*/ - export interface ToolSchema { - name: string - description?: string - parameters: Record // JSON Schema for the tool's arguments - } - - /** Only constrain when explicitly enabled AND the provider is a self-served / local - * OpenAI-compatible endpoint. Never constrain hosted models (Anthropic/OpenAI): - * we don't control their decoding and their tool-calls are already valid. */ - export function enabled(): boolean { - return process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] === "1" - } - - /** True for providers where WE control the inference engine and can pass guided - * decoding (vLLM, LM Studio, Ollama, llama.cpp via openai-compatible). */ - export function isLocalProvider(npm?: string, providerID?: string): boolean { - if (npm === "@ai-sdk/openai-compatible") return true - const local = new Set(["vllm", "lmstudio", "llamacpp", "ollama", "local"]) - return !!providerID && local.has(providerID) - } - - /** A normalized arguments schema: ensure it's an object schema (empty-arg tools - * constrain to `{}`), and forbid extra properties so the grammar is tight. */ - function argsSchema(t: ToolSchema): Record { - const p = t.parameters && typeof t.parameters === "object" ? t.parameters : {} - const props = (p as any).properties ?? {} - return { - type: "object", - properties: props, - required: (p as any).required ?? [], - additionalProperties: (p as any).additionalProperties ?? false, - } - } - - /** - * Discriminated-union envelope: a single tool call must be exactly one of the - * tools, with `name` pinned to that tool and `arguments` matching its schema. - * vLLM/XGrammar guided_json and llama.cpp GBNF both support oneOf. 
- */ - export function toolCallEnvelope(tools: ToolSchema[]): Record { - if (!tools.length) throw new Error("constrained: no tools to build envelope from") - return { - $schema: "http://json-schema.org/draft-07/schema#", - title: "tool_call", - oneOf: tools.map((t) => ({ - type: "object", - properties: { - name: { const: t.name }, - arguments: argsSchema(t), - }, - required: ["name", "arguments"], - additionalProperties: false, - })), - } - } - - /** - * Provider options payload to attach (under the provider's SDK key) for guided - * decoding. Two styles cover the engines we serve: - * - "response_format": OpenAI-style json_schema (vLLM ≥0.6, LM Studio) — preferred. - * - "guided_json": vLLM extra_body fallback for older servers. - * Caller picks based on the endpoint; default response_format. - */ - export function guidedOptions( - tools: ToolSchema[], - style: "response_format" | "guided_json" = "response_format", - ): Record { - const schema = toolCallEnvelope(tools) - if (style === "guided_json") { - // vLLM reads this from extra_body; the openai-compatible SDK forwards - // unknown providerOptions as request body fields. 
- return { guided_json: schema } - } - return { - response_format: { - type: "json_schema", - json_schema: { name: "tool_call", schema, strict: true }, - }, - } - } -} diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index fe0e04cc8..0e7ec2589 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -13,9 +13,6 @@ import { } from "ai" import { mergeDeep, pipe } from "remeda" import { ProviderTransform } from "@/provider/transform" -// altimate_change start — constrained tool-call decoding -import { Constrained } from "@/provider/constrained" -// altimate_change end // altimate_change start — tool retrieval import { Retrieval } from "@/tool/retrieval" // altimate_change end @@ -205,35 +202,6 @@ export namespace LLM { } // altimate_change end - // altimate_change start — constrained tool-call decoding for local providers - // Force a tool-call grammar ONLY when a call is mandatory (toolChoice "required"). - // A blanket guided constraint would forbid normal text turns; for "auto" mode, - // valid tool calls come from the vLLM serving config (--enable-auto-tool-choice - // --tool-call-parser), NOT a client constraint. Hosted models are excluded. - // - if ( - Constrained.enabled() && - Constrained.isLocalProvider(input.model.api.npm, input.model.providerID) && - input.toolChoice === "required" - ) { - const toolSchemas = Object.entries(tools) - .filter(([name]) => name !== "invalid") - .map(([name, t]) => ({ - name, - parameters: - ((t as any)?.inputSchema?.jsonSchema as Record) ?? { - type: "object", - properties: {}, - }, - })) - if (toolSchemas.length) { - // Routed under the provider SDK key by providerOptions(), then spread into - // the request body by @ai-sdk/openai-compatible -> vLLM reads guided_json. 
- params.options = { ...params.options, ...Constrained.guidedOptions(toolSchemas, "guided_json") } - } - } - // altimate_change end - return streamText({ onError(error) { l.error("stream error", { diff --git a/packages/opencode/test/provider/constrained.test.ts b/packages/opencode/test/provider/constrained.test.ts deleted file mode 100644 index 1f053af01..000000000 --- a/packages/opencode/test/provider/constrained.test.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { describe, expect, test } from "bun:test" -import { Constrained } from "../../src/provider/constrained" - -const TOOLS: Constrained.ToolSchema[] = [ - { - name: "bash", - description: "run a shell command", - parameters: { - type: "object", - properties: { command: { type: "string" }, timeout: { type: "number" } }, - required: ["command"], - }, - }, - { name: "list_databases", description: "no-arg tool", parameters: { type: "object", properties: {} } }, -] - -describe("Constrained.toolCallEnvelope", () => { - test("builds a oneOf discriminated union, name pinned per tool", () => { - const env = Constrained.toolCallEnvelope(TOOLS) - expect(Array.isArray(env.oneOf)).toBe(true) - expect(env.oneOf).toHaveLength(2) - const bash = env.oneOf[0] - expect(bash.properties.name.const).toBe("bash") - expect(bash.properties.arguments.required).toEqual(["command"]) - expect(bash.properties.arguments.additionalProperties).toBe(false) - expect(bash.additionalProperties).toBe(false) - }) - - test("no-arg tool constrains arguments to an empty object", () => { - const env = Constrained.toolCallEnvelope(TOOLS) - const noarg = env.oneOf[1] - expect(noarg.properties.name.const).toBe("list_databases") - expect(noarg.properties.arguments.properties).toEqual({}) - expect(noarg.properties.arguments.type).toBe("object") - }) - - test("throws on empty tool set", () => { - expect(() => Constrained.toolCallEnvelope([])).toThrow() - }) -}) - -describe("Constrained.guidedOptions", () => { - test("response_format json_schema (default)", () 
=> { - const o = Constrained.guidedOptions(TOOLS) - expect(o.response_format.type).toBe("json_schema") - expect(o.response_format.json_schema.name).toBe("tool_call") - expect(o.response_format.json_schema.strict).toBe(true) - expect(o.response_format.json_schema.schema.oneOf).toHaveLength(2) - }) - - test("guided_json fallback for older vLLM", () => { - const o = Constrained.guidedOptions(TOOLS, "guided_json") - expect(o.guided_json.oneOf).toHaveLength(2) - }) -}) - -describe("Constrained gating", () => { - test("enabled() reads the env flag", () => { - const prev = process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] - process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = "1" - expect(Constrained.enabled()).toBe(true) - delete process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] - expect(Constrained.enabled()).toBe(false) - if (prev !== undefined) process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = prev - }) - - test("isLocalProvider: openai-compatible + known local ids, not hosted", () => { - expect(Constrained.isLocalProvider("@ai-sdk/openai-compatible", "vllm")).toBe(true) - expect(Constrained.isLocalProvider(undefined, "lmstudio")).toBe(true) - expect(Constrained.isLocalProvider("@ai-sdk/anthropic", "anthropic")).toBe(false) - expect(Constrained.isLocalProvider("@ai-sdk/openai", "openai")).toBe(false) - }) -})