From 278a3fa85d3d3ee20229a0dd1b15d807f3396c10 Mon Sep 17 00:00:00 2001
From: anandgupta42 <anand@altimate.ai>
Date: Sun, 31 May 2026 19:29:24 -0700
Subject: [PATCH 1/3] feat: inference-time tool-call reliability (constrained
 decoding, retrieval, critic gate)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three flag-gated, independent, default-off reliability features for the agent loop:

- `provider/constrained.ts` — grammar/JSON-Schema constrained decoding so a local
  model (vLLM/LM Studio/llama.cpp) is forced to emit a parseable, schema-correct
  tool call. Pure (schema in → payload out).
- `tool/retrieval.ts` — per-turn tool subset (always-on core + lexical top-k), never
  dropping a tool referenced mid-trajectory; trims the ~78-tool context flood. v1
  lexical, dependency-free, deterministic.
- `tool/critic.ts` — pre-execution gate for side-effecting tools via a pluggable
  `Verifier`; default allows everything (ungated), a real verifier is injected.

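Retrieval and constrained decoding are wired, flag-gated, into `session/llm.ts`
(inside `altimate_change` markers; default off → upstream path unchanged). The
critic gate ships as a standalone module and is not yet wired into the execute path.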
18 unit tests; typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/opencode/src/provider/constrained.ts | 96 +++++++++++++++++++
 packages/opencode/src/session/llm.ts          | 55 +++++++++++
 packages/opencode/src/tool/critic.ts          | 69 +++++++++++++
 packages/opencode/src/tool/retrieval.ts       | 77 +++++++++++++++
 .../test/provider/constrained.test.ts         | 73 ++++++++++++++
 packages/opencode/test/tool/critic.test.ts    | 40 ++++++++
 packages/opencode/test/tool/retrieval.test.ts | 41 ++++++++
 7 files changed, 451 insertions(+)
 create mode 100644 packages/opencode/src/provider/constrained.ts
 create mode 100644 packages/opencode/src/tool/critic.ts
 create mode 100644 packages/opencode/src/tool/retrieval.ts
 create mode 100644 packages/opencode/test/provider/constrained.test.ts
 create mode 100644 packages/opencode/test/tool/critic.test.ts
 create mode 100644 packages/opencode/test/tool/retrieval.test.ts

diff --git a/packages/opencode/src/provider/constrained.ts b/packages/opencode/src/provider/constrained.ts
new file mode 100644
index 000000000..dfced7a9e
--- /dev/null
+++ b/packages/opencode/src/provider/constrained.ts
@@ -0,0 +1,96 @@
+/**
+ * Constrained (grammar) decoding for tool calls.
+ *
+ * Builds a JSON-Schema "envelope" describing a VALID tool call for the current
+ * resolved tool set, so a local model (vLLM / LM Studio / llama.cpp) can be
+ * forced — at the token level — to emit a parseable, schema-correct call. A
+ * deterministic fix for the "model emits unparseable tool calls" failure;
+ * base-model-agnostic.
+ *
+ * This module is pure (JSON-Schema in → payload out); wiring into the request
+ * happens in ProviderTransform.providerOptions (a separate, marker-wrapped edit).
+ */
+
+export namespace Constrained {
+  /** Minimal tool shape this module needs (parameters already converted to JSON Schema). */
+  export interface ToolSchema {
+    name: string
+    description?: string
+    parameters: Record<string, any> // JSON Schema for the tool's arguments
+  }
+
+  /** Only constrain when explicitly enabled AND the provider is a self-served / local
+   *  OpenAI-compatible endpoint. Never constrain hosted models (Anthropic/OpenAI):
+   *  we don't control their decoding and their tool-calls are already valid. */
+  export function enabled(): boolean {
+    return process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] === "1"
+  }
+
+  /** True for providers where WE control the inference engine and can pass guided
+   *  decoding (vLLM, LM Studio, Ollama, llama.cpp via openai-compatible). */
+  export function isLocalProvider(npm?: string, providerID?: string): boolean {
+    if (npm === "@ai-sdk/openai-compatible") return true
+    const local = new Set(["vllm", "lmstudio", "llamacpp", "ollama", "local"])
+    return !!providerID && local.has(providerID)
+  }
+
+  /** A normalized arguments schema: ensure it's an object schema (empty-arg tools
+   *  constrain to `{}`), and forbid extra properties so the grammar is tight. */
+  function argsSchema(t: ToolSchema): Record<string, any> {
+    const p = t.parameters && typeof t.parameters === "object" ? t.parameters : {}
+    const props = (p as any).properties ?? {}
+    return {
+      type: "object",
+      properties: props,
+      required: (p as any).required ?? [],
+      additionalProperties: (p as any).additionalProperties ?? false,
+    }
+  }
+
+  /**
+   * Discriminated-union envelope: a single tool call must be exactly one of the
+   * tools, with `name` pinned to that tool and `arguments` matching its schema.
+   * vLLM/XGrammar guided_json and llama.cpp GBNF both support oneOf.
+   */
+  export function toolCallEnvelope(tools: ToolSchema[]): Record<string, any> {
+    if (!tools.length) throw new Error("constrained: no tools to build envelope from")
+    return {
+      $schema: "http://json-schema.org/draft-07/schema#",
+      title: "tool_call",
+      oneOf: tools.map((t) => ({
+        type: "object",
+        properties: {
+          name: { const: t.name },
+          arguments: argsSchema(t),
+        },
+        required: ["name", "arguments"],
+        additionalProperties: false,
+      })),
+    }
+  }
+
+  /**
+   * Provider options payload to attach (under the provider's SDK key) for guided
+   * decoding. Two styles cover the engines we serve:
+   *  - "response_format": OpenAI-style json_schema (vLLM ≥0.6, LM Studio) — preferred.
+   *  - "guided_json":     vLLM extra_body fallback for older servers.
+   * Caller picks based on the endpoint; default response_format.
+   */
+  export function guidedOptions(
+    tools: ToolSchema[],
+    style: "response_format" | "guided_json" = "response_format",
+  ): Record<string, any> {
+    const schema = toolCallEnvelope(tools)
+    if (style === "guided_json") {
+      // vLLM reads this from extra_body; the openai-compatible SDK forwards
+      // unknown providerOptions as request body fields.
+      return { guided_json: schema }
+    }
+    return {
+      response_format: {
+        type: "json_schema",
+        json_schema: { name: "tool_call", schema, strict: true },
+      },
+    }
+  }
+}
diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
index b06112f2c..fe0e04cc8 100644
--- a/packages/opencode/src/session/llm.ts
+++ b/packages/opencode/src/session/llm.ts
@@ -13,6 +13,12 @@ import {
 } from "ai"
 import { mergeDeep, pipe } from "remeda"
 import { ProviderTransform } from "@/provider/transform"
+// altimate_change start — constrained tool-call decoding
+import { Constrained } from "@/provider/constrained"
+// altimate_change end
+// altimate_change start — tool retrieval
+import { Retrieval } from "@/tool/retrieval"
+// altimate_change end
 import { Config } from "@/config/config"
 import { Instance } from "@/project/instance"
 import type { Agent } from "@/agent/agent"
@@ -179,6 +185,55 @@ export namespace LLM {
     }
     // altimate_change end
 
+    // altimate_change start — tool retrieval
+    // Expose only the relevant top-k tools this turn (flag-gated). Keeps the
+    // always-on core + any in-flight (referenced) tools; no-op for small sets.
+    if (Retrieval.enabled()) {
+      const lastUser = [...input.messages].reverse().find((m) => m.role === "user")
+      const c = lastUser?.content as any
+      const query =
+        typeof c === "string"
+          ? c
+          : Array.isArray(c)
+            ? c.map((p: any) => (typeof p === "string" ? p : (p?.text ?? ""))).join(" ")
+            : ""
+      const list = Object.entries(tools).map(([name, t]) => ({ name, description: (t as any)?.description }))
+      const keep = Retrieval.select(query, list, { keep: referencedTools })
+      for (const name of Object.keys(tools)) {
+        if (name !== "invalid" && !keep.has(name)) delete tools[name]
+      }
+    }
+    // altimate_change end
+
+    // altimate_change start — constrained tool-call decoding for local providers
+    // Force a tool-call grammar ONLY when a call is mandatory (toolChoice "required").
+    // A blanket guided constraint would forbid normal text turns; for "auto" mode,
+    // valid tool calls come from the vLLM serving config (--enable-auto-tool-choice
+    // --tool-call-parser), NOT a client constraint. Hosted models are excluded.
+    //
+    if (
+      Constrained.enabled() &&
+      Constrained.isLocalProvider(input.model.api.npm, input.model.providerID) &&
+      input.toolChoice === "required"
+    ) {
+      const toolSchemas = Object.entries(tools)
+        .filter(([name]) => name !== "invalid")
+        .map(([name, t]) => ({
+          name,
+          parameters:
+            ((t as any)?.inputSchema?.jsonSchema as Record<string, any>) ?? {
+              type: "object",
+              properties: {},
+            },
+        }))
+      if (toolSchemas.length) {
+        // Routed under the provider SDK key by providerOptions(), then spread into
+        // the request body by @ai-sdk/openai-compatible -> vLLM reads guided_json.
+        params.options = { ...params.options, ...Constrained.guidedOptions(toolSchemas, "guided_json") }
+      }
+    }
+    // altimate_change end
+
     return streamText({
       onError(error) {
         l.error("stream error", {
diff --git a/packages/opencode/src/tool/critic.ts b/packages/opencode/src/tool/critic.ts
new file mode 100644
index 000000000..69e58ec01
--- /dev/null
+++ b/packages/opencode/src/tool/critic.ts
@@ -0,0 +1,69 @@
+/**
+ * Pre-execution critic gate.
+ *
+ * Before a SIDE-EFFECTING tool runs (bash, write, edit, sql_execute, dbt_*), a
+ * verifier checks the proposed args; on hard failure the call is denied and the
+ * reason is fed back so the model can retry — instead of executing a bad action.
+ *
+ * The judgment plugs in via the `Verifier` interface; the default verifier ALLOWS
+ * everything (ungated) and a real verifier is injected by the caller.
+ *
+ * Pure + testable. Wiring point: session/prompt.ts execute wrapper, just before
+ * `item.execute(args, ctx)`.
+ */
+
+export namespace Critic {
+  /** Side-effecting tools worth gating. Reads (glob/grep/read) are never gated. */
+  export const DEFAULT_GATED = ["bash", "write", "edit", "sql_execute", "dbt_run", "patch"]
+
+  export interface Verdict {
+    ok: boolean
+    reason?: string
+  }
+
+  /** The judgment interface. Default impl allows all (open). Product plugs altimate-core. */
+  export interface Verifier {
+    verify(toolName: string, args: Record<string, any>): Verdict | Promise<Verdict>
+  }
+
+  export const ALLOW_ALL: Verifier = { verify: () => ({ ok: true }) }
+
+  export function enabled(): boolean {
+    return process.env["ALTIMATE_CRITIC_GATE"] === "1"
+  }
+
+  export function isGated(toolName: string, gated: string[] = DEFAULT_GATED): boolean {
+    return gated.includes(toolName)
+  }
+
+  export interface GateResult {
+    allow: boolean
+    /** when blocked, the message to feed back to the model in place of execution. */
+    feedback?: string
+  }
+
+  /**
+   * Decide whether a proposed tool call may execute. Non-gated tools always pass.
+   * Gated tools are checked by the verifier; a not-ok verdict blocks with feedback.
+   * NEVER throws — a critic failure must not break the agent (fail-open on error).
+   */
+  export async function gate(
+    toolName: string,
+    args: Record<string, any>,
+    verifier: Verifier = ALLOW_ALL,
+    gated: string[] = DEFAULT_GATED,
+  ): Promise<GateResult> {
+    if (!enabled() || !isGated(toolName, gated)) return { allow: true }
+    try {
+      const v = await verifier.verify(toolName, args)
+      if (v.ok) return { allow: true }
+      return {
+        allow: false,
+        feedback: `Blocked by altimate verifier before execution: ${v.reason ?? "failed validation"}. Fix and retry.`,
+      }
+    } catch {
+      // Fail-open: observability/governance must never break core functionality.
+      return { allow: true }
+    }
+  }
+}
diff --git a/packages/opencode/src/tool/retrieval.ts b/packages/opencode/src/tool/retrieval.ts
new file mode 100644
index 000000000..d02a3d372
--- /dev/null
+++ b/packages/opencode/src/tool/retrieval.ts
@@ -0,0 +1,77 @@
+/**
+ * Tool retrieval — pick a relevant subset of tools per turn.
+ *
+ * With ~78 tools, sending the full set every turn floods context and adds
+ * distractors that hurt tool SELECTION. This picks a relevant subset per turn:
+ * a fixed always-on CORE + lexically-ranked top-k of the rest, and NEVER drops a
+ * tool that's mid-trajectory (referenced by an in-flight tool call) — dropping
+ * those would corrupt the conversation.
+ *
+ * v1 is lexical (dependency-free, deterministic, testable). An embedding +
+ * cross-encoder rerank pass is a later enhancement; the `select` signature is
+ * stable so wiring doesn't change.
+ */
+
+export namespace Retrieval {
+  /** Always-available agent essentials — never retrieved out. */
+  export const CORE = [
+    "bash", "read", "write", "edit", "glob", "grep", "ls",
+    "task", "todowrite", "skill",
+  ]
+
+  export interface Tool {
+    name: string
+    description?: string
+  }
+
+  export interface Options {
+    /** target number of tools to expose (incl. core). */
+    topk?: number
+    /** names that MUST stay (e.g. tools referenced by in-flight tool calls). */
+    keep?: Iterable<string>
+    /** only retrieve when the tool count exceeds this (no-op for small sets). */
+    minToolsToRetrieve?: number
+  }
+
+  export function enabled(): boolean {
+    return process.env["ALTIMATE_TOOL_RETRIEVAL"] === "1"
+  }
+
+  function score(query: string, t: Tool): number {
+    const words = new Set(query.toLowerCase().match(/[a-z_]+/g) ?? [])
+    const hay = (t.name + " " + (t.description ?? "")).toLowerCase()
+    let s = 0
+    for (const w of words) if (w.length > 3 && hay.includes(w)) s += 1
+    // small boost for a direct name mention
+    if (words.has(t.name.toLowerCase())) s += 3
+    return s
+  }
+
+  /**
+   * Return the SUBSET of tool names to expose this turn. Caller deletes the rest.
+   * Deterministic: core + forced-keep first, then highest-scoring others up to topk
+   * (ties broken by original order for stability).
+   */
+  export function select(query: string, tools: Tool[], opts: Options = {}): Set<string> {
+    const topk = opts.topk ?? 12
+    const minToRetrieve = opts.minToolsToRetrieve ?? topk
+    const all = new Set(tools.map((t) => t.name))
+    // No-op for small tool sets — nothing to gain.
+    if (tools.length <= minToRetrieve) return all
+
+    const keep = new Set<string>()
+    for (const n of opts.keep ?? []) if (all.has(n)) keep.add(n)
+    for (const n of CORE) if (all.has(n)) keep.add(n)
+
+    const rest = tools.filter((t) => !keep.has(t.name))
+    const ranked = rest
+      .map((t, i) => ({ name: t.name, s: score(query, t), i }))
+      .sort((a, b) => b.s - a.s || a.i - b.i)
+
+    for (const r of ranked) {
+      if (keep.size >= topk) break
+      keep.add(r.name)
+    }
+    return keep
+  }
+}
diff --git a/packages/opencode/test/provider/constrained.test.ts b/packages/opencode/test/provider/constrained.test.ts
new file mode 100644
index 000000000..1f053af01
--- /dev/null
+++ b/packages/opencode/test/provider/constrained.test.ts
@@ -0,0 +1,73 @@
+import { describe, expect, test } from "bun:test"
+import { Constrained } from "../../src/provider/constrained"
+
+const TOOLS: Constrained.ToolSchema[] = [
+  {
+    name: "bash",
+    description: "run a shell command",
+    parameters: {
+      type: "object",
+      properties: { command: { type: "string" }, timeout: { type: "number" } },
+      required: ["command"],
+    },
+  },
+  { name: "list_databases", description: "no-arg tool", parameters: { type: "object", properties: {} } },
+]
+
+describe("Constrained.toolCallEnvelope", () => {
+  test("builds a oneOf discriminated union, name pinned per tool", () => {
+    const env = Constrained.toolCallEnvelope(TOOLS)
+    expect(Array.isArray(env.oneOf)).toBe(true)
+    expect(env.oneOf).toHaveLength(2)
+    const bash = env.oneOf[0]
+    expect(bash.properties.name.const).toBe("bash")
+    expect(bash.properties.arguments.required).toEqual(["command"])
+    expect(bash.properties.arguments.additionalProperties).toBe(false)
+    expect(bash.additionalProperties).toBe(false)
+  })
+
+  test("no-arg tool constrains arguments to an empty object", () => {
+    const env = Constrained.toolCallEnvelope(TOOLS)
+    const noarg = env.oneOf[1]
+    expect(noarg.properties.name.const).toBe("list_databases")
+    expect(noarg.properties.arguments.properties).toEqual({})
+    expect(noarg.properties.arguments.type).toBe("object")
+  })
+
+  test("throws on empty tool set", () => {
+    expect(() => Constrained.toolCallEnvelope([])).toThrow()
+  })
+})
+
+describe("Constrained.guidedOptions", () => {
+  test("response_format json_schema (default)", () => {
+    const o = Constrained.guidedOptions(TOOLS)
+    expect(o.response_format.type).toBe("json_schema")
+    expect(o.response_format.json_schema.name).toBe("tool_call")
+    expect(o.response_format.json_schema.strict).toBe(true)
+    expect(o.response_format.json_schema.schema.oneOf).toHaveLength(2)
+  })
+
+  test("guided_json fallback for older vLLM", () => {
+    const o = Constrained.guidedOptions(TOOLS, "guided_json")
+    expect(o.guided_json.oneOf).toHaveLength(2)
+  })
+})
+
+describe("Constrained gating", () => {
+  test("enabled() reads the env flag", () => {
+    const prev = process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"]
+    process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = "1"
+    expect(Constrained.enabled()).toBe(true)
+    delete process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"]
+    expect(Constrained.enabled()).toBe(false)
+    if (prev !== undefined) process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = prev
+  })
+
+  test("isLocalProvider: openai-compatible + known local ids, not hosted", () => {
+    expect(Constrained.isLocalProvider("@ai-sdk/openai-compatible", "vllm")).toBe(true)
+    expect(Constrained.isLocalProvider(undefined, "lmstudio")).toBe(true)
+    expect(Constrained.isLocalProvider("@ai-sdk/anthropic", "anthropic")).toBe(false)
+    expect(Constrained.isLocalProvider("@ai-sdk/openai", "openai")).toBe(false)
+  })
+})
diff --git a/packages/opencode/test/tool/critic.test.ts b/packages/opencode/test/tool/critic.test.ts
new file mode 100644
index 000000000..9f611ed93
--- /dev/null
+++ b/packages/opencode/test/tool/critic.test.ts
@@ -0,0 +1,40 @@
+import { afterEach, describe, expect, test } from "bun:test"
+import { Critic } from "../../src/tool/critic"
+
+afterEach(() => delete process.env["ALTIMATE_CRITIC_GATE"])
+
+describe("Critic.gate", () => {
+  test("disabled by default -> allow even a gated+denying call", async () => {
+    const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "x" }) }
+    expect((await Critic.gate("bash", {}, deny)).allow).toBe(true)
+  })
+
+  test("enabled: non-gated tool always allowed", async () => {
+    process.env["ALTIMATE_CRITIC_GATE"] = "1"
+    expect((await Critic.gate("read", {}, Critic.ALLOW_ALL)).allow).toBe(true)
+  })
+
+  test("enabled: gated + allow-all verifier -> allow", async () => {
+    process.env["ALTIMATE_CRITIC_GATE"] = "1"
+    expect((await Critic.gate("bash", { command: "ls" }, Critic.ALLOW_ALL)).allow).toBe(true)
+  })
+
+  test("enabled: gated + failing verifier -> block with feedback", async () => {
+    process.env["ALTIMATE_CRITIC_GATE"] = "1"
+    const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "unsafe SQL" }) }
+    const g = await Critic.gate("sql_execute", { q: "drop" }, deny)
+    expect(g.allow).toBe(false)
+    expect(g.feedback).toContain("unsafe SQL")
+  })
+
+  test("enabled: verifier throws -> fail-open (allow)", async () => {
+    process.env["ALTIMATE_CRITIC_GATE"] = "1"
+    const boom: Critic.Verifier = { verify: () => { throw new Error("down") } }
+    expect((await Critic.gate("bash", {}, boom)).allow).toBe(true)
+  })
+
+  test("isGated: side-effecting yes, reads no", () => {
+    expect(Critic.isGated("bash")).toBe(true)
+    expect(Critic.isGated("read")).toBe(false)
+  })
+})
diff --git a/packages/opencode/test/tool/retrieval.test.ts b/packages/opencode/test/tool/retrieval.test.ts
new file mode 100644
index 000000000..cb1adb885
--- /dev/null
+++ b/packages/opencode/test/tool/retrieval.test.ts
@@ -0,0 +1,41 @@
+import { describe, expect, test } from "bun:test"
+import { Retrieval } from "../../src/tool/retrieval"
+
+const TOOLS = [
+  ...Retrieval.CORE.map((name) => ({ name })),
+  ...Array.from({ length: 20 }, (_, i) => ({ name: `warehouse_op${i}`, description: `warehouse operation ${i}` })),
+  { name: "dbt_run", description: "run dbt models build" },
+  { name: "sql_execute", description: "execute SQL query against warehouse" },
+]
+
+describe("Retrieval.select", () => {
+  test("always keeps core tools", () => {
+    const sel = Retrieval.select("run the dbt models", TOOLS, { topk: 12 })
+    expect(sel.has("bash")).toBe(true)
+    expect(sel.has("read")).toBe(true)
+  })
+
+  test("picks lexically relevant tools", () => {
+    expect(Retrieval.select("run the dbt models and build", TOOLS, { topk: 12 }).has("dbt_run")).toBe(true)
+    expect(Retrieval.select("execute a SQL query on the warehouse", TOOLS, { topk: 12 }).has("sql_execute")).toBe(true)
+  })
+
+  test("never drops in-flight (keep) tools, even if irrelevant", () => {
+    const sel = Retrieval.select("hello", TOOLS, { topk: 12, keep: ["warehouse_op19"] })
+    expect(sel.has("warehouse_op19")).toBe(true)
+  })
+
+  test("no-op for small tool sets (returns all)", () => {
+    const small = [{ name: "a" }, { name: "b" }]
+    expect(Retrieval.select("x", small, { topk: 12 }).size).toBe(2)
+  })
+
+  test("enabled() reads the env flag", () => {
+    const prev = process.env["ALTIMATE_TOOL_RETRIEVAL"]
+    process.env["ALTIMATE_TOOL_RETRIEVAL"] = "1"
+    expect(Retrieval.enabled()).toBe(true)
+    delete process.env["ALTIMATE_TOOL_RETRIEVAL"]
+    expect(Retrieval.enabled()).toBe(false)
+    if (prev !== undefined) process.env["ALTIMATE_TOOL_RETRIEVAL"] = prev
+  })
+})

From e6b70c6cb024da3e3c5be926038774b2c34a5284 Mon Sep 17 00:00:00 2001
From: anandgupta42 <anand@altimate.ai>
Date: Sun, 31 May 2026 20:38:25 -0700
Subject: [PATCH 2/3] refactor: drop unwired critic gate from inference-stack
 PR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `ALTIMATE_CRITIC_GATE` flag was a no-op — `tool/critic.ts` was never
imported into the execute path, so enabling the flag did nothing. Removing
it from this PR so every shipped flag is actually wired:

- `ALTIMATE_TOOL_RETRIEVAL` — wired in `session/llm.ts`, validated (-50% input tokens at equal resolve)
- `ALTIMATE_CONSTRAINED_TOOLCALLS` — wired in `session/llm.ts` (local providers)

The critic gate (pre-execution `Verifier` for side-effecting tools) moves to
a follow-up that wires it into the `session/prompt.ts` execute wrapper with an
integration test. Code preserved on `feat/critic-gate`.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/opencode/src/tool/critic.ts       | 69 ----------------------
 packages/opencode/test/tool/critic.test.ts | 40 -------------
 2 files changed, 109 deletions(-)
 delete mode 100644 packages/opencode/src/tool/critic.ts
 delete mode 100644 packages/opencode/test/tool/critic.test.ts

diff --git a/packages/opencode/src/tool/critic.ts b/packages/opencode/src/tool/critic.ts
deleted file mode 100644
index 69e58ec01..000000000
--- a/packages/opencode/src/tool/critic.ts
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Pre-execution critic gate.
- *
- * Before a SIDE-EFFECTING tool runs (bash, write, edit, sql_execute, dbt_*), a
- * verifier checks the proposed args; on hard failure the call is denied and the
- * reason is fed back so the model can retry — instead of executing a bad action.
- *
- * The judgment plugs in via the `Verifier` interface; the default verifier ALLOWS
- * everything (ungated) and a real verifier is injected by the caller.
- *
- * Pure + testable. Wiring point: session/prompt.ts execute wrapper, just before
- * `item.execute(args, ctx)`.
- */
-
-export namespace Critic {
-  /** Side-effecting tools worth gating. Reads (glob/grep/read) are never gated. */
-  export const DEFAULT_GATED = ["bash", "write", "edit", "sql_execute", "dbt_run", "patch"]
-
-  export interface Verdict {
-    ok: boolean
-    reason?: string
-  }
-
-  /** The judgment interface. Default impl allows all (open). Product plugs altimate-core. */
-  export interface Verifier {
-    verify(toolName: string, args: Record<string, any>): Verdict | Promise<Verdict>
-  }
-
-  export const ALLOW_ALL: Verifier = { verify: () => ({ ok: true }) }
-
-  export function enabled(): boolean {
-    return process.env["ALTIMATE_CRITIC_GATE"] === "1"
-  }
-
-  export function isGated(toolName: string, gated: string[] = DEFAULT_GATED): boolean {
-    return gated.includes(toolName)
-  }
-
-  export interface GateResult {
-    allow: boolean
-    /** when blocked, the message to feed back to the model in place of execution. */
-    feedback?: string
-  }
-
-  /**
-   * Decide whether a proposed tool call may execute. Non-gated tools always pass.
-   * Gated tools are checked by the verifier; a not-ok verdict blocks with feedback.
-   * NEVER throws — a critic failure must not break the agent (fail-open on error).
-   */
-  export async function gate(
-    toolName: string,
-    args: Record<string, any>,
-    verifier: Verifier = ALLOW_ALL,
-    gated: string[] = DEFAULT_GATED,
-  ): Promise<GateResult> {
-    if (!enabled() || !isGated(toolName, gated)) return { allow: true }
-    try {
-      const v = await verifier.verify(toolName, args)
-      if (v.ok) return { allow: true }
-      return {
-        allow: false,
-        feedback: `Blocked by altimate verifier before execution: ${v.reason ?? "failed validation"}. Fix and retry.`,
-      }
-    } catch {
-      // Fail-open: observability/governance must never break core functionality.
-      return { allow: true }
-    }
-  }
-}
diff --git a/packages/opencode/test/tool/critic.test.ts b/packages/opencode/test/tool/critic.test.ts
deleted file mode 100644
index 9f611ed93..000000000
--- a/packages/opencode/test/tool/critic.test.ts
+++ /dev/null
@@ -1,40 +0,0 @@
-import { afterEach, describe, expect, test } from "bun:test"
-import { Critic } from "../../src/tool/critic"
-
-afterEach(() => delete process.env["ALTIMATE_CRITIC_GATE"])
-
-describe("Critic.gate", () => {
-  test("disabled by default -> allow even a gated+denying call", async () => {
-    const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "x" }) }
-    expect((await Critic.gate("bash", {}, deny)).allow).toBe(true)
-  })
-
-  test("enabled: non-gated tool always allowed", async () => {
-    process.env["ALTIMATE_CRITIC_GATE"] = "1"
-    expect((await Critic.gate("read", {}, Critic.ALLOW_ALL)).allow).toBe(true)
-  })
-
-  test("enabled: gated + allow-all verifier -> allow", async () => {
-    process.env["ALTIMATE_CRITIC_GATE"] = "1"
-    expect((await Critic.gate("bash", { command: "ls" }, Critic.ALLOW_ALL)).allow).toBe(true)
-  })
-
-  test("enabled: gated + failing verifier -> block with feedback", async () => {
-    process.env["ALTIMATE_CRITIC_GATE"] = "1"
-    const deny: Critic.Verifier = { verify: () => ({ ok: false, reason: "unsafe SQL" }) }
-    const g = await Critic.gate("sql_execute", { q: "drop" }, deny)
-    expect(g.allow).toBe(false)
-    expect(g.feedback).toContain("unsafe SQL")
-  })
-
-  test("enabled: verifier throws -> fail-open (allow)", async () => {
-    process.env["ALTIMATE_CRITIC_GATE"] = "1"
-    const boom: Critic.Verifier = { verify: () => { throw new Error("down") } }
-    expect((await Critic.gate("bash", {}, boom)).allow).toBe(true)
-  })
-
-  test("isGated: side-effecting yes, reads no", () => {
-    expect(Critic.isGated("bash")).toBe(true)
-    expect(Critic.isGated("read")).toBe(false)
-  })
-})

From 38f873742c117bd9b0a3115e855b75856cd2c98f Mon Sep 17 00:00:00 2001
From: anandgupta42 <anand@altimate.ai>
Date: Mon, 1 Jun 2026 10:18:53 -0700
Subject: [PATCH 3/3] refactor: split constrained decoding into a follow-up

Constrained tool-call decoding is local-providers-only and has no validation
run behind it yet (the A/B that justifies this PR measured only tool retrieval).
Remove it so the validated retrieval lever can land cleanly; constrained decoding
moves to its own branch/PR pending a local vLLM guided-decoding run.

- remove `provider/constrained.ts` + its test
- remove the constrained wiring + import from `session/llm.ts` (retrieval stays)

#858 is now retrieval-only.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/opencode/src/provider/constrained.ts | 96 -------------------
 packages/opencode/src/session/llm.ts          | 32 -------
 .../test/provider/constrained.test.ts         | 73 --------------
 3 files changed, 201 deletions(-)
 delete mode 100644 packages/opencode/src/provider/constrained.ts
 delete mode 100644 packages/opencode/test/provider/constrained.test.ts

diff --git a/packages/opencode/src/provider/constrained.ts b/packages/opencode/src/provider/constrained.ts
deleted file mode 100644
index dfced7a9e..000000000
--- a/packages/opencode/src/provider/constrained.ts
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * Constrained (grammar) decoding for tool calls.
- *
- * Builds a JSON-Schema "envelope" describing a VALID tool call for the current
- * resolved tool set, so a local model (vLLM / LM Studio / llama.cpp) can be
- * forced — at the token level — to emit a parseable, schema-correct call. A
- * deterministic fix for the "model emits unparseable tool calls" failure;
- * base-model-agnostic.
- *
- * This module is pure (JSON-Schema in → payload out); wiring into the request
- * happens in ProviderTransform.providerOptions (a separate, marker-wrapped edit).
- */
-
-export namespace Constrained {
-  /** Minimal tool shape this module needs (parameters already converted to JSON Schema). */
-  export interface ToolSchema {
-    name: string
-    description?: string
-    parameters: Record<string, any> // JSON Schema for the tool's arguments
-  }
-
-  /** Only constrain when explicitly enabled AND the provider is a self-served / local
-   *  OpenAI-compatible endpoint. Never constrain hosted models (Anthropic/OpenAI):
-   *  we don't control their decoding and their tool-calls are already valid. */
-  export function enabled(): boolean {
-    return process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] === "1"
-  }
-
-  /** True for providers where WE control the inference engine and can pass guided
-   *  decoding (vLLM, LM Studio, Ollama, llama.cpp via openai-compatible). */
-  export function isLocalProvider(npm?: string, providerID?: string): boolean {
-    if (npm === "@ai-sdk/openai-compatible") return true
-    const local = new Set(["vllm", "lmstudio", "llamacpp", "ollama", "local"])
-    return !!providerID && local.has(providerID)
-  }
-
-  /** A normalized arguments schema: ensure it's an object schema (empty-arg tools
-   *  constrain to `{}`), and forbid extra properties so the grammar is tight. */
-  function argsSchema(t: ToolSchema): Record<string, any> {
-    const p = t.parameters && typeof t.parameters === "object" ? t.parameters : {}
-    const props = (p as any).properties ?? {}
-    return {
-      type: "object",
-      properties: props,
-      required: (p as any).required ?? [],
-      additionalProperties: (p as any).additionalProperties ?? false,
-    }
-  }
-
-  /**
-   * Discriminated-union envelope: a single tool call must be exactly one of the
-   * tools, with `name` pinned to that tool and `arguments` matching its schema.
-   * vLLM/XGrammar guided_json and llama.cpp GBNF both support oneOf.
-   */
-  export function toolCallEnvelope(tools: ToolSchema[]): Record<string, any> {
-    if (!tools.length) throw new Error("constrained: no tools to build envelope from")
-    return {
-      $schema: "http://json-schema.org/draft-07/schema#",
-      title: "tool_call",
-      oneOf: tools.map((t) => ({
-        type: "object",
-        properties: {
-          name: { const: t.name },
-          arguments: argsSchema(t),
-        },
-        required: ["name", "arguments"],
-        additionalProperties: false,
-      })),
-    }
-  }
-
-  /**
-   * Provider options payload to attach (under the provider's SDK key) for guided
-   * decoding. Two styles cover the engines we serve:
-   *  - "response_format": OpenAI-style json_schema (vLLM ≥0.6, LM Studio) — preferred.
-   *  - "guided_json":     vLLM extra_body fallback for older servers.
-   * Caller picks based on the endpoint; default response_format.
-   */
-  export function guidedOptions(
-    tools: ToolSchema[],
-    style: "response_format" | "guided_json" = "response_format",
-  ): Record<string, any> {
-    const schema = toolCallEnvelope(tools)
-    if (style === "guided_json") {
-      // vLLM reads this from extra_body; the openai-compatible SDK forwards
-      // unknown providerOptions as request body fields.
-      return { guided_json: schema }
-    }
-    return {
-      response_format: {
-        type: "json_schema",
-        json_schema: { name: "tool_call", schema, strict: true },
-      },
-    }
-  }
-}
diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
index fe0e04cc8..0e7ec2589 100644
--- a/packages/opencode/src/session/llm.ts
+++ b/packages/opencode/src/session/llm.ts
@@ -13,9 +13,6 @@ import {
 } from "ai"
 import { mergeDeep, pipe } from "remeda"
 import { ProviderTransform } from "@/provider/transform"
-// altimate_change start — constrained tool-call decoding
-import { Constrained } from "@/provider/constrained"
-// altimate_change end
 // altimate_change start — tool retrieval
 import { Retrieval } from "@/tool/retrieval"
 // altimate_change end
@@ -205,35 +202,6 @@ export namespace LLM {
     }
     // altimate_change end
 
-    // altimate_change start — constrained tool-call decoding for local providers
-    // Force a tool-call grammar ONLY when a call is mandatory (toolChoice "required").
-    // A blanket guided constraint would forbid normal text turns; for "auto" mode,
-    // valid tool calls come from the vLLM serving config (--enable-auto-tool-choice
-    // --tool-call-parser), NOT a client constraint. Hosted models are excluded.
-    //
-    if (
-      Constrained.enabled() &&
-      Constrained.isLocalProvider(input.model.api.npm, input.model.providerID) &&
-      input.toolChoice === "required"
-    ) {
-      const toolSchemas = Object.entries(tools)
-        .filter(([name]) => name !== "invalid")
-        .map(([name, t]) => ({
-          name,
-          parameters:
-            ((t as any)?.inputSchema?.jsonSchema as Record<string, any>) ?? {
-              type: "object",
-              properties: {},
-            },
-        }))
-      if (toolSchemas.length) {
-        // Routed under the provider SDK key by providerOptions(), then spread into
-        // the request body by @ai-sdk/openai-compatible -> vLLM reads guided_json.
-        params.options = { ...params.options, ...Constrained.guidedOptions(toolSchemas, "guided_json") }
-      }
-    }
-    // altimate_change end
-
     return streamText({
       onError(error) {
         l.error("stream error", {
diff --git a/packages/opencode/test/provider/constrained.test.ts b/packages/opencode/test/provider/constrained.test.ts
deleted file mode 100644
index 1f053af01..000000000
--- a/packages/opencode/test/provider/constrained.test.ts
+++ /dev/null
@@ -1,73 +0,0 @@
-import { describe, expect, test } from "bun:test"
-import { Constrained } from "../../src/provider/constrained"
-
-const TOOLS: Constrained.ToolSchema[] = [
-  {
-    name: "bash",
-    description: "run a shell command",
-    parameters: {
-      type: "object",
-      properties: { command: { type: "string" }, timeout: { type: "number" } },
-      required: ["command"],
-    },
-  },
-  { name: "list_databases", description: "no-arg tool", parameters: { type: "object", properties: {} } },
-]
-
-describe("Constrained.toolCallEnvelope", () => {
-  test("builds a oneOf discriminated union, name pinned per tool", () => {
-    const env = Constrained.toolCallEnvelope(TOOLS)
-    expect(Array.isArray(env.oneOf)).toBe(true)
-    expect(env.oneOf).toHaveLength(2)
-    const bash = env.oneOf[0]
-    expect(bash.properties.name.const).toBe("bash")
-    expect(bash.properties.arguments.required).toEqual(["command"])
-    expect(bash.properties.arguments.additionalProperties).toBe(false)
-    expect(bash.additionalProperties).toBe(false)
-  })
-
-  test("no-arg tool constrains arguments to an empty object", () => {
-    const env = Constrained.toolCallEnvelope(TOOLS)
-    const noarg = env.oneOf[1]
-    expect(noarg.properties.name.const).toBe("list_databases")
-    expect(noarg.properties.arguments.properties).toEqual({})
-    expect(noarg.properties.arguments.type).toBe("object")
-  })
-
-  test("throws on empty tool set", () => {
-    expect(() => Constrained.toolCallEnvelope([])).toThrow()
-  })
-})
-
-describe("Constrained.guidedOptions", () => {
-  test("response_format json_schema (default)", () => {
-    const o = Constrained.guidedOptions(TOOLS)
-    expect(o.response_format.type).toBe("json_schema")
-    expect(o.response_format.json_schema.name).toBe("tool_call")
-    expect(o.response_format.json_schema.strict).toBe(true)
-    expect(o.response_format.json_schema.schema.oneOf).toHaveLength(2)
-  })
-
-  test("guided_json fallback for older vLLM", () => {
-    const o = Constrained.guidedOptions(TOOLS, "guided_json")
-    expect(o.guided_json.oneOf).toHaveLength(2)
-  })
-})
-
-describe("Constrained gating", () => {
-  test("enabled() reads the env flag", () => {
-    const prev = process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"]
-    process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = "1"
-    expect(Constrained.enabled()).toBe(true)
-    delete process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"]
-    expect(Constrained.enabled()).toBe(false)
-    if (prev !== undefined) process.env["ALTIMATE_CONSTRAINED_TOOLCALLS"] = prev
-  })
-
-  test("isLocalProvider: openai-compatible + known local ids, not hosted", () => {
-    expect(Constrained.isLocalProvider("@ai-sdk/openai-compatible", "vllm")).toBe(true)
-    expect(Constrained.isLocalProvider(undefined, "lmstudio")).toBe(true)
-    expect(Constrained.isLocalProvider("@ai-sdk/anthropic", "anthropic")).toBe(false)
-    expect(Constrained.isLocalProvider("@ai-sdk/openai", "openai")).toBe(false)
-  })
-})