From 3001a2b7bb9fb344af36d1512bfb9a09115fc45f Mon Sep 17 00:00:00 2001 From: Sebastian Danielsson Date: Mon, 8 Jun 2026 14:29:28 +0200 Subject: [PATCH 1/2] feat(opencode): respect provider/model streaming:false to disable response streaming Some OpenAI-compatible backends don't support streaming or return broken streamed output (e.g. self-hosted vLLM corrupting streamed tool-call args). The existing options.streaming config wasn't consumed, so there was no way to opt out. Honor options.streaming:false (per-model or per-provider) by adding the AI SDK's simulateStreamingMiddleware, which calls doGenerate (stream:false on the wire) and replays the result as a simulated stream, leaving the rest of the pipeline unchanged. Defaults to streaming on. Fixes #785 Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/opencode/src/session/llm.ts | 9 ++- packages/opencode/test/session/llm.test.ts | 69 ++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index cf284ce1ae6e..07ed3ad13dcc 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -4,7 +4,7 @@ import { SessionV1 } from "@opencode-ai/core/v1/session" import { serviceUse } from "@opencode-ai/core/effect/service-use" import { Context, Effect, Layer } from "effect" import * as Stream from "effect/Stream" -import { streamText, wrapLanguageModel, type ModelMessage, type Tool } from "ai" +import { simulateStreamingMiddleware, streamText, wrapLanguageModel, type ModelMessage, type Tool } from "ai" import type { LLMEvent } from "@opencode-ai/llm" import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route" import type { LLMClientService } from "@opencode-ai/llm/route" @@ -271,6 +271,12 @@ const live: Layer.Layer< "llm.provider": input.model.providerID, "llm.model": input.model.id, }) + // Opt out of streaming per-model or per-provider with 
`options.streaming: false`. + // Some backends corrupt or reject streamed responses (e.g. vLLM's gemma tool-call + // parser duplicates characters in streamed tool args); simulateStreamingMiddleware + // makes the model call doGenerate (a non-streaming request on the wire) and re-emits a + // simulated stream, so the rest of the pipeline is unchanged. Model-level wins. + const disableStreaming = (input.model.options?.["streaming"] ?? item.options?.["streaming"]) === false // Default runtime path: AI SDK owns provider execution and tool dispatch; // LLMAISDK.toLLMEvents below normalizes fullStream parts for the processor. return { @@ -324,6 +330,7 @@ const live: Layer.Layer< return args.params }, }, + ...(disableStreaming ? [simulateStreamingMiddleware()] : []), ], }), experimental_telemetry: { diff --git a/packages/opencode/test/session/llm.test.ts b/packages/opencode/test/session/llm.test.ts index 0c5dadaf17d6..e9e2966526a2 100644 --- a/packages/opencode/test/session/llm.test.ts +++ b/packages/opencode/test/session/llm.test.ts @@ -1929,4 +1929,73 @@ describe("session.llm.stream", () => { }), }, ) + + it.instance( + "disables streaming when options.streaming is false", + () => + Effect.gen(function* () { + const fixture = loadFixture(vivgridFixture.providerID, vivgridFixture.modelID) + // Respond with a non-streaming chat completion. The provider can only parse + // this if it issued a doGenerate (non-streaming) request — a streamed request + // would expect SSE and fail — so a clean drain proves streaming was disabled. 
+ const request = waitRequest( + "/chat/completions", + new Response( + JSON.stringify({ + id: "chatcmpl-nostream", + object: "chat.completion", + model: fixture.model.id, + choices: [{ index: 0, message: { role: "assistant", content: "Hello" }, finish_reason: "stop" }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + { status: 200, headers: { "Content-Type": "application/json" } }, + ), + ) + + const resolved = yield* Provider.use.getModel( + ProviderV2.ID.make(vivgridFixture.providerID), + ModelV2.ID.make(fixture.model.id), + ) + const sessionID = SessionID.make("session-test-no-stream") + const agent = { + name: "test", + mode: "primary", + options: {}, + permission: [{ permission: "*", pattern: "*", action: "allow" }], + } satisfies Agent.Info + + const user = { + id: MessageID.make("msg_user-no-stream"), + sessionID, + role: "user", + time: { created: Date.now() }, + agent: agent.name, + model: { providerID: ProviderV2.ID.make(vivgridFixture.providerID), modelID: resolved.id }, + } satisfies SessionV1.User + + yield* drain({ + user, + sessionID, + model: resolved, + agent, + system: ["You are a helpful assistant."], + messages: [{ role: "user", content: "Hello" }], + tools: {}, + }) + + const capture = yield* Effect.promise(() => request) + expect(capture.url.pathname.endsWith("/chat/completions")).toBe(true) + expect(capture.body.stream).not.toBe(true) + }), + { + config: () => ({ + enabled_providers: [vivgridFixture.providerID], + provider: { + [vivgridFixture.providerID]: { + options: { apiKey: "test-key", baseURL: `${state.server!.url.origin}/v1`, streaming: false }, + }, + }, + }), + }, + ) }) From b96bf576591ac01c6bab5140c1879edf0f3547b5 Mon Sep 17 00:00:00 2001 From: Sebastian Danielsson Date: Mon, 8 Jun 2026 14:38:34 +0200 Subject: [PATCH 2/2] test(opencode): tighten streaming-disabled assertion to reject truthy stream values Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/opencode/test/session/llm.test.ts | 3 
++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/opencode/test/session/llm.test.ts b/packages/opencode/test/session/llm.test.ts index e9e2966526a2..e1eea36806d9 100644 --- a/packages/opencode/test/session/llm.test.ts +++ b/packages/opencode/test/session/llm.test.ts @@ -1985,7 +1985,8 @@ describe("session.llm.stream", () => { const capture = yield* Effect.promise(() => request) expect(capture.url.pathname.endsWith("/chat/completions")).toBe(true) - expect(capture.body.stream).not.toBe(true) + // doGenerate omits `stream` entirely; accept only an absent (undefined/null) or explicitly false value. + expect(capture.body.stream ?? false).toBe(false) }), { config: () => ({