From 3001a2b7bb9fb344af36d1512bfb9a09115fc45f Mon Sep 17 00:00:00 2001
From: Sebastian Danielsson <sebastian.danielsson@trafikverket.se>
Date: Mon, 8 Jun 2026 14:29:28 +0200
Subject: [PATCH 1/2] feat(opencode): respect provider/model streaming:false to
 disable response streaming

Some OpenAI-compatible backends don't support streaming or return broken
streamed output (e.g. self-hosted vLLM corrupting streamed tool-call args).
The existing options.streaming config wasn't consumed, so there was no way
to opt out.

Honor options.streaming:false (per-model or per-provider, model-level
wins) by adding the AI SDK's simulateStreamingMiddleware, which calls
doGenerate (a non-streaming request on the wire) and replays the result
as a simulated stream, leaving the rest of the pipeline unchanged.
Streaming stays on by default.

Fixes #785

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/opencode/src/session/llm.ts       |  9 ++-
 packages/opencode/test/session/llm.test.ts | 69 ++++++++++++++++++++++
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
index cf284ce1ae6e..07ed3ad13dcc 100644
--- a/packages/opencode/src/session/llm.ts
+++ b/packages/opencode/src/session/llm.ts
@@ -4,7 +4,7 @@ import { SessionV1 } from "@opencode-ai/core/v1/session"
 import { serviceUse } from "@opencode-ai/core/effect/service-use"
 import { Context, Effect, Layer } from "effect"
 import * as Stream from "effect/Stream"
-import { streamText, wrapLanguageModel, type ModelMessage, type Tool } from "ai"
+import { simulateStreamingMiddleware, streamText, wrapLanguageModel, type ModelMessage, type Tool } from "ai"
 import type { LLMEvent } from "@opencode-ai/llm"
 import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route"
 import type { LLMClientService } from "@opencode-ai/llm/route"
@@ -271,6 +271,12 @@ const live: Layer.Layer<
         "llm.provider": input.model.providerID,
         "llm.model": input.model.id,
       })
+      // Opt out of streaming per-model or per-provider with `options.streaming: false`.
+      // Some backends corrupt or reject streamed responses (e.g. vLLM's gemma tool-call
+      // parser duplicates characters in streamed tool args); simulateStreamingMiddleware
+      // makes the model call doGenerate (a non-streaming request on the wire) and re-emits a
+      // simulated stream, so the rest of the pipeline is unchanged. The model-level option wins.
+      const disableStreaming = (input.model.options?.["streaming"] ?? item.options?.["streaming"]) === false
       // Default runtime path: AI SDK owns provider execution and tool dispatch;
       // LLMAISDK.toLLMEvents below normalizes fullStream parts for the processor.
       return {
@@ -324,6 +330,7 @@ const live: Layer.Layer<
                   return args.params
                 },
               },
+              ...(disableStreaming ? [simulateStreamingMiddleware()] : []),
             ],
           }),
           experimental_telemetry: {
diff --git a/packages/opencode/test/session/llm.test.ts b/packages/opencode/test/session/llm.test.ts
index 0c5dadaf17d6..e9e2966526a2 100644
--- a/packages/opencode/test/session/llm.test.ts
+++ b/packages/opencode/test/session/llm.test.ts
@@ -1929,4 +1929,73 @@ describe("session.llm.stream", () => {
       }),
     },
   )
+
+  it.instance(
+    "disables streaming when options.streaming is false",
+    () =>
+      Effect.gen(function* () {
+        const fixture = loadFixture(vivgridFixture.providerID, vivgridFixture.modelID)
+        // Respond with a non-streaming chat completion. The provider can only parse
+        // this if it issued a non-streaming doGenerate request — a streamed request
+        // would expect SSE and fail — so a clean drain proves streaming was disabled.
+        const request = waitRequest(
+          "/chat/completions",
+          new Response(
+            JSON.stringify({
+              id: "chatcmpl-nostream",
+              object: "chat.completion",
+              model: fixture.model.id,
+              choices: [{ index: 0, message: { role: "assistant", content: "Hello" }, finish_reason: "stop" }],
+              usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+            }),
+            { status: 200, headers: { "Content-Type": "application/json" } },
+          ),
+        )
+
+        const resolved = yield* Provider.use.getModel(
+          ProviderV2.ID.make(vivgridFixture.providerID),
+          ModelV2.ID.make(fixture.model.id),
+        )
+        const sessionID = SessionID.make("session-test-no-stream")
+        const agent = {
+          name: "test",
+          mode: "primary",
+          options: {},
+          permission: [{ permission: "*", pattern: "*", action: "allow" }],
+        } satisfies Agent.Info
+
+        const user = {
+          id: MessageID.make("msg_user-no-stream"),
+          sessionID,
+          role: "user",
+          time: { created: Date.now() },
+          agent: agent.name,
+          model: { providerID: ProviderV2.ID.make(vivgridFixture.providerID), modelID: resolved.id },
+        } satisfies SessionV1.User
+
+        yield* drain({
+          user,
+          sessionID,
+          model: resolved,
+          agent,
+          system: ["You are a helpful assistant."],
+          messages: [{ role: "user", content: "Hello" }],
+          tools: {},
+        })
+
+        const capture = yield* Effect.promise(() => request)
+        expect(capture.url.pathname.endsWith("/chat/completions")).toBe(true)
+        expect(capture.body.stream).not.toBe(true)
+      }),
+    {
+      config: () => ({
+        enabled_providers: [vivgridFixture.providerID],
+        provider: {
+          [vivgridFixture.providerID]: {
+            options: { apiKey: "test-key", baseURL: `${state.server!.url.origin}/v1`, streaming: false },
+          },
+        },
+      }),
+    },
+  )
 })

From b96bf576591ac01c6bab5140c1879edf0f3547b5 Mon Sep 17 00:00:00 2001
From: Sebastian Danielsson <sebastian.danielsson@trafikverket.se>
Date: Mon, 8 Jun 2026 14:38:34 +0200
Subject: [PATCH 2/2] test(opencode): tighten streaming-disabled assertion to
 reject truthy stream values

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/opencode/test/session/llm.test.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/opencode/test/session/llm.test.ts b/packages/opencode/test/session/llm.test.ts
index e9e2966526a2..e1eea36806d9 100644
--- a/packages/opencode/test/session/llm.test.ts
+++ b/packages/opencode/test/session/llm.test.ts
@@ -1985,7 +1985,8 @@ describe("session.llm.stream", () => {
 
         const capture = yield* Effect.promise(() => request)
         expect(capture.url.pathname.endsWith("/chat/completions")).toBe(true)
-        expect(capture.body.stream).not.toBe(true)
+        // doGenerate omits `stream` entirely; accept only an absent or explicitly false value.
+        expect(capture.body.stream ?? false).toBe(false)
       }),
     {
       config: () => ({