diff --git a/packages/opencode/src/altimate/observability/tracing.ts b/packages/opencode/src/altimate/observability/tracing.ts index e0e742bc1..16563fd01 100644 --- a/packages/opencode/src/altimate/observability/tracing.ts +++ b/packages/opencode/src/altimate/observability/tracing.ts @@ -42,7 +42,7 @@ export interface TraceSpan { spanId: string parentSpanId: string | null name: string - kind: "session" | "generation" | "tool" | "text" | "span" + kind: "session" | "generation" | "tool" | "text" | "span" | "user-message" startTime: number endTime?: number status: "ok" | "error" @@ -345,6 +345,12 @@ function formatDurationShort(ms: number): string { } // altimate_change end +// altimate_change start — shared truncation cap for `logUserMessage` span input. +// Exported so the viewer's chat-tab dedupe can compare against the same boundary +// (otherwise it'd silently drift if either side changes the magic number). +export const USER_MESSAGE_INPUT_MAX_CHARS = 4000 +// altimate_change end + export class Trace { // Global active trace — set when a session starts, cleared on end. private static _active: Trace | null = null @@ -490,6 +496,109 @@ export class Trace { this.snapshot() } + // altimate_change start — rehydrate a Trace from an existing on-disk file. + // Used by `getOrCreateTrace` on cache miss for a session whose trace file + // already exists (worker restart, MAX_TRACES eviction). Without this, the + // fresh Trace.create() + startTrace() path would push a single root span + // into an empty `this.spans` and the immediate snapshot would clobber the + // rich on-disk trace. Returns true if a usable trace was loaded; false + // otherwise (the caller should fall back to startTrace). + // + // Does NOT call snapshot — the file is already correct on disk; an + // unnecessary write here would compete with concurrent flushSync paths. + // + // Async: read the file off the event-loop hot path so a worker-restart + // cache miss with a large existing trace doesn't head-of-line-block all + // session events. The actual hot path was bounded (cache-miss only) but + // the sync `readFileSync` could still pause the worker noticeably for a + // multi-MB trace. + async rehydrateFromFile(sessionId: string): Promise { + if (!this.snapshotDir) return false + const safeId = sessionId.replace(/[/\\.:]/g, "_") || "unknown" + const filePath = path.join(this.snapshotDir, `${safeId}.json`) + let raw: string + try { + raw = await fs.readFile(filePath, "utf-8") + } catch { + return false + } + let trace: TraceFile + try { + trace = JSON.parse(raw) as TraceFile + } catch { + return false + } + // `buildTraceFile` writes the sanitized form of `sessionId` (see ~line 808 + // `.replace(/[/\\.:]/g, "_")`), so compare the same way — otherwise valid + // trace files with `/`, `\`, `.`, `:` in the session id would be rejected + // and the caller would fall back to `startTrace`, clobbering them. + const normalizedSessionId = sessionId.replace(/[/\\.:]/g, "_") || "unknown" + if ( + !trace || + trace.sessionId !== normalizedSessionId || + !Array.isArray(trace.spans) || + trace.spans.length === 0 + ) { + return false + } + const root = trace.spans.find((s) => s.parentSpanId === null && s.kind === "session") + if (!root) return false + + this.sessionId = sessionId + // Restore the original traceId so post-rehydrate snapshots/exports keep + // the same trace identity (downstream OTLP/Jaeger-style consumers and the + // trace viewer URL both depend on it being stable across rehydration). + if (typeof trace.traceId === "string" && trace.traceId.length > 0) { + this.traceId = trace.traceId + } + this.spans = trace.spans.map((s) => ({ ...s })) + this.rootSpanId = root.spanId + this.metadata = { ...(trace.metadata ?? {}) } + this.startTime = root.startTime + if (trace.summary) { + this.totalTokens = trace.summary.totalTokens ?? 0 + this.totalCost = trace.summary.totalCost ?? 0 + this.toolCallCount = trace.summary.totalToolCalls ?? 0 + this.generationCount = trace.summary.totalGenerations ?? 0 + if (trace.summary.tokens) { + this.tokensBreakdown = { + input: trace.summary.tokens.input ?? 0, + output: trace.summary.tokens.output ?? 0, + reasoning: trace.summary.tokens.reasoning ?? 0, + cacheRead: trace.summary.tokens.cacheRead ?? 0, + cacheWrite: trace.summary.tokens.cacheWrite ?? 0, + } + } + } + // Mid-session rehydration: the root span's endTime (if any) was set by a + // prior endTrace; clear it so the trace doesn't render as "completed." + const r = this.spans.find((s) => s.spanId === this.rootSpanId) + if (r) { + delete (r as { endTime?: number }).endTime + r.status = "ok" + } + // Close any in-flight generation spans as interrupted. The transient state + // these spans depended on (`this.currentGenerationSpanId`, `generationText`, + // `generationToolCalls`, `pendingToolResults`) lives only in memory and + // can't be reconstructed from the on-disk file. If we leave open spans + // open, the next `step-finish` event for that turn drops at the + // `if (!this.currentGenerationSpanId) return` guard in `logStepFinish` + // and any later `logToolCall` falls back to attaching tool spans to + // `rootSpanId`, silently degrading the trace shape. Marking them + // interrupted preserves the partial data and makes the boundary visible. + const now = Date.now() + for (const s of this.spans) { + if (s.kind === "generation" && s.endTime === undefined) { + s.endTime = now + s.status = "error" + s.statusMessage = "interrupted (worker restart / cache eviction before step-finish)" + } + } + this.endTraceStarted = false + return true + } + // altimate_change end + /** * Enrich the trace with model/provider info from the first assistant message. * Called when the message.updated event fires with assistant role. @@ -515,6 +624,46 @@ export class Trace { if (prompt) this.metadata.prompt = prompt } + // altimate_change start — set only the user prompt without mutating title. + // The bus emits an auto-generated session title (Path C, `session.updated`) + // separately from the user's actual prompt text (Path B, `message.part.updated`). + // Capturing the prompt via `setTitle(text, text)` would race the title-agent: + // if the user text part arrived after `session.updated`, the nice generated + // title ("Greeting") would regress to the raw user input ("hi"). Use this + // method for prompt-only capture. + setPrompt(prompt: string) { + if (prompt) this.metadata.prompt = prompt + } + // altimate_change end + + // altimate_change start — record an individual user message as a span so the + // chat tab can render multi-turn conversations. Without this, the viewer's + // chat tab can only display `metadata.prompt` at the top — a single string — + // and every later user message is silently dropped from the conversation + // rendering. Tracked as `kind: "user-message"` so the viewer can interleave + // these with `kind: "generation"` spans by startTime. + logUserMessage(text: string) { + if (!this.rootSpanId) return + if (!text) return + try { + const now = Date.now() + this.spans.push({ + spanId: randomUUIDv7(), + parentSpanId: this.rootSpanId, + name: "user-message", + kind: "user-message", + startTime: now, + endTime: now, + status: "ok", + input: text.length > USER_MESSAGE_INPUT_MAX_CHARS ? text.slice(0, USER_MESSAGE_INPUT_MAX_CHARS) : text, + }) + this.snapshot() + } catch { + // best-effort + } + } + // altimate_change end + /** * Open a generation span from a step-start event. */ diff --git a/packages/opencode/src/altimate/observability/viewer.ts b/packages/opencode/src/altimate/observability/viewer.ts index eb8f5918d..df80d46a6 100644 --- a/packages/opencode/src/altimate/observability/viewer.ts +++ b/packages/opencode/src/altimate/observability/viewer.ts @@ -12,7 +12,7 @@ * Branded with Altimate Trace colors. Includes share/export features for virality. */ -import type { TraceFile } from "./tracing" +import { USER_MESSAGE_INPUT_MAX_CHARS, type TraceFile } from "./tracing" export function renderTraceViewer(trace: TraceFile, options?: { live?: boolean; apiPath?: string }): string { const traceJSON = JSON.stringify(trace).replace(/<\//g, "<\\/") @@ -1310,13 +1310,48 @@ function showDetail(span) { (function() { var el = document.getElementById('v-chat'); var html = ''; + // Build a chronologically sorted list of conversation turns by interleaving + // user-message spans with generation spans. We render \`metadata.prompt\` at + // the top as a fallback whenever it carries a value not already represented + // by a user-message span — that covers three cases: + // 1. Pre-fix traces with no user-message spans (just the legacy prompt). + // 2. Mixed traces: an older session rehydrated with only metadata.prompt, + // then continued under the new code which added user-message spans for + // later turns. Without this, the legacy first turn would be dropped. + // 3. Brand-new traces where the first user-message span input equals + // metadata.prompt — we skip the duplicate to avoid double-rendering. + var userMsgs = spans.filter(function(s){return s.kind==='user-message';}); + var gens = spans.filter(function(s){return s.kind==='generation';}); if (t.metadata.prompt) { - html += '
\\u25B6 You
'; - html += '
' + e(t.metadata.prompt) + '
'; + // \`logUserMessage\` truncates span input to USER_MESSAGE_INPUT_MAX_CHARS + // (see tracing.ts); \`metadata.prompt\` stores the full string. For prompts + // longer than the truncation length, strict equality would miss the dedupe + // and the same text would render twice. Match against the truncated form as + // well. The constant is interpolated at render time so the two sides can't + // drift. + var USER_MSG_TRUNCATE = ${USER_MESSAGE_INPUT_MAX_CHARS}; + var promptStr = String(t.metadata.prompt); + var promptTruncated = promptStr.slice(0, USER_MSG_TRUNCATE); + var promptAlreadyInSpan = userMsgs.some(function(u){ + return typeof u.input === 'string' && (u.input === promptStr || u.input === promptTruncated); + }); + if (!promptAlreadyInSpan) { + html += '
\\u25B6 You
'; + html += '
' + e(promptStr) + '
'; + } } - var gens = spans.filter(function(s){return s.kind==='generation';}); - gens.forEach(function(gen) { - var tools = spans.filter(function(s){return s.parentSpanId===gen.spanId && s.kind==='tool';}); + var turns = userMsgs.concat(gens).sort(function(a, b) { return (a.startTime||0) - (b.startTime||0); }); + turns.forEach(function(s) { + if (s.kind === 'user-message') { + var utxt = typeof s.input === 'string' ? s.input : (s.input != null ? JSON.stringify(s.input) : ''); + if (!utxt) return; + html += '
\\u25B6 You
'; + html += '
' + e(utxt) + '
'; + return; + } + // Generation: render its tool children first, then the agent response. + var gen = s; + var tools = spans.filter(function(c){return c.parentSpanId===gen.spanId && c.kind==='tool';}); if (tools.length) { tools.forEach(function(tool) { html += '
'; diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index d6cd5a7a8..d86e7b740 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -183,11 +183,24 @@ export function Session() { return new CustomSpeedScroll(3) }) - createEffect(() => { - if (session()?.workspaceID) { - sdk.setWorkspace(session()?.workspaceID) - } - }) + // altimate_change start — gate setWorkspace on actual workspaceID change. + // A plain inline `createEffect` callback that reads the session signal would + // re-fire whenever ANY field on the signal changes (message count, status, + // parts) — including the cascade of updates at agent-finish. Every spurious + // fire propagates into `worker.setWorkspace` → `startEventStream` → + // `sessionTraces.clear()` → next snapshot overwrites the rich on-disk trace + // with a near-empty one. The `on()` projector below restricts SolidJS dirty- + // tracking to the workspaceID value alone, so the effect only fires when + // that field actually changes. + createEffect( + on( + () => session()?.workspaceID, + (workspaceID) => { + if (workspaceID) sdk.setWorkspace(workspaceID) + }, + ), + ) + // altimate_change end createEffect(async () => { await sync.session diff --git a/packages/opencode/src/cli/cmd/tui/worker.ts b/packages/opencode/src/cli/cmd/tui/worker.ts index 3e2b78de1..777abc8ba 100644 --- a/packages/opencode/src/cli/cmd/tui/worker.ts +++ b/packages/opencode/src/cli/cmd/tui/worker.ts @@ -80,7 +80,7 @@ async function loadTracingConfig() { // altimate_change end // altimate_change start — trace: get or create per-session trace -function getOrCreateTrace(sessionID: string): Trace | null { +async function getOrCreateTrace(sessionID: string): Promise { if (!sessionID || !tracingEnabled) return null if (sessionTraces.has(sessionID)) return sessionTraces.get(sessionID)! try { @@ -96,7 +96,16 @@ function getOrCreateTrace(sessionID: string): Trace | null { const trace = tracingExporters ? Trace.withExporters([...tracingExporters], { maxFiles: tracingMaxFiles }) : Trace.create() - trace.startTrace(sessionID, {}) + // altimate_change start — prefer disk-rehydration on cache miss for an + // existing session (worker restart, MAX_TRACES eviction). startTrace would + // push a fresh root span into empty `this.spans` and the immediate + // snapshot would clobber the rich on-disk file. Defense in depth in + // addition to keeping the cache alive across turns. + // Async to keep the event-stream loop unblocked on large existing traces. + if (!(await trace.rehydrateFromFile(sessionID))) { + trace.startTrace(sessionID, {}) + } + // altimate_change end Trace.setActive(trace) sessionTraces.set(sessionID, trace) return trace @@ -171,7 +180,9 @@ const startEventStream = (input: { directory: string; workspaceID?: string }) => } if (resolvedSessionID) { // Create trace eagerly on user message (arrives before part events) - const trace = sessionTraces.get(resolvedSessionID) ?? (info.role === "user" ? getOrCreateTrace(resolvedSessionID) : null) + const trace = + sessionTraces.get(resolvedSessionID) ?? + (info.role === "user" ? await getOrCreateTrace(resolvedSessionID) : null) if (info.role === "user") { if (info.id) { if (!sessionUserMsgIds.has(resolvedSessionID)) sessionUserMsgIds.set(resolvedSessionID, new Set()) @@ -183,7 +194,7 @@ const startEventStream = (input: { directory: string; workspaceID?: string }) => } } if (info.role === "assistant") { - const r = trace ?? getOrCreateTrace(resolvedSessionID) + const r = trace ?? (await getOrCreateTrace(resolvedSessionID)) r?.enrichFromAssistant({ modelID: info.modelID, providerID: info.providerID, @@ -199,20 +210,54 @@ const startEventStream = (input: { directory: string; workspaceID?: string }) => const part = (event as any).properties?.part if (part) { // Create trace on first event for this session (lazy creation) - const trace = sessionTraces.get(part.sessionID) ?? getOrCreateTrace(part.sessionID) + const trace = sessionTraces.get(part.sessionID) ?? (await getOrCreateTrace(part.sessionID)) if (trace) { if (part.type === "step-start") trace.logStepStart(part) if (part.type === "step-finish") trace.logStepFinish(part) - if (part.type === "text" && part.time?.end) { - if (part.messageID && sessionUserMsgIds.get(part.sessionID)?.has(part.messageID)) { - // This is user prompt text — capture as title/prompt + // altimate_change start — split the user-vs-assistant text routes. + // User text parts arrive without `time.end` set (it's a meaningful + // concept only for processing-end of assistant chunks), so the old + // `&& part.time?.end` gate dropped the prompt entirely. We trust + // `sessionUserMsgIds.has(messageID)` as the user-text signal and + // call `setPrompt(text)` only — never `setTitle` — to avoid racing + // the auto-generated title from `session.updated` (Path C). + if (part.type === "text") { + // altimate_change start — skip synthetic / ignored text parts. + // `Session.createUserMessage` (prompt.ts) attaches many `synthetic: true` + // text parts to the user message — MCP resource banners, decoded file + // contents, retry/reminder text, plan-mode reminders, agent-handoff + // tags. They all share the user's `messageID` so they would otherwise + // pass the `sessionUserMsgIds` check below and override `metadata.prompt` + // with the LAST synthetic blob (typically file content) and render one + // fake "▶ You" bubble per synthetic part in the chat tab. The synthetic + // and ignored flags exist precisely to mark non-authored content; this + // is exactly the place to consult them. We skip silently rather than + // `continue`-ing the event-loop iteration because the outer loop still + // needs to forward the event downstream via `Rpc.emit`. + const isAuthoredText = !part.synthetic && !part.ignored + // altimate_change end + if ( + isAuthoredText && + part.messageID && + sessionUserMsgIds.get(part.sessionID)?.has(part.messageID) + ) { const text = String(part.text || "") - if (text) trace.setTitle(text.slice(0, 80), text) - } else { - // This is assistant response text + if (text) { + trace.setPrompt(text) + // altimate_change start — record each user message as a span + // so the chat tab can render multi-turn conversations. + // Without a span, the viewer can only display `metadata.prompt` + // (singular) and every subsequent user message is silently + // dropped from the conversation rendering. + trace.logUserMessage(text) + // altimate_change end + } + } else if (isAuthoredText && part.time?.end) { + // Assistant response text (only counts when processing-end fires) trace.logText(part) } } + // altimate_change end if (part.type === "tool" && (part.state?.status === "completed" || part.state?.status === "error")) { trace.logToolCall(part) } @@ -229,19 +274,21 @@ const startEventStream = (input: { directory: string; workspaceID?: string }) => if (trace) trace.setTitle(String(info.title)) } } - // Finalize trace when session reaches idle (completed) - if (event.type === "session.status") { - const sid = (event as any).properties?.sessionID - const status = (event as any).properties?.status?.type - if (status === "idle" && sid) { - const trace = sessionTraces.get(sid) - if (trace) { - void trace.endTrace().catch(() => {}) - sessionTraces.delete(sid) - sessionUserMsgIds.delete(sid) - } - } - } + // altimate_change start — DO NOT finalize the trace on session.status=idle. + // `idle` fires after every turn (busy → idle transition), not at session end. + // Calling `endTrace` + `sessionTraces.delete` here treats each turn as the + // end of the session: the next event for the same session in a later turn + // hits a cache miss in getOrCreateTrace, constructs a fresh Trace.create() + // with empty `this.spans`, and the immediate `snapshot()` clobbers the + // rich on-disk `ses_.json` with a single root-span file. Symptoms: + // - waterfall view collapses to the system-prompt span after every turn + // - "What was asked / No prompt recorded" because metadata.prompt was + // captured on the destroyed instance, never on the replacement + // Sessions in altimate-code are long-lived across many turns; the Trace + // should live as long as the worker has the session in cache. Finalization + // happens on `shutdown` (worker.ts:312) and on MAX_TRACES eviction + // (worker.ts:87). No per-turn finalization is correct. + // altimate_change end } catch { // Trace must never interrupt event forwarding } @@ -261,6 +308,16 @@ const startEventStream = (input: { directory: string; workspaceID?: string }) => }) } +// altimate_change start — track the last workspaceID used to start the event stream +// so `setWorkspace` becomes idempotent on unchanged values. SolidJS effects in the +// session route can fire on every `session()` signal change (including agent-finish); +// without this guard, every fire propagates to `startEventStream` which clears +// `sessionTraces`, which causes the next snapshot from a freshly-created Trace to +// overwrite the rich on-disk trace with a near-empty one. Symptom: waterfall view +// collapses to the system-prompt span after every turn. +let currentWorkspaceID: string | undefined +// altimate_change end + startEventStream({ directory: process.cwd() }) export const rpc = { @@ -306,6 +363,10 @@ export const rpc = { await Instance.disposeAll() }, async setWorkspace(input: { workspaceID?: string }) { + // altimate_change start — idempotency guard; see currentWorkspaceID comment above + if (input.workspaceID === currentWorkspaceID) return + currentWorkspaceID = input.workspaceID + // altimate_change end startEventStream({ directory: process.cwd(), workspaceID: input.workspaceID }) }, async shutdown() { diff --git a/packages/opencode/test/altimate/tracing-rehydrate.test.ts b/packages/opencode/test/altimate/tracing-rehydrate.test.ts new file mode 100644 index 000000000..8f26bcb91 --- /dev/null +++ b/packages/opencode/test/altimate/tracing-rehydrate.test.ts @@ -0,0 +1,278 @@ +/** + * Behavioral test for `Trace.rehydrateFromFile` and the worker's + * cache-miss-rehydrates contract. + * + * The bug being guarded against: when the in-memory `Trace` instance for a + * session is destroyed (worker restart, MAX_TRACES eviction, the now-removed + * per-turn idle finalize) and a new `Trace` is constructed for the same + * `sessionID`, the fresh `this.spans = []` plus immediate `snapshot()` in + * `startTrace` would clobber the rich on-disk `ses_.json` with a 1-span + * file. Symptom: waterfall view collapses to system-prompt; metadata.prompt + * disappears. + * + * `rehydrateFromFile` is the defense-in-depth fix: on cache miss, load the + * existing trace state instead of starting fresh. + */ + +import { describe, expect, test } from "bun:test" +import fs from "fs/promises" +import path from "path" +import { Trace, FileExporter, type TraceFile } from "../../src/altimate/observability/tracing" +import { tmpdir } from "../fixture/fixture" + +function makeTrace(dir: string) { + return Trace.withExporters([new FileExporter(dir)]) +} + +async function readTraceFile(dir: string, sessionId: string): Promise { + const safeId = sessionId.replace(/[/\\.:]/g, "_") + const filePath = path.join(dir, `${safeId}.json`) + return JSON.parse(await fs.readFile(filePath, "utf-8")) +} + +describe("Trace.rehydrateFromFile + cache-miss rehydration", () => { + test("rehydrate preserves spans, metadata, and counters when reconstructing on cache miss", async () => { + await using tmp = await tmpdir() + const sessionId = "ses_rehydrate_basic" + + // Phase 1: original Trace builds up rich state, then is discarded (simulating + // a worker restart or MAX_TRACES eviction). + const original = makeTrace(tmp.path) + original.startTrace(sessionId, { + title: "test session", + prompt: "do the thing", + model: "openai/gpt-5.5", + providerId: "altimate-backend", + agent: "builder", + }) + // Simulate turn-1 activity: a few tool calls + some text. + original.logToolCall({ tool: "read", state: { status: "completed", input: { f: "a" } } } as any) + original.logToolCall({ tool: "grep", state: { status: "completed", input: { p: "x" } } } as any) + original.logText({ text: "first response", time: { end: Date.now() } } as any) + // Flush to disk so the file represents the cumulative state. + await original.flush() + + const onDiskAfterPhase1 = await readTraceFile(tmp.path, sessionId) + expect(onDiskAfterPhase1.spans.length).toBeGreaterThan(1) + expect(onDiskAfterPhase1.metadata.prompt).toBe("do the thing") + expect(onDiskAfterPhase1.metadata.title).toBe("test session") + + // Phase 2: drop the original (worker restart / eviction). Construct a fresh + // Trace instance for the SAME sessionID. This is the path that, pre-fix, + // would have called startTrace and clobbered the file. + const reconstructed = makeTrace(tmp.path) + const didRehydrate = await reconstructed.rehydrateFromFile(sessionId) + + expect(didRehydrate).toBe(true) + + // Without writing anything yet, the in-memory state must reflect what was + // on disk: same spans, same metadata, same root id. + const fileImmediatelyAfterRehydrate = await readTraceFile(tmp.path, sessionId) + expect(fileImmediatelyAfterRehydrate.spans).toEqual(onDiskAfterPhase1.spans) + expect(fileImmediatelyAfterRehydrate.metadata.prompt).toBe("do the thing") + + // Phase 3: log new activity on the reconstructed Trace and flush. The next + // snapshot must preserve the old spans AND add the new ones — not overwrite. + reconstructed.logToolCall({ tool: "edit", state: { status: "completed", input: { f: "b" } } } as any) + await reconstructed.flush() + + const onDiskAfterPhase3 = await readTraceFile(tmp.path, sessionId) + expect(onDiskAfterPhase3.spans.length).toBeGreaterThan(onDiskAfterPhase1.spans.length) + // Old spans still there. + for (const oldSpan of onDiskAfterPhase1.spans) { + expect(onDiskAfterPhase3.spans.some((s) => s.spanId === oldSpan.spanId)).toBe(true) + } + // Metadata was preserved across the reconstruction. + expect(onDiskAfterPhase3.metadata.prompt).toBe("do the thing") + expect(onDiskAfterPhase3.metadata.title).toBe("test session") + expect(onDiskAfterPhase3.metadata.model).toBe("openai/gpt-5.5") + }) + + test("rehydrate returns false when no on-disk trace exists (fresh session)", async () => { + await using tmp = await tmpdir() + const trace = makeTrace(tmp.path) + const result = await trace.rehydrateFromFile("ses_does_not_exist") + expect(result).toBe(false) + }) + + test("rehydrate preserves traceId across instance reconstruction", async () => { + await using tmp = await tmpdir() + // Without this, downstream trace consumers (viewer URL, OTLP exporters) + // see a different traceId on every snapshot after rehydration — breaks + // trace identity across instance lifetimes. + const sessionId = "ses_traceid_preservation" + const original = makeTrace(tmp.path) + original.startTrace(sessionId, {}) + original.logToolCall({ tool: "read", state: { status: "completed", input: { f: "a" } } } as any) + await original.flush() + const beforeFile = await readTraceFile(tmp.path, sessionId) + const originalTraceId = beforeFile.traceId + expect(typeof originalTraceId).toBe("string") + + const reconstructed = makeTrace(tmp.path) + expect(await reconstructed.rehydrateFromFile(sessionId)).toBe(true) + reconstructed.logToolCall({ tool: "grep", state: { status: "completed", input: { p: "x" } } } as any) + await reconstructed.flush() + const afterFile = await readTraceFile(tmp.path, sessionId) + expect(afterFile.traceId).toBe(originalTraceId) + }) + + test("rehydrate matches sessionIds containing sanitized characters", async () => { + await using tmp = await tmpdir() + // `buildTraceFile` writes the sanitized form (slashes/dots/colons → "_"). + // The match check needs to normalize before comparing, otherwise valid + // files would be rejected for sessions with those characters. + const sessionId = "ses_with/slash.and:colon" + const safeId = sessionId.replace(/[/\\.:]/g, "_") + // Write a valid file at the sanitized path with the sanitized sessionId + // inside (mirrors what buildTraceFile actually does). + await fs.writeFile( + path.join(tmp.path, `${safeId}.json`), + JSON.stringify({ + version: 2, + traceId: "tr_x", + sessionId: safeId, + startedAt: new Date(0).toISOString(), + endedAt: new Date(0).toISOString(), + metadata: {}, + spans: [ + { spanId: "r", parentSpanId: null, name: safeId, kind: "session", startTime: 0, status: "ok" }, + ], + summary: { + totalTokens: 0, + totalCost: 0, + totalToolCalls: 0, + totalGenerations: 0, + duration: 0, + status: "completed", + tokens: { input: 0, output: 0, reasoning: 0, cacheRead: 0, cacheWrite: 0 }, + }, + }), + ) + + const trace = makeTrace(tmp.path) + // Pass the RAW sessionId (with slashes/dots/colons). Pre-fix this would have + // returned false because "ses_with/slash.and:colon" !== "ses_with_slash_and_colon". + expect(await trace.rehydrateFromFile(sessionId)).toBe(true) + }) + + test("rehydrate returns false when on-disk trace is for a different session", async () => { + await using tmp = await tmpdir() + // Stage a mismatched file at the expected path for "session-A". + await fs.writeFile( + path.join(tmp.path, "ses_target.json"), + JSON.stringify({ + version: 2, + traceId: "x", + sessionId: "ses_other", + startedAt: new Date().toISOString(), + endedAt: new Date().toISOString(), + metadata: {}, + spans: [{ spanId: "r", parentSpanId: null, name: "ses_other", kind: "session", startTime: 0, status: "ok" }], + summary: { + totalTokens: 0, + totalCost: 0, + totalToolCalls: 0, + totalGenerations: 0, + duration: 0, + status: "completed", + tokens: { input: 0, output: 0, reasoning: 0, cacheRead: 0, cacheWrite: 0 }, + }, + }), + ) + const trace = makeTrace(tmp.path) + const result = await trace.rehydrateFromFile("ses_target") + expect(result).toBe(false) + }) + + test("logUserMessage records each user turn as a span so the chat tab can interleave them", async () => { + await using tmp = await tmpdir() + const sessionId = "ses_user_messages" + + const trace = makeTrace(tmp.path) + trace.startTrace(sessionId, {}) + trace.logUserMessage("first user message") + trace.logToolCall({ tool: "read", state: { status: "completed", input: { f: "a" } } } as any) + trace.logUserMessage("second user message") + trace.logToolCall({ tool: "grep", state: { status: "completed", input: { p: "x" } } } as any) + trace.logUserMessage("third user message") + await trace.flush() + + const fileContent = await readTraceFile(tmp.path, sessionId) + const userMessages = fileContent.spans.filter((s) => s.kind === "user-message") + expect(userMessages).toHaveLength(3) + expect((userMessages[0] as any).input).toBe("first user message") + expect((userMessages[1] as any).input).toBe("second user message") + expect((userMessages[2] as any).input).toBe("third user message") + + // Chronological order — user-message spans must precede each generation/tool + // span that came after them, so the chat tab renders them in turn order. + const allSpans = fileContent.spans + const sortedByStart = [...allSpans].sort((a, b) => (a.startTime || 0) - (b.startTime || 0)) + expect(sortedByStart).toEqual(allSpans) + }) + + test("rehydrate marks any in-flight generation span as interrupted", async () => { + // Without this, the transient state that `logStepStart` opened (currentGenerationSpanId, + // generationText, generationToolCalls, pendingToolResults) is lost on reconstruction. + // A later `step-finish` event for that turn would drop at the + // `if (!this.currentGenerationSpanId) return` guard, and follow-up `logToolCall` + // would mis-parent its span at the root. Marking open generation spans interrupted + // makes the boundary visible and prevents the silent degrade. + await using tmp = await tmpdir() + const sessionId = "ses_inflight_gen" + + const original = makeTrace(tmp.path) + original.startTrace(sessionId, {}) + // Open a generation span via logStepStart. `logStepStart` does not itself + // snapshot to disk, so follow with a tool call (which does snapshot) to + // force a flush of the open generation span. Do NOT call logStepFinish — + // the point is to leave the generation span open on disk. + original.logStepStart({ id: "step-1" } as any) + original.logToolCall({ tool: "read", state: { status: "completed", input: { f: "a" } } } as any) + await original.flush() + const beforeFile = await readTraceFile(tmp.path, sessionId) + const openGen = beforeFile.spans.find((s) => s.kind === "generation") + expect(openGen).toBeDefined() + expect(openGen?.endTime).toBeUndefined() + + const reconstructed = makeTrace(tmp.path) + expect(await reconstructed.rehydrateFromFile(sessionId)).toBe(true) + // Trigger a snapshot so the rehydrated state lands on disk. + reconstructed.logToolCall({ tool: "read", state: { status: "completed", input: { f: "a" } } } as any) + await reconstructed.flush() + + const afterFile = await readTraceFile(tmp.path, sessionId) + const interruptedGen = afterFile.spans.find((s) => s.spanId === openGen?.spanId) + expect(interruptedGen?.endTime).toBeDefined() + expect(interruptedGen?.status).toBe("error") + expect(interruptedGen?.statusMessage).toMatch(/interrupted/i) + }) + + test("rehydrate clears endTime on the root span so the trace renders as still in-progress", async () => { + await using tmp = await tmpdir() + const sessionId = "ses_endtime_clear" + + const original = makeTrace(tmp.path) + original.startTrace(sessionId, {}) + original.logToolCall({ tool: "read", state: { status: "completed", input: { f: "a" } } } as any) + await original.endTrace() // sets root endTime + const ended = await readTraceFile(tmp.path, sessionId) + const endedRoot = ended.spans.find((s) => s.parentSpanId === null) + expect(endedRoot?.endTime).toBeDefined() + + const reconstructed = makeTrace(tmp.path) + const ok = await reconstructed.rehydrateFromFile(sessionId) + expect(ok).toBe(true) + + // Trigger a snapshot via a real span push so the disk file gets re-written. + reconstructed.logToolCall({ tool: "grep", state: { status: "completed", input: { p: "x" } } } as any) + await reconstructed.flush() + + const afterRehydrate = await readTraceFile(tmp.path, sessionId) + const root = afterRehydrate.spans.find((s) => s.parentSpanId === null) + // The trace must be re-openable: the root endTime is cleared so subsequent + // turns don't render the trace as "completed" in the viewer. + expect(root?.endTime).toBeUndefined() + }) +}) diff --git a/packages/opencode/test/cli/tui/worker-trace-clearing.test.ts b/packages/opencode/test/cli/tui/worker-trace-clearing.test.ts new file mode 100644 index 000000000..925eb2e4b --- /dev/null +++ b/packages/opencode/test/cli/tui/worker-trace-clearing.test.ts @@ -0,0 +1,177 @@ +import { describe, expect, test } from "bun:test" +import path from "path" +import fs from "fs/promises" + +const ROOT = path.resolve(__dirname, "../../../") + +// Regression guards for the trace-clearing-on-workspace-set bug. +// +// Bug shape (full chain): +// +// 1. SolidJS createEffect in `routes/session/index.tsx` watches +// `session()?.workspaceID` but re-runs on every `session()` signal change +// (message count, status, parts updated, …) — including the cascade at +// agent-finish. +// 2. Each fire calls `sdk.setWorkspace(workspaceID)` via RPC. +// 3. `worker.setWorkspace` unconditionally calls `startEventStream`. +// 4. `startEventStream` calls `endTrace()` on every entry in `sessionTraces` +// (fire-and-forget) and then `sessionTraces.clear()`. +// 5. On the next event for the same session, `getOrCreateTrace(sessionID)` +// hits a cache miss, calls `Trace.create()` + `startTrace(sessionID, {})`, +// which pushes a single root span into a freshly-empty `this.spans`. +// `startTrace` then writes a snapshot — overwriting the rich on-disk trace +// with a near-empty one. +// +// Observable: `/traces` waterfall view collapses to just the system-prompt +// span after every agent-finish. +// +// Two fixes lock the contract here: +// - `worker.setWorkspace` is now idempotent on unchanged workspaceID. +// - `createEffect` in `routes/session/index.tsx` uses `on(...)` so it only +// fires when the projected `workspaceID` actually changes. + +describe("trace-clearing-on-workspace-set regression", () => { + test("worker.setWorkspace short-circuits when workspaceID is unchanged", async () => { + const workerSrc = await fs.readFile(path.join(ROOT, "src/cli/cmd/tui/worker.ts"), "utf-8") + + // A `currentWorkspaceID` tracker is declared at module scope. + expect(workerSrc).toMatch(/let currentWorkspaceID:\s*string\s*\|\s*undefined/) + + // The setWorkspace handler must guard on equality with the tracker before + // calling startEventStream. Match either a strict-equality early return or + // the same pattern wrapped in a block. + expect(workerSrc).toMatch( + /async setWorkspace\(input: \{ workspaceID\?: string \}\)\s*\{[\s\S]{0,400}if \(input\.workspaceID === currentWorkspaceID\) return/, + ) + + // After the guard, the tracker must be updated to the new value, otherwise + // a subsequent same-value call wouldn't short-circuit. + expect(workerSrc).toMatch( + /if \(input\.workspaceID === currentWorkspaceID\) return\s*\n\s*currentWorkspaceID = input\.workspaceID/, + ) + }) + + test("session route's workspaceID effect uses `on()` so it only fires when workspaceID actually changes", async () => { + const routeSrc = await fs.readFile( + path.join(ROOT, "src/cli/cmd/tui/routes/session/index.tsx"), + "utf-8", + ) + + // The previous shape (`createEffect(() => { if (session()?.workspaceID) ... })`) + // re-runs on every session() signal change. The fixed shape uses `on()` with + // an explicit projector returning workspaceID, so SolidJS dirty-tracks only + // that value. + expect(routeSrc).toMatch(/createEffect\(\s*on\(\s*\(\)\s*=>\s*session\(\)\?\.workspaceID/) + + // Reject the three bug-equivalent spellings the reviewers flagged. Each + // pattern is bounded so it can't span across unrelated createEffect bodies + // elsewhere in the file (the route file has many createEffects). + // + // Inline expression: `createEffect(() => session()?.workspaceID && ...)` + // Inline ternary: `createEffect(() => session()?.workspaceID ? ... : ...)` + expect(routeSrc).not.toMatch(/createEffect\(\s*\(\s*\)\s*=>\s*session\(\)\?\.workspaceID\s*[&?]/) + // Block body with `if (session()?.workspaceID)` — `[^{}]*?` prevents the + // match from crossing into other blocks. + expect(routeSrc).not.toMatch(/createEffect\(\s*\(\s*\)\s*=>\s*\{[^{}]*?if\s*\(session\(\)\?\.workspaceID/) + }) + + // The real root cause: a session.status=idle handler that called endTrace + + // sessionTraces.delete after every turn. `idle` fires on busy→idle transition + // (i.e. after every agent turn finishes), not at session-end. Each fire + // destroyed the Trace instance; the next event in a later turn forced a fresh + // Trace.create() whose 1-span initial snapshot clobbered the rich on-disk + // trace. Also explains the "What was asked / No prompt recorded" symptom — + // metadata.prompt was captured on the destroyed instance, never on the + // replacement. The handler is now a no-op; finalization happens on worker + // shutdown and MAX_TRACES eviction only. + test("worker does NOT call endTrace+delete on session.status=idle", async () => { + const workerSrc = await fs.readFile(path.join(ROOT, "src/cli/cmd/tui/worker.ts"), "utf-8") + + // The destructive shape must not exist anywhere in the file. + expect(workerSrc).not.toMatch(/status === "idle"[\s\S]{0,200}sessionTraces\.delete/) + expect(workerSrc).not.toMatch(/status === "idle"[\s\S]{0,200}trace\.endTrace\(\)/) + }) + + // Defense-in-depth: `getOrCreateTrace` on cache miss must try to load an + // existing on-disk trace before falling back to startTrace. Otherwise a + // worker restart or MAX_TRACES eviction recreates the trace empty and + // the next snapshot clobbers the rich file. + test("getOrCreateTrace prefers rehydrateFromFile over startTrace on cache miss", async () => { + const workerSrc = await fs.readFile(path.join(ROOT, "src/cli/cmd/tui/worker.ts"), "utf-8") + // `rehydrateFromFile` is async (off-loads disk I/O from the event-stream + // hot path per cubic P2 review on tracing.ts:515) so the call must be + // awaited inside getOrCreateTrace. + expect(workerSrc).toMatch( + /if \(!\(await trace\.rehydrateFromFile\(sessionID\)\)\)\s*\{\s*\n\s*trace\.startTrace\(sessionID, \{\}\)\s*\n\s*\}/, + ) + }) + + // User-text-part branch must accept parts that have no `time.end` (user input + // never has a meaningful processing-end) and must capture the prompt via + // `setPrompt` — NOT `setTitle`. Otherwise the user text races the + // title-agent's auto-generated title from `session.updated` and overwrites + // it (e.g. "Greeting" → "hi"). See codex review feedback this round. + test("user text part is captured via setPrompt, drops time.end precondition, never touches title", async () => { + const workerSrc = await fs.readFile(path.join(ROOT, "src/cli/cmd/tui/worker.ts"), "utf-8") + + // The old shape gated on `part.time?.end` for the entire user/assistant + // branch — that shape must not be present anymore, because user-input + // parts never have `time.end` set. + expect(workerSrc).not.toMatch(/if \(part\.type === "text" && part\.time\?\.end\)/) + + // Broader guard: no `part.time?.end` check is permitted INSIDE the + // user-text branch (identified by `sessionUserMsgIds.get(...).has(...)`). + // Catches a nested `if (part.time?.end)` shape that would re-introduce + // the same drop. + expect(workerSrc).not.toMatch( + /sessionUserMsgIds\.get\(part\.sessionID\)\?\.has\(part\.messageID\)[\s\S]{0,400}part\.time\?\.end/, + ) + + // The user-text branch must call setPrompt (not setTitle) so the auto- + // generated session title from Path C isn't overwritten by raw user text. + expect(workerSrc).toMatch(/sessionUserMsgIds\.get\(part\.sessionID\)\?\.has\(part\.messageID\)[\s\S]{0,200}trace\.setPrompt/) + // The user-text branch must NOT call setTitle. + expect(workerSrc).not.toMatch( + /sessionUserMsgIds\.get\(part\.sessionID\)\?\.has\(part\.messageID\)[\s\S]{0,200}trace\.setTitle\(text/, + ) + }) + + // Major #1 from the multi-LLM consensus review (codex-verified). The user-text + // branch must NOT feed `setPrompt`/`logUserMessage` from synthetic or ignored + // parts — `Session.createUserMessage` (prompt.ts) attaches MCP resource banners, + // decoded file contents, retry/reminder text, and plan-mode reminders as + // `synthetic: true` text parts that share the user's `messageID`. Without the + // gate, `metadata.prompt` ends up holding the LAST synthetic part (typically + // a file blob) and the chat tab renders one fake "▶ You" bubble per synthetic + // span — defeating the two display surfaces this PR fixes. + test("user-text branch skips synthetic/ignored parts before calling setPrompt+logUserMessage", async () => { + const workerSrc = await fs.readFile(path.join(ROOT, "src/cli/cmd/tui/worker.ts"), "utf-8") + // The synthetic+ignored gate must build the `isAuthoredText` predicate + // from BOTH flags. Stronger than just searching for the literal anywhere. + expect(workerSrc).toMatch( + /const\s+isAuthoredText\s*=\s*!part\.synthetic\s*&&\s*!part\.ignored/, + ) + // Both write paths must sit inside the user-text branch (gated on the + // `sessionUserMsgIds...has(...)` membership check) AND inside an + // `if (text)` body whose contents don't cross block boundaries. + // The `[^{}]` bounds on the inner spans prevent the match from extending + // past the closing brace of the `if (text)` body, so unrelated + // `trace.setPrompt` / `trace.logUserMessage` calls elsewhere in the file + // can't satisfy this assertion — exactly what the previous loose regex + // allowed and what CodeRabbit flagged. + expect(workerSrc).toMatch( + /sessionUserMsgIds\.get\(part\.sessionID\)\?\.has\(part\.messageID\)[\s\S]{0,400}if\s*\(\s*text\s*\)\s*\{[^{}]*trace\.setPrompt[^{}]*trace\.logUserMessage/, + ) + }) + + test("Trace.setPrompt exists and only mutates metadata.prompt", async () => { + const tracingSrc = await fs.readFile( + path.join(ROOT, "src/altimate/observability/tracing.ts"), + "utf-8", + ) + // Method must exist with the documented signature. + expect(tracingSrc).toMatch(/setPrompt\(prompt: string\)\s*\{[\s\S]{0,200}this\.metadata\.prompt = prompt/) + // Must NOT touch metadata.title. + expect(tracingSrc).not.toMatch(/setPrompt\(prompt: string\)\s*\{[\s\S]{0,300}this\.metadata\.title/) + }) +})