open-gitagent
diff --git a/‎.changeset/multi-turn-replay-fix.md‎
Lines changed: 24 additions & 0 deletions b/‎.changeset/multi-turn-replay-fix.md‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎examples/marketing-agent.ts‎
Lines changed: 102 additions & 38 deletions b/‎examples/marketing-agent.ts‎
Lines changed: 102 additions & 38 deletions
diff --git a/‎packages/sdk/src/computer-agent.test.ts‎
Lines changed: 41 additions & 0 deletions b/‎packages/sdk/src/computer-agent.test.ts‎
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,24 @@
+---
+"@computeragent/sdk": patch
+---
+
+Fix multi-turn `agent.chat()` replaying first-turn events (issue #2).
+
+Sequential `agent.chat()` calls on the same agent now produce distinct
+responses. Previously the second `chat()` would re-yield the first turn's
+events from the SSE replay buffer, so every turn returned the same answer.
+
+The fix:
+- Sessions are always created in streaming-input mode internally so the
+  engine stays alive across turns
+- Every `chat()` pushes via `/messages` (never via the createSession body)
+- `/events` is opened with `Last-Event-ID: <highest seen>` so the replay
+  buffer skips events the SDK already saw
+- Each chat handle's iterable terminates on the turn's `result` SDK message
+  (synthesizing a `ca_session_ended` for `ChatHandle.drain()`)
+- `dispose()` POSTs `/end-input` so the engine drains cleanly
+
+`consumeSseEvents` now yields `{ id?, event }` envelopes instead of bare
+events — the only in-package caller (`ComputerAgent.openTurnEventStream`)
+uses the id for `Last-Event-ID` tracking. External consumers are not affected
+because the function is internal.
@@ -62,11 +62,14 @@ function yellow(s: string): string {
 async function streamTurn(
   agent: ComputerAgent,
   message: string,
-): Promise<{ sessionId: string; result: string; harnessUrl: string }> {
+): Promise<{ sessionId: string; result: string; harnessUrl: string; ok: boolean }> {
   console.log(`\n${yellow("You:")} ${message}\n`);
 
   let result = "";
+  let assistantText = "";
   let sessionId = "";
+  let ok = true;
+  let endError: string | undefined;
 
   const handle = agent.chat(message);
 
@@ -82,15 +85,14 @@ async function streamTurn(
         for (const block of msg?.content ?? []) {
           const b = block as { type?: string; text?: string; name?: string; input?: unknown };
           if (b.type === "text" && b.text) {
-            // Stream text progressively — print in chunks
             process.stdout.write(b.text);
+            assistantText += b.text;
           } else if (b.type === "tool_use") {
             const input = JSON.stringify(b.input ?? {});
             console.log(`\n${dim(`  → ${b.name}(${input.slice(0, 120)}${input.length > 120 ? "…" : ""})`)}`);
           }
         }
       } else if (p.type === "tool") {
-        // tool result
         const content = typeof p.content === "string" ? p.content : JSON.stringify(p.content);
         console.log(dim(`    ← ${content.slice(0, 200)}${content.length > 200 ? "…" : ""}`));
       } else if (p.type === "result" && typeof p.result === "string") {
@@ -102,12 +104,43 @@ async function streamTurn(
         console.log(`\n${dim(`[usage: in=${u.input_tokens} out=${u.output_tokens} cost=$${u.cost_usd?.toFixed(4)}]`)}`);
       }
     } else if (ev.kind === "ca_session_ended") {
-      console.log(`\n${dim(`[ended: ${ev.reason}]`)}`);
+      if (ev.reason !== "complete") {
+        ok = false;
+        endError = ev.errorMessage ?? ev.reason;
+      }
+      console.log(`\n${dim(`[ended: ${ev.reason}${ev.errorMessage ? ` — ${ev.errorMessage}` : ""}]`)}`);
     }
   }
 
+  // Fall back to assembled assistant text if there was no terminal `result`.
+  // Helps when the engine errors mid-turn — we still surface what came through.
+  if (!result && assistantText) result = assistantText;
+
   const harnessUrl = await agent.harnessUrl();
-  return { sessionId, result, harnessUrl };
+  if (!ok) {
+    console.log(`\n${dim(`[turn failed: ${endError ?? "unknown"} — saved files may be empty/partial]`)}`);
+  }
+  return { sessionId, result, harnessUrl, ok };
+}
+
+interface FsEntry {
+  path: string;
+  type: string;
+  size: number;
+}
+
+/** Fetches the session's workdir tree (depth 10) and indexes its entries by path. Returns an empty map on a non-OK response or any thrown error. */
+async function snapshotWorkdir(harnessUrl: string, sessionId: string): Promise<Map<string, FsEntry>> {
+  const out = new Map<string, FsEntry>();
+  try {
+    const res = await fetch(`${harnessUrl}/v1/sessions/${sessionId}/fs/tree?depth=10`);
+    if (!res.ok) return out;
+    const tree = (await res.json()) as { entries: FsEntry[] };
+    for (const e of tree.entries) out.set(e.path, e);
+  } catch {
+    // FS API unavailable — return an empty snapshot. NOTE: a caller diffing against an empty baseline will see every file as new.
+  }
+  return out;
 }
 
 async function saveOutput(filename: string, content: string): Promise<void> {
@@ -116,6 +149,21 @@ async function saveOutput(filename: string, content: string): Promise<void> {
   console.log(`\n${green("✓")} Saved → ${path}`);
 }
 
+async function saveTurnOutput(
+  filename: string,
+  header: string,
+  body: string,
+): Promise<void> {
+  if (!body.trim()) {
+    console.log(`\n${dim(`(skipped ${filename} — empty response from agent)`)}`);
+    return;
+  }
+  await saveOutput(
+    filename,
+    `# ${header}\n\nGenerated by marketing-agent via ComputerAgent\n\n---\n\n${body}\n`,
+  );
+}
+
 async function fetchAgentFile(harnessUrl: string, sessionId: string, path: string): Promise<string | null> {
   try {
     const res = await fetch(`${harnessUrl}/v1/sessions/${sessionId}/fs/file?path=${encodeURIComponent(path)}`);
@@ -157,12 +205,19 @@ await using agent = new ComputerAgent({
   ...(priorSessionId ? { sessionId: priorSessionId } : {}),
 });
 
+// No snapshot is possible before the session starts (there is no sessionId
+// yet), so the baseline is taken right AFTER turn 1 — at that point the harness
+// has materialized the GAP repo into the workdir but the agent hasn't yet
+// produced any deliverable files. Anything that appears after this snapshot
+// is something the AGENT wrote (issue #3 fix). Stays empty when resuming.
+let workdirBaseline: Map<string, FsEntry> = new Map();
+
 // ── Turn 1: Product context ───────────────────────────────────────────────────
 
 if (!RESUME) {
   hr("Turn 1 — Product Context");
 
-  const { sessionId } = await streamTurn(
+  const { sessionId, harnessUrl } = await streamTurn(
     agent,
     `Here is our product context. Please confirm you've loaded it and identify the top 3
 marketing challenges you'd recommend we tackle first.
@@ -176,6 +231,10 @@ Current channels: mostly outbound sales, some inbound content, limited PLG motio
 Top conversion barrier: enterprises want a PoC before committing — long evaluation cycles (60-90 days).`,
   );
 
+  // Snapshot once — captures the materialized GAP repo (agent.yaml, SOUL.md,
+  // RULES.md, .claude/skills/**, .git/**) so the final FS sweep can exclude it.
+  workdirBaseline = await snapshotWorkdir(harnessUrl, sessionId);
+
   await writeFile(SESSION_FILE, sessionId, "utf8");
   console.log(`\nSession ID saved → ${SESSION_FILE}`);
 }
@@ -192,9 +251,10 @@ Angle: they are likely already using LangChain or CrewAI and hitting reliability
 Format each email with: Subject line, Body, Send timing.`,
 );
 
-await saveOutput(
+await saveTurnOutput(
   "cold-email-sequence.md",
-  `# Cold Email Sequence — VP Engineering @ Series B+ SaaS\n\nGenerated by marketing-agent via ComputerAgent\n\n---\n\n${emailResult}`,
+  "Cold Email Sequence — VP Engineering @ Series B+ SaaS",
+  emailResult,
 );
 
 // ── Turn 3: Pricing strategy ──────────────────────────────────────────────────
@@ -213,10 +273,7 @@ const { result: pricingResult } = await streamTurn(
 - A/B test ideas for the pricing page`,
 );
 
-await saveOutput(
-  "pricing-strategy.md",
-  `# Pricing Strategy — Lyzr AI\n\nGenerated by marketing-agent via ComputerAgent\n\n---\n\n${pricingResult}`,
-);
+await saveTurnOutput("pricing-strategy.md", "Pricing Strategy — Lyzr AI", pricingResult);
 
 // ── Turn 4: Launch strategy for self-serve tier ───────────────────────────────
 
@@ -231,40 +288,47 @@ Include: Pre-launch checklist, launch day playbook, week-by-week activation plan
 success metrics, and the top 3 risks with mitigations.`,
 );
 
-await saveOutput(
+await saveTurnOutput(
   "launch-strategy.md",
-  `# Self-Serve Launch Strategy — 30-Day Plan\n\nGenerated by marketing-agent via ComputerAgent\n\n---\n\n${launchResult}`,
+  "Self-Serve Launch Strategy — 30-Day Plan",
+  launchResult,
 );
 
-// ── Bonus: check if agent wrote any files to its workdir ─────────────────────
+// ── Bonus: capture files the AGENT wrote (not the materialized GAP repo) ─────
+//
+// Diffs the current workdir against the baseline snapshot taken after turn 1.
+// Anything new (or whose size changed) is treated as something the agent
+// produced via its Write/Bash tools. The materialized GAP repo (agent.yaml,
+// SKILL.md files, .git/**) is excluded. See issue #3 for context.
 
 hr("Harness Filesystem");
 
-try {
-  const treeRes = await fetch(
-    `${harnessUrl}/v1/sessions/${finalSession}/fs/tree?depth=2`,
-  );
-  if (treeRes.ok) {
-    const tree = (await treeRes.json()) as { entries: { path: string; type: string; size: number }[] };
-    const interesting = tree.entries.filter((e) => e.path !== "/" && !e.path.startsWith("/."));
-    if (interesting.length > 0) {
-      console.log("\nFiles the agent wrote to its workspace:");
-      for (const e of interesting) {
-        console.log(`  ${e.type.padEnd(4)} ${e.path.padEnd(50)} ${e.size}b`);
-        if (e.type === "file") {
-          const content = await fetchAgentFile(harnessUrl, finalSession, e.path);
-          if (content) {
-            const outName = e.path.replace(/^\//, "").replace(/\//g, "-");
-            await saveOutput(`agent-workdir-${outName}`, content);
-          }
-        }
-      }
-    } else {
-      console.log(dim("  (agent workdir is empty — all output was inline text)"));
+const finalTree = await snapshotWorkdir(harnessUrl, finalSession);
+const newFiles: FsEntry[] = [];
+for (const [path, entry] of finalTree) {
+  if (entry.type !== "file") continue;
+  const before = workdirBaseline.get(path);
+  if (!before) {
+    newFiles.push(entry);   // brand new file
+  } else if (before.size !== entry.size) {
+    newFiles.push(entry);   // existed before but was modified
+  }
+}
+
+if (newFiles.length === 0) {
+  console.log(dim("  (agent didn't write any files to its workspace — all output was inline text)"));
+  console.log(dim("  Note: the materialized GAP repo is excluded from this view."));
+} else {
+  console.log("\nFiles the agent wrote during this session:");
+  for (const e of newFiles) {
+    console.log(`  file ${e.path.padEnd(50)} ${e.size}b`);
+    const content = await fetchAgentFile(harnessUrl, finalSession, e.path);
+    if (content) {
+      // Drop leading slash; flatten subdirectories into the filename ("/" → "_")
+      const outName = e.path.replace(/^\//, "").replace(/\//g, "_");
+      await saveOutput(outName, content);
     }
   }
-} catch {
-  // FS API optional — harness may not expose it in all configurations
 }
 
 // ── Summary ───────────────────────────────────────────────────────────────────
 
@@ -157,6 +157,47 @@ describe("ComputerAgent — multi-turn", () => {
     expect(first.sessionId).toMatch(/^sess_/);
     expect(agent.sessionId).toBe(first.sessionId);
   });
+
+  it("two sequential .chat() calls produce distinct responses (issue #2)", async () => {
+    // Two turns scripted in one engine session. The engine waits for each
+    // user message in turn, then emits a result. If the SDK's multi-turn
+    // wiring is right, turn 2's response must be "response-2", not the
+    // replayed "response-1" from turn 1.
+    const engine = new MockEngine([
+      { kind: "wait_for_user_message" },
+      { kind: "emit", payload: { type: "result", text: "response-1" } },
+      { kind: "wait_for_user_message" },
+      { kind: "emit", payload: { type: "result", text: "response-2" } },
+    ]);
+    serverHandle = await bootServer(engine);
+
+    const agent = new ComputerAgent({
+      source: { type: "local", path: "/tmp" },
+      harness: "mock",
+      identityLoader: "mock",
+      harnessUrl: serverHandle.url,
+    });
+
+    const r1 = await agent.chat("turn 1");
+    const r1Text = (r1.messages.find(
+      (m): m is { type: "result"; text: string } =>
+        (m as { type?: string }).type === "result",
+    ))?.text;
+    expect(r1Text).toBe("response-1");
+
+    const r2 = await agent.chat("turn 2");
+    const r2Text = (r2.messages.find(
+      (m): m is { type: "result"; text: string } =>
+        (m as { type?: string }).type === "result",
+    ))?.text;
+    expect(r2Text).toBe("response-2");
+
+    // Same session across both turns
+    expect(r1.sessionId).toBe(r2.sessionId);
+
+    // Engine actually saw both user messages
+    expect(engine.received.userMessages).toHaveLength(2);
+  });
 });
 
 describe("ComputerAgent — Substrate runtime", () => {