diff --git a/apps/cli/scripts/integration/cases/cancel-message-recovery-race.ts b/apps/cli/scripts/integration/cases/cancel-message-recovery-race.ts new file mode 100644 index 0000000000..bb5f6f30c8 --- /dev/null +++ b/apps/cli/scripts/integration/cases/cancel-message-recovery-race.ts @@ -0,0 +1,161 @@ +import { runStreamCase, StreamEvent } from "../lib/stream-harness" + +const START_PROMPT = + 'Run exactly this command and do not summarize until it finishes: sleep 12 && echo "done". After it finishes, reply with exactly "done".' +const FOLLOWUP_PROMPT = 'After cancellation, reply with only "RACE-OK".' + +async function main() { + const startRequestId = `start-${Date.now()}` + const cancelRequestId = `cancel-${Date.now()}` + const followupRequestId = `message-${Date.now()}` + const shutdownRequestId = `shutdown-${Date.now()}` + + let initSeen = false + let sentCancelAndFollowup = false + let sentShutdown = false + let cancelDoneCode: string | undefined + let followupDoneCode: string | undefined + let followupResult = "" + let sawFollowupUserTurn = false + let sawMisroutedToolResult = false + let sawMessageControlError = false + + await runStreamCase({ + onEvent(event: StreamEvent, context) { + if (event.type === "system" && event.subtype === "init" && !initSeen) { + initSeen = true + context.sendCommand({ + command: "start", + requestId: startRequestId, + prompt: START_PROMPT, + }) + return + } + + if (event.type === "control" && event.subtype === "error") { + if (event.requestId === followupRequestId) { + sawMessageControlError = true + } + throw new Error( + `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`, + ) + } + + if ( + !sentCancelAndFollowup && + event.type === "tool_use" && + event.requestId === startRequestId && + event.subtype === "command" + ) { + context.sendCommand({ + command: "cancel", + requestId: cancelRequestId, + }) + context.sendCommand({ + command: "message", + requestId: followupRequestId, + prompt: FOLLOWUP_PROMPT, + }) + sentCancelAndFollowup = true + return + } + + if ( + event.type === "control" && + event.command === "cancel" && + event.subtype === "done" && + event.requestId === cancelRequestId + ) { + cancelDoneCode = event.code + return + } + + if ( + event.type === "control" && + event.command === "message" && + event.subtype === "done" && + event.requestId === followupRequestId + ) { + followupDoneCode = event.code + return + } + + if ( + event.type === "tool_result" && + event.requestId === followupRequestId && + typeof event.content === "string" && + event.content.includes("") + ) { + sawMisroutedToolResult = true + return + } + + if (event.type === "user" && event.requestId === followupRequestId) { + sawFollowupUserTurn = typeof event.content === "string" && event.content.includes("RACE-OK") + return + } + + if (event.type !== "result" || event.done !== true || event.requestId !== followupRequestId) { + return + } + + followupResult = event.content ?? "" + + if (followupResult.trim().length === 0) { + throw new Error("follow-up after cancel produced an empty result") + } + if (cancelDoneCode !== "cancel_requested") { + throw new Error( + `cancel done code mismatch; expected cancel_requested, got "${cancelDoneCode ?? "none"}"`, + ) + } + if (followupDoneCode !== "responded" && followupDoneCode !== "queued") { + throw new Error( + `unexpected follow-up done code after cancel race; expected responded|queued, got "${followupDoneCode ?? "none"}"`, + ) + } + if (sawMessageControlError) { + throw new Error("follow-up message emitted control error in cancel recovery race") + } + if (sawMisroutedToolResult) { + throw new Error( + "follow-up message was misrouted into tool_result () in cancel recovery race", + ) + } + if (!sawFollowupUserTurn) { + throw new Error("follow-up after cancel did not appear as a normal user turn") + } + + console.log(`[PASS] cancel done code: "${cancelDoneCode}"`) + console.log(`[PASS] follow-up done code: "${followupDoneCode}"`) + console.log(`[PASS] follow-up user turn observed: ${sawFollowupUserTurn}`) + console.log(`[PASS] follow-up result: "${followupResult}"`) + + if (!sentShutdown) { + context.sendCommand({ + command: "shutdown", + requestId: shutdownRequestId, + }) + sentShutdown = true + } + }, + onTimeoutMessage() { + return [ + "timed out waiting for cancel-message-recovery-race validation", + `initSeen=${initSeen}`, + `sentCancelAndFollowup=${sentCancelAndFollowup}`, + `cancelDoneCode=${cancelDoneCode ?? "none"}`, + `followupDoneCode=${followupDoneCode ?? "none"}`, + `sawFollowupUserTurn=${sawFollowupUserTurn}`, + `sawMisroutedToolResult=${sawMisroutedToolResult}`, + `sawMessageControlError=${sawMessageControlError}`, + `haveFollowupResult=${Boolean(followupResult)}`, + ].join(" ") + }, + }) +} + +main().catch((error) => { + console.error(`[FAIL] ${error instanceof Error ? error.message : String(error)}`) + process.exit(1) +}) diff --git a/apps/cli/scripts/integration/cases/cancel-without-active-task.ts b/apps/cli/scripts/integration/cases/cancel-without-active-task.ts new file mode 100644 index 0000000000..5647adaca9 --- /dev/null +++ b/apps/cli/scripts/integration/cases/cancel-without-active-task.ts @@ -0,0 +1,73 @@ +import { runStreamCase, StreamEvent } from "../lib/stream-harness" + +async function main() { + const cancelRequestId = `cancel-${Date.now()}` + const shutdownRequestId = `shutdown-${Date.now()}` + + let initSeen = false + let cancelAckSeen = false + let cancelDoneSeen = false + let shutdownSent = false + + await runStreamCase({ + onEvent(event: StreamEvent, context) { + if (event.type === "system" && event.subtype === "init" && !initSeen) { + initSeen = true + context.sendCommand({ + command: "cancel", + requestId: cancelRequestId, + }) + return + } + + if ( + event.type === "control" && + event.subtype === "ack" && + event.command === "cancel" && + event.requestId === cancelRequestId + ) { + cancelAckSeen = true + return + } + + if ( + event.type === "control" && + event.subtype === "done" && + event.command === "cancel" && + event.requestId === cancelRequestId + ) { + cancelDoneSeen = true + + if (event.code !== "no_active_task") { + throw new Error(`cancel without task should return no_active_task, got "${event.code ?? "none"}"`) + } + if (event.success !== true) { + throw new Error("cancel without task should be treated as successful no-op") + } + + if (!shutdownSent) { + context.sendCommand({ + command: "shutdown", + requestId: shutdownRequestId, + }) + shutdownSent = true + } + return + } + + if (event.type === "control" && event.subtype === "error") { + throw new Error( + `unexpected control error command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`, + ) + } + }, + onTimeoutMessage() { + return `timed out waiting for cancel-without-active-task validation (initSeen=${initSeen}, cancelAckSeen=${cancelAckSeen}, cancelDoneSeen=${cancelDoneSeen}, shutdownSent=${shutdownSent})` + }, + }) +} + +main().catch((error) => { + console.error(`[FAIL] ${error instanceof Error ? error.message : String(error)}`) + process.exit(1) +}) diff --git a/apps/cli/scripts/integration/cases/followup-after-completion.ts b/apps/cli/scripts/integration/cases/followup-after-completion.ts index ce4ea57294..af8e0696bd 100644 --- a/apps/cli/scripts/integration/cases/followup-after-completion.ts +++ b/apps/cli/scripts/integration/cases/followup-after-completion.ts @@ -7,18 +7,9 @@ function parseEventContent(text: string | undefined): string { return typeof text === "string" ? text : "" } -function validateFollowupAnswer(text: string): void { - const normalized = text.toLowerCase() - const containsExpected = /\b6\b/.test(normalized) || normalized.includes("six") - const containsOldAnswer = /\b1\+1\b/.test(normalized) || /\b2\b/.test(normalized) - const containsQuestionReference = normalized.includes("3+3") - - if (!containsExpected) { - throw new Error(`follow-up result did not answer the follow-up question; result="${text}"`) - } - - if (!containsQuestionReference && containsOldAnswer && !containsExpected) { - throw new Error(`follow-up result appears anchored to first question; result="${text}"`) +function validateFollowupResult(text: string): void { + if (text.trim().length === 0) { + throw new Error("follow-up produced an empty result") } } @@ -32,6 +23,9 @@ async function main() { let sentShutdown = false let firstResult = "" let followupResult = "" + let followupDoneCode: string | undefined + let sawFollowupUserTurn = false + let sawMisroutedToolResult = false await runStreamCase({ onEvent(event: StreamEvent, context) { @@ -52,6 +46,31 @@ async function main() { } if (event.type !== "result" || event.done !== true) { + if ( + event.type === "control" && + event.requestId === followupRequestId && + event.command === "message" && + event.subtype === "done" + ) { + followupDoneCode = event.code + return + } + + if ( + event.type === "tool_result" && + event.requestId === followupRequestId && + typeof event.content === "string" && + event.content.includes("") + ) { + sawMisroutedToolResult = true + return + } + + if (event.type === "user" && event.requestId === followupRequestId) { + sawFollowupUserTurn = typeof event.content === "string" && event.content.includes("3+3") + return + } + return } @@ -77,7 +96,22 @@ async function main() { } followupResult = parseEventContent(event.content) - validateFollowupAnswer(followupResult) + validateFollowupResult(followupResult) + + if (followupDoneCode !== "responded") { + throw new Error( + `follow-up message was not routed as ask response; code="${followupDoneCode ?? "none"}"`, + ) + } + + if (!sawFollowupUserTurn) { + throw new Error("follow-up did not appear as a normal user turn in stream output") + } + + if (sawMisroutedToolResult) { + throw new Error("follow-up message was misrouted into tool_result (), old bug reproduced") + } + console.log(`[PASS] first result="${firstResult}"`) console.log(`[PASS] follow-up result="${followupResult}"`) diff --git a/apps/cli/scripts/integration/cases/followup-completion-ask-response-images.ts b/apps/cli/scripts/integration/cases/followup-completion-ask-response-images.ts new file mode 100644 index 0000000000..55b1ccf94c --- /dev/null +++ b/apps/cli/scripts/integration/cases/followup-completion-ask-response-images.ts @@ -0,0 +1,136 @@ +import { runStreamCase, StreamEvent } from "../lib/stream-harness" + +const START_PROMPT = 'Answer this question and finish: What is 1+1? Reply with only "2", then complete the task.' +const FOLLOWUP_PROMPT = 'Different question now: what is 3+3? Reply with only "6".' +const ONE_PIXEL_IMAGE = + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAusB9Y9R4WQAAAAASUVORK5CYII=" + +async function main() { + const startRequestId = `start-${Date.now()}` + const followupRequestId = `message-${Date.now()}` + const shutdownRequestId = `shutdown-${Date.now()}` + + let initSeen = false + let sentFollowup = false + let sentShutdown = false + let followupDoneCode: string | undefined + let sawFollowupUserTurn = false + let sawMisroutedToolResult = false + let sawQueueImageMetadata = false + let shutdownDoneSeen = false + + await runStreamCase({ + onEvent(event: StreamEvent, context) { + if (event.type === "system" && event.subtype === "init" && !initSeen) { + initSeen = true + context.sendCommand({ + command: "start", + requestId: startRequestId, + prompt: START_PROMPT, + }) + return + } + + if (event.type === "control" && event.subtype === "error") { + throw new Error( + `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`, + ) + } + + if ( + event.type === "control" && + event.command === "message" && + event.subtype === "done" && + event.requestId === followupRequestId + ) { + followupDoneCode = event.code + if (!sentShutdown) { + context.sendCommand({ + command: "shutdown", + requestId: shutdownRequestId, + }) + sentShutdown = true + } + return + } + + if ( + event.type === "control" && + event.command === "shutdown" && + event.subtype === "done" && + event.requestId === shutdownRequestId + ) { + shutdownDoneSeen = true + + if (followupDoneCode !== "responded") { + throw new Error( + `follow-up image message was not routed as ask response; code="${followupDoneCode ?? "none"}"`, + ) + } + if (sawQueueImageMetadata) { + throw new Error("follow-up image message was unexpectedly queued (observed queue image metadata)") + } + if (sawMisroutedToolResult) { + throw new Error("follow-up image message was misrouted into tool_result ()") + } + + console.log(`[PASS] follow-up image control code: "${followupDoneCode}"`) + console.log(`[PASS] follow-up image user turn observed before shutdown: ${sawFollowupUserTurn}`) + return + } + + if ( + event.type === "queue" && + Array.isArray(event.queue) && + event.queue.some((item) => item?.imageCount === 1) + ) { + sawQueueImageMetadata = true + return + } + + if ( + event.type === "tool_result" && + event.requestId === followupRequestId && + typeof event.content === "string" && + event.content.includes("") + ) { + sawMisroutedToolResult = true + return + } + + if (event.type === "user" && event.requestId === followupRequestId) { + sawFollowupUserTurn = typeof event.content === "string" && event.content.includes("3+3") + return + } + + if (event.type === "result" && event.done === true && event.requestId === startRequestId && !sentFollowup) { + context.sendCommand({ + command: "message", + requestId: followupRequestId, + prompt: FOLLOWUP_PROMPT, + images: [ONE_PIXEL_IMAGE], + }) + sentFollowup = true + return + } + }, + onTimeoutMessage() { + return [ + "timed out waiting for followup-completion-ask-response-images validation", + `initSeen=${initSeen}`, + `sentFollowup=${sentFollowup}`, + `sentShutdown=${sentShutdown}`, + `shutdownDoneSeen=${shutdownDoneSeen}`, + `followupDoneCode=${followupDoneCode ?? "none"}`, + `sawFollowupUserTurn=${sawFollowupUserTurn}`, + `sawMisroutedToolResult=${sawMisroutedToolResult}`, + `sawQueueImageMetadata=${sawQueueImageMetadata}`, + ].join(" ") + }, + }) +} + +main().catch((error) => { + console.error(`[FAIL] ${error instanceof Error ? error.message : String(error)}`) + process.exit(1) +}) diff --git a/apps/cli/scripts/integration/cases/followup-completion-ask-response.ts b/apps/cli/scripts/integration/cases/followup-completion-ask-response.ts index 5322110292..8b2410f0d0 100644 --- a/apps/cli/scripts/integration/cases/followup-completion-ask-response.ts +++ b/apps/cli/scripts/integration/cases/followup-completion-ask-response.ts @@ -16,6 +16,7 @@ async function main() { let followupDoneCode: string | undefined let sawFollowupUserTurn = false let sawMisroutedToolResult = false + let sawQueueEventForFollowupRequest = false let followupResult = "" await runStreamCase({ @@ -54,6 +55,11 @@ async function main() { return } + if (event.type === "queue" && event.requestId === followupRequestId) { + sawQueueEventForFollowupRequest = true + return + } + if ( event.type === "tool_result" && event.requestId === followupRequestId && @@ -97,6 +103,9 @@ async function main() { if (sawMisroutedToolResult) { throw new Error("follow-up message was misrouted into tool_result (), old bug reproduced") } + if (sawQueueEventForFollowupRequest) { + throw new Error("follow-up message produced queue events despite responded routing") + } if (!sawFollowupUserTurn) { throw new Error("follow-up did not appear as a normal user turn in stream output") @@ -131,6 +140,7 @@ async function main() { `followupDoneCode=${followupDoneCode ?? "none"}`, `sawFollowupUserTurn=${sawFollowupUserTurn}`, `sawMisroutedToolResult=${sawMisroutedToolResult}`, + `sawQueueEventForFollowupRequest=${sawQueueEventForFollowupRequest}`, `haveFollowupResult=${Boolean(followupResult)}`, ].join(" ") }, diff --git a/apps/cli/scripts/integration/cases/mixed-command-ordering.ts b/apps/cli/scripts/integration/cases/mixed-command-ordering.ts new file mode 100644 index 0000000000..3166e78031 --- /dev/null +++ b/apps/cli/scripts/integration/cases/mixed-command-ordering.ts @@ -0,0 +1,148 @@ +import { runStreamCase, StreamEvent } from "../lib/stream-harness" + +const START_PROMPT = + 'Run exactly this command and do not summarize until it finishes: sleep 8 && echo "done". After it finishes, reply with exactly "done".' + +async function main() { + const startRequestId = `start-${Date.now()}` + const pingARequestId = `ping-a-${Date.now()}` + const messageRequestId = `message-${Date.now()}` + const pingBRequestId = `ping-b-${Date.now()}` + const shutdownRequestId = `shutdown-${Date.now()}` + + let initSeen = false + let sentInterleavedCommands = false + let sentShutdown = false + + const eventOrderByRequestId = new Map() + let messageDoneCode: string | undefined + let messageQueueEnqueuedSeen = false + let messageResultSeen = false + + function recordControlEvent(event: StreamEvent): void { + if (!event.requestId || event.type !== "control" || !event.subtype) { + return + } + const existing = eventOrderByRequestId.get(event.requestId) ?? [] + existing.push(event.subtype) + eventOrderByRequestId.set(event.requestId, existing) + } + + await runStreamCase({ + onEvent(event: StreamEvent, context) { + if (event.type === "system" && event.subtype === "init" && !initSeen) { + initSeen = true + context.sendCommand({ + command: "start", + requestId: startRequestId, + prompt: START_PROMPT, + }) + return + } + + recordControlEvent(event) + + if (event.type === "control" && event.subtype === "error") { + throw new Error( + `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`, + ) + } + + if ( + !sentInterleavedCommands && + event.type === "control" && + event.subtype === "ack" && + event.command === "start" && + event.requestId === startRequestId + ) { + context.sendCommand({ + command: "ping", + requestId: pingARequestId, + }) + context.sendCommand({ + command: "message", + requestId: messageRequestId, + prompt: 'When this queued message is processed, reply with only "INTERLEAVED".', + }) + context.sendCommand({ + command: "ping", + requestId: pingBRequestId, + }) + sentInterleavedCommands = true + return + } + + if ( + event.type === "control" && + event.subtype === "done" && + event.command === "message" && + event.requestId === messageRequestId + ) { + messageDoneCode = event.code + return + } + + if ( + event.type === "queue" && + event.subtype === "enqueued" && + event.requestId === startRequestId && + event.queueDepth === 1 + ) { + messageQueueEnqueuedSeen = true + return + } + + if (event.type === "result" && event.done === true && event.requestId === messageRequestId) { + messageResultSeen = true + + const pingAOrder = eventOrderByRequestId.get(pingARequestId) ?? [] + const pingBOrder = eventOrderByRequestId.get(pingBRequestId) ?? [] + const messageOrder = eventOrderByRequestId.get(messageRequestId) ?? [] + + if (pingAOrder.join(",") !== "ack,done") { + throw new Error(`ping A control order mismatch: ${pingAOrder.join(",") || "none"}`) + } + if (pingBOrder.join(",") !== "ack,done") { + throw new Error(`ping B control order mismatch: ${pingBOrder.join(",") || "none"}`) + } + if (messageOrder.join(",") !== "ack,done") { + throw new Error(`message control order mismatch: ${messageOrder.join(",") || "none"}`) + } + if (messageDoneCode !== "queued") { + throw new Error( + `expected interleaved message done code \"queued\", got \"${messageDoneCode ?? "none"}\"`, + ) + } + if (!messageQueueEnqueuedSeen) { + throw new Error("expected queue enqueued event after interleaved message") + } + + if (!sentShutdown) { + context.sendCommand({ + command: "shutdown", + requestId: shutdownRequestId, + }) + sentShutdown = true + } + } + }, + onTimeoutMessage() { + return [ + "timed out waiting for mixed-command-ordering validation", + `initSeen=${initSeen}`, + `sentInterleavedCommands=${sentInterleavedCommands}`, + `messageDoneCode=${messageDoneCode ?? "none"}`, + `messageQueueEnqueuedSeen=${messageQueueEnqueuedSeen}`, + `messageResultSeen=${messageResultSeen}`, + `pingAOrder=${(eventOrderByRequestId.get(pingARequestId) ?? []).join(",") || "none"}`, + `messageOrder=${(eventOrderByRequestId.get(messageRequestId) ?? []).join(",") || "none"}`, + `pingBOrder=${(eventOrderByRequestId.get(pingBRequestId) ?? []).join(",") || "none"}`, + ].join(" ") + }, + }) +} + +main().catch((error) => { + console.error(`[FAIL] ${error instanceof Error ? error.message : String(error)}`) + process.exit(1) +}) diff --git a/apps/cli/src/commands/cli/__tests__/parse-stdin-command.test.ts b/apps/cli/src/commands/cli/__tests__/parse-stdin-command.test.ts index f46c46158e..408ab1bc6e 100644 --- a/apps/cli/src/commands/cli/__tests__/parse-stdin-command.test.ts +++ b/apps/cli/src/commands/cli/__tests__/parse-stdin-command.test.ts @@ -168,8 +168,16 @@ describe("shouldSendMessageAsAskResponse", () => { expect(shouldSendMessageAsAskResponse(true, "completion_result")).toBe(true) }) - it("routes followup asks as ask responses", () => { - expect(shouldSendMessageAsAskResponse(true, "followup")).toBe(true) + it.each([ + "followup", + "tool", + "command", + "use_mcp_server", + "resume_task", + "resume_completed_task", + "mistake_limit_reached", + ])("routes %s asks as ask responses", (ask) => { + expect(shouldSendMessageAsAskResponse(true, ask)).toBe(true) }) it("does not route when not waiting for input", () => {