Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions apps/cli/scripts/integration/cases/cancel-message-recovery-race.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import { runStreamCase, StreamEvent } from "../lib/stream-harness"

// Prompt sent with the initial "start" command. It asks for a shell command that
// sleeps for 12 seconds; main() sends the cancel as soon as it sees the
// corresponding tool_use/command event (presumably while the sleep is still
// running — confirm against harness timing).
const START_PROMPT =
'Run exactly this command and do not summarize until it finishes: sleep 12 && echo "done". After it finishes, reply with exactly "done".'
// Prompt sent with the "message" command issued immediately after "cancel".
// It contains the marker "RACE-OK", which main() looks for in the echoed
// user-turn event to confirm the follow-up was delivered as a user turn.
const FOLLOWUP_PROMPT = 'After cancellation, reply with only "RACE-OK".'

/**
 * Integration case: cancel a running task and immediately send a follow-up
 * message, then verify the follow-up is processed as a normal user turn.
 *
 * Event-driven flow:
 *  1. On the first `system`/`init` event, send `start` with START_PROMPT.
 *  2. On the first `tool_use`/`command` event for the start request, send
 *     `cancel` immediately followed by `message` (FOLLOWUP_PROMPT). Sending
 *     both back-to-back is the race under test.
 *  3. Record the `done` codes of both control commands, whether the follow-up
 *     showed up as a `user` event, and whether it was misrouted into a
 *     `tool_result` containing `<user_message>`.
 *  4. On the final `result` event for the follow-up request, run all
 *     assertions, log the outcome, and send `shutdown` once.
 *
 * Failures are signalled by throwing from `onEvent`; this assumes
 * `runStreamCase` propagates those errors to the returned promise
 * (TODO confirm against the harness).
 */
async function main() {
  const startRequestId = `start-${Date.now()}`
  const cancelRequestId = `cancel-${Date.now()}`
  const followupRequestId = `message-${Date.now()}`
  const shutdownRequestId = `shutdown-${Date.now()}`

  let initSeen = false
  let sentCancelAndFollowup = false
  let sentShutdown = false
  let cancelDoneCode: string | undefined
  let followupDoneCode: string | undefined
  let followupResult = ""
  let sawFollowupUserTurn = false
  let sawMisroutedToolResult = false
  let sawMessageControlError = false

  await runStreamCase({
    onEvent(event: StreamEvent, context) {
      // Kick off the long-running task exactly once, on the first init event.
      if (event.type === "system" && event.subtype === "init" && !initSeen) {
        initSeen = true
        context.sendCommand({
          command: "start",
          requestId: startRequestId,
          prompt: START_PROMPT,
        })
        return
      }

      // Any control error fails the case immediately; the flag is recorded
      // first so it is also visible in the timeout diagnostics.
      if (event.type === "control" && event.subtype === "error") {
        if (event.requestId === followupRequestId) {
          sawMessageControlError = true
        }
        throw new Error(
          `received control error for requestId=${event.requestId ?? "unknown"} command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
        )
      }

      // First command tool_use of the started task: fire cancel and the
      // follow-up message back-to-back to provoke the race.
      if (
        !sentCancelAndFollowup &&
        event.type === "tool_use" &&
        event.requestId === startRequestId &&
        event.subtype === "command"
      ) {
        context.sendCommand({
          command: "cancel",
          requestId: cancelRequestId,
        })
        context.sendCommand({
          command: "message",
          requestId: followupRequestId,
          prompt: FOLLOWUP_PROMPT,
        })
        sentCancelAndFollowup = true
        return
      }

      // Completion of the cancel command: remember its result code.
      if (
        event.type === "control" &&
        event.command === "cancel" &&
        event.subtype === "done" &&
        event.requestId === cancelRequestId
      ) {
        cancelDoneCode = event.code
        return
      }

      // Completion of the follow-up message command: remember its result code.
      if (
        event.type === "control" &&
        event.command === "message" &&
        event.subtype === "done" &&
        event.requestId === followupRequestId
      ) {
        followupDoneCode = event.code
        return
      }

      // The follow-up wrapped in <user_message> inside a tool_result means it
      // was routed as a tool response instead of a user turn.
      if (
        event.type === "tool_result" &&
        event.requestId === followupRequestId &&
        typeof event.content === "string" &&
        event.content.includes("<user_message>")
      ) {
        sawMisroutedToolResult = true
        return
      }

      if (event.type === "user" && event.requestId === followupRequestId) {
        // Sticky flag: once the follow-up prompt has been observed as a user
        // turn, a later user event without the marker must not reset it.
        if (typeof event.content === "string" && event.content.includes("RACE-OK")) {
          sawFollowupUserTurn = true
        }
        return
      }

      // Everything below runs only for the final result of the follow-up.
      if (event.type !== "result" || event.done !== true || event.requestId !== followupRequestId) {
        return
      }

      followupResult = event.content ?? ""

      if (followupResult.trim().length === 0) {
        throw new Error("follow-up after cancel produced an empty result")
      }
      if (cancelDoneCode !== "cancel_requested") {
        throw new Error(
          `cancel done code mismatch; expected cancel_requested, got "${cancelDoneCode ?? "none"}"`,
        )
      }
      if (followupDoneCode !== "responded" && followupDoneCode !== "queued") {
        throw new Error(
          `unexpected follow-up done code after cancel race; expected responded|queued, got "${followupDoneCode ?? "none"}"`,
        )
      }
      if (sawMessageControlError) {
        throw new Error("follow-up message emitted control error in cancel recovery race")
      }
      if (sawMisroutedToolResult) {
        throw new Error(
          "follow-up message was misrouted into tool_result (<user_message>) in cancel recovery race",
        )
      }
      if (!sawFollowupUserTurn) {
        throw new Error("follow-up after cancel did not appear as a normal user turn")
      }

      console.log(`[PASS] cancel done code: "${cancelDoneCode}"`)
      console.log(`[PASS] follow-up done code: "${followupDoneCode}"`)
      console.log(`[PASS] follow-up user turn observed: ${sawFollowupUserTurn}`)
      console.log(`[PASS] follow-up result: "${followupResult}"`)

      // Request shutdown only once, even if another final result arrives.
      if (!sentShutdown) {
        context.sendCommand({
          command: "shutdown",
          requestId: shutdownRequestId,
        })
        sentShutdown = true
      }
    },
    // Snapshot of all progress flags, for diagnosing where the case stalled.
    onTimeoutMessage() {
      return [
        "timed out waiting for cancel-message-recovery-race validation",
        `initSeen=${initSeen}`,
        `sentCancelAndFollowup=${sentCancelAndFollowup}`,
        `cancelDoneCode=${cancelDoneCode ?? "none"}`,
        `followupDoneCode=${followupDoneCode ?? "none"}`,
        `sawFollowupUserTurn=${sawFollowupUserTurn}`,
        `sawMisroutedToolResult=${sawMisroutedToolResult}`,
        `sawMessageControlError=${sawMessageControlError}`,
        `haveFollowupResult=${Boolean(followupResult)}`,
      ].join(" ")
    },
  })
}

// Entry point: run the case; on any failure print a [FAIL] line and exit non-zero.
main().catch((failure) => {
  const reason = failure instanceof Error ? failure.message : String(failure)
  console.error(`[FAIL] ${reason}`)
  process.exit(1)
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { runStreamCase, StreamEvent } from "../lib/stream-harness"

/**
 * Integration case: send `cancel` when no task has been started and verify it
 * completes as a successful no-op.
 *
 * On the first `system`/`init` event a `cancel` command is sent. The matching
 * `done` control event must carry code "no_active_task" and `success === true`;
 * once that is verified a `shutdown` command is sent. Any control `error`
 * event fails the case.
 */
async function main() {
  const cancelRequestId = `cancel-${Date.now()}`
  const shutdownRequestId = `shutdown-${Date.now()}`

  // Progress flags, reported in the timeout message.
  const progress = {
    initSeen: false,
    cancelAckSeen: false,
    cancelDoneSeen: false,
    shutdownSent: false,
  }

  await runStreamCase({
    onEvent(event: StreamEvent, context) {
      // First init event: issue the cancel with nothing running.
      if (event.type === "system" && event.subtype === "init" && !progress.initSeen) {
        progress.initSeen = true
        context.sendCommand({
          command: "cancel",
          requestId: cancelRequestId,
        })
        return
      }

      // Only control events matter from here on.
      if (event.type !== "control") {
        return
      }

      const isOurCancel = event.command === "cancel" && event.requestId === cancelRequestId

      switch (event.subtype) {
        case "ack":
          if (isOurCancel) {
            progress.cancelAckSeen = true
          }
          return

        case "done":
          if (!isOurCancel) {
            return
          }
          progress.cancelDoneSeen = true

          if (event.code !== "no_active_task") {
            throw new Error(`cancel without task should return no_active_task, got "${event.code ?? "none"}"`)
          }
          if (event.success !== true) {
            throw new Error("cancel without task should be treated as successful no-op")
          }

          // Validation passed: request shutdown exactly once.
          if (!progress.shutdownSent) {
            context.sendCommand({
              command: "shutdown",
              requestId: shutdownRequestId,
            })
            progress.shutdownSent = true
          }
          return

        case "error":
          // Errors fail the case regardless of which command they belong to.
          throw new Error(
            `unexpected control error command=${event.command ?? "unknown"} code=${event.code ?? "unknown"} content=${event.content ?? ""}`,
          )

        default:
          return
      }
    },
    onTimeoutMessage() {
      const { initSeen, cancelAckSeen, cancelDoneSeen, shutdownSent } = progress
      return `timed out waiting for cancel-without-active-task validation (initSeen=${initSeen}, cancelAckSeen=${cancelAckSeen}, cancelDoneSeen=${cancelDoneSeen}, shutdownSent=${shutdownSent})`
    },
  })
}

// Entry point: run the case; on any failure print a [FAIL] line and exit non-zero.
main().catch((failure) => {
  const reason = failure instanceof Error ? failure.message : String(failure)
  console.error(`[FAIL] ${reason}`)
  process.exit(1)
})
60 changes: 47 additions & 13 deletions apps/cli/scripts/integration/cases/followup-after-completion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,9 @@ function parseEventContent(text: string | undefined): string {
return typeof text === "string" ? text : ""
}

function validateFollowupAnswer(text: string): void {
const normalized = text.toLowerCase()
const containsExpected = /\b6\b/.test(normalized) || normalized.includes("six")
const containsOldAnswer = /\b1\+1\b/.test(normalized) || /\b2\b/.test(normalized)
const containsQuestionReference = normalized.includes("3+3")

if (!containsExpected) {
throw new Error(`follow-up result did not answer the follow-up question; result="${text}"`)
}

if (!containsQuestionReference && containsOldAnswer && !containsExpected) {
throw new Error(`follow-up result appears anchored to first question; result="${text}"`)
/**
 * Ensures the follow-up produced some non-whitespace output.
 *
 * @param text - Result text of the follow-up turn.
 * @throws Error when `text` is empty or consists only of whitespace.
 */
function validateFollowupResult(text: string): void {
  const hasContent = text.trim() !== ""
  if (!hasContent) {
    throw new Error("follow-up produced an empty result")
  }
}

Expand All @@ -32,6 +23,9 @@ async function main() {
let sentShutdown = false
let firstResult = ""
let followupResult = ""
let followupDoneCode: string | undefined
let sawFollowupUserTurn = false
let sawMisroutedToolResult = false

await runStreamCase({
onEvent(event: StreamEvent, context) {
Expand All @@ -52,6 +46,31 @@ async function main() {
}

if (event.type !== "result" || event.done !== true) {
if (
event.type === "control" &&
event.requestId === followupRequestId &&
event.command === "message" &&
event.subtype === "done"
) {
followupDoneCode = event.code
return
}

if (
event.type === "tool_result" &&
event.requestId === followupRequestId &&
typeof event.content === "string" &&
event.content.includes("<user_message>")
) {
sawMisroutedToolResult = true
return
}

if (event.type === "user" && event.requestId === followupRequestId) {
sawFollowupUserTurn = typeof event.content === "string" && event.content.includes("3+3")
return
}

return
}

Expand All @@ -77,7 +96,22 @@ async function main() {
}

followupResult = parseEventContent(event.content)
validateFollowupAnswer(followupResult)
validateFollowupResult(followupResult)

if (followupDoneCode !== "responded") {
throw new Error(
`follow-up message was not routed as ask response; code="${followupDoneCode ?? "none"}"`,
)
}

if (!sawFollowupUserTurn) {
throw new Error("follow-up did not appear as a normal user turn in stream output")
}

if (sawMisroutedToolResult) {
throw new Error("follow-up message was misrouted into tool_result (<user_message>), old bug reproduced")
}

console.log(`[PASS] first result="${firstResult}"`)
console.log(`[PASS] follow-up result="${followupResult}"`)

Expand Down
Loading
Loading