diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md index 7009ec86..e6a2c467 100644 --- a/plugins/codex/agents/codex-rescue.md +++ b/plugins/codex/agents/codex-rescue.md @@ -21,10 +21,11 @@ Forwarding rules: - Use exactly one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...`. - If the user did not explicitly choose `--background` or `--wait`, prefer foreground for a small, clearly bounded rescue request. -- If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution. +- If the user explicitly chose `--background`, call `task --background --await` so the Bash command remains alive until the Codex daemon job reaches a terminal state. +- If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer `task --background --await`. - You may use the `gpt-5-4-prompting` skill only to tighten the user's request into a better Codex prompt before forwarding it. - Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work beyond shaping the forwarded prompt text. -- Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own. +- Do not inspect the repository, read files, grep, cancel jobs, summarize output, or do any follow-up work of your own. - Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`. This subagent only forwards to `task`. - Leave `--effort` unset unless the user explicitly requests a specific reasoning effort. - Leave model unset by default. Only add `--model` when the user explicitly asks for a specific model. diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index 56de9555..cf2efb92 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -13,10 +13,10 @@ $ARGUMENTS Execution mode: -- If the request includes `--background`, run the `codex:codex-rescue` subagent in the background. +- If the request includes `--background`, run the `codex:codex-rescue` subagent in the background and make the forwarded task command use `task --background --await`. - If the request includes `--wait`, run the `codex:codex-rescue` subagent in the foreground. - If neither flag is present, default to foreground. -- `--background` and `--wait` are execution flags for Claude Code. Do not forward them to `task`, and do not treat them as part of the natural-language task text. +- `--background` and `--wait` are execution flags for Claude Code. Do not treat them as part of the natural-language task text. - `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text. - If the request includes `--resume`, do not ask whether to continue. The user already chose. - If the request includes `--fresh`, do not ask whether to continue. The user already chose. @@ -39,9 +39,10 @@ node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task-resume-candidate - Operating rules: - The subagent is a thin forwarder only. It should use one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...` and return that command's stdout as-is. +- For background requests, the single `Bash` call must invoke `task --background --await ...` so the command completes only when the Codex daemon job reaches a terminal state. - Return the Codex companion stdout verbatim to the user. - Do not paraphrase, summarize, rewrite, or add commentary before or after it. -- Do not ask the subagent to inspect files, monitor progress, poll `/codex:status`, fetch `/codex:result`, call `/codex:cancel`, summarize output, or do follow-up work of its own. +- Do not ask the subagent to inspect files, call `/codex:cancel`, summarize output, or do follow-up work of its own. - Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort. - Leave the model unset unless the user explicitly asks for one. If they ask for `spark`, map it to `gpt-5.3-codex-spark`. - Leave `--resume` and `--fresh` in the forwarded request. The subagent handles that routing when it builds the `task` command. diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..f27ae884 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -65,6 +65,7 @@ import { const ROOT_DIR = path.resolve(fileURLToPath(new URL("..", import.meta.url))); const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json"); const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000; +const DEFAULT_AWAIT_TIMEOUT_MS = 6 * 60 * 60 * 1000; const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000; const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]); const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]); @@ -77,8 +78,9 @@ function printUsage() { " node scripts/codex-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]", " node scripts/codex-companion.mjs review [--wait|--background] [--base ] [--scope ]", " node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [focus text]", - " node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", + " node scripts/codex-companion.mjs task [--background [--await]] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", " node scripts/codex-companion.mjs status [job-id] [--all] [--json]", + " node scripts/codex-companion.mjs await [--timeout-ms ] [--poll-interval-ms ] [--json]", " node scripts/codex-companion.mjs result [job-id] [--json]", " node scripts/codex-companion.mjs cancel [job-id] [--json]" ].join("\n") @@ -288,6 +290,26 @@ function isActiveJobStatus(status) { return status === "queued" || status === "running"; } +function isTerminalJobStatus(status) { + return status === "completed" || status === "failed" || status === "cancelled"; +} + +function normalizeTimeoutMs(value, fallback) { + if (value == null) { + return fallback; + } + const parsed = Number(value); + return Number.isFinite(parsed) ? Math.max(0, parsed) : fallback; +} + +function normalizePollIntervalMs(value) { + if (value == null) { + return DEFAULT_STATUS_POLL_INTERVAL_MS; + } + const parsed = Number(value); + return Number.isFinite(parsed) ? Math.max(100, parsed) : DEFAULT_STATUS_POLL_INTERVAL_MS; +} + function getCurrentClaudeSessionId() { return process.env[SESSION_ID_ENV] ?? null; } @@ -313,8 +335,8 @@ function findLatestResumableTaskJob(jobs) { } async function waitForSingleJobSnapshot(cwd, reference, options = {}) { - const timeoutMs = Math.max(0, Number(options.timeoutMs) || DEFAULT_STATUS_WAIT_TIMEOUT_MS); - const pollIntervalMs = Math.max(100, Number(options.pollIntervalMs) || DEFAULT_STATUS_POLL_INTERVAL_MS); + const timeoutMs = normalizeTimeoutMs(options.timeoutMs, DEFAULT_STATUS_WAIT_TIMEOUT_MS); + const pollIntervalMs = normalizePollIntervalMs(options.pollIntervalMs); const deadline = Date.now() + timeoutMs; let snapshot = buildSingleJobSnapshot(cwd, reference); @@ -330,6 +352,53 @@ async function waitForSingleJobSnapshot(cwd, reference, options = {}) { }; } +async function waitForTerminalJobSnapshot(cwd, reference, options = {}) { + const timeoutMs = normalizeTimeoutMs(options.timeoutMs, DEFAULT_AWAIT_TIMEOUT_MS); + const pollIntervalMs = normalizePollIntervalMs(options.pollIntervalMs); + const deadline = Date.now() + timeoutMs; + let snapshot = buildSingleJobSnapshot(cwd, reference); + + while (!isTerminalJobStatus(snapshot.job.status) && Date.now() < deadline) { + await sleep(Math.min(pollIntervalMs, Math.max(0, deadline - Date.now()))); + snapshot = buildSingleJobSnapshot(cwd, reference); + } + + return { + ...snapshot, + waitTimedOut: !isTerminalJobStatus(snapshot.job.status), + timeoutMs + }; +} + +async function awaitJobResult(cwd, reference, options = {}) { + const snapshot = await waitForTerminalJobSnapshot(cwd, reference, { + timeoutMs: options.timeoutMs, + pollIntervalMs: options.pollIntervalMs + }); + + if (snapshot.waitTimedOut) { + outputCommandResult(snapshot, renderJobStatusReport(snapshot.job), options.json); + process.exitCode = 124; + return snapshot; + } + + const storedJob = readStoredJob(snapshot.workspaceRoot, snapshot.job.id); + const payload = { + job: snapshot.job, + storedJob + }; + outputCommandResult(payload, renderStoredJobResult(snapshot.job, storedJob), options.json); + + if (snapshot.job.status !== "completed") { + process.exitCode = 1; + } + + return { + ...snapshot, + storedJob + }; +} + async function resolveLatestTrackedTaskThread(cwd, options = {}) { const workspaceRoot = resolveWorkspaceRoot(cwd); const sessionId = getCurrentClaudeSessionId(); @@ -551,7 +620,7 @@ function buildTaskRunMetadata({ prompt, resumeLast = false }) { } function renderQueuedTaskLaunch(payload) { - return `${payload.title} started in the background as ${payload.jobId}. Check /codex:status ${payload.jobId} for progress.\n`; + return `${payload.title} queued as ${payload.jobId}.\nThis command is only launch-complete; daemon completion is not signaled here.\nCheck /codex:status ${payload.jobId} for progress.\nFor completion notification, use: task --background --await ...\n`; } function getJobKindLabel(kind, jobClass) { @@ -731,8 +800,8 @@ async function handleReview(argv) { async function handleTask(argv) { const { options, positionals } = parseCommandInput(argv, { - valueOptions: ["model", "effort", "cwd", "prompt-file"], - booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background"], + valueOptions: ["model", "effort", "cwd", "prompt-file", "timeout-ms", "poll-interval-ms"], + booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background", "await"], aliasMap: { m: "model" } @@ -770,6 +839,14 @@ async function handleTask(argv) { jobId: job.id }); const { payload } = enqueueBackgroundTask(cwd, job, request); + if (options.await) { + await awaitJobResult(cwd, job.id, { + json: options.json, + timeoutMs: options["timeout-ms"], + pollIntervalMs: options["poll-interval-ms"] + }); + return; + } outputCommandResult(payload, renderQueuedTaskLaunch(payload), options.json); return; } @@ -792,6 +869,25 @@ async function handleTask(argv) { ); } +async function handleAwait(argv) { + const { options, positionals } = parseCommandInput(argv, { + valueOptions: ["cwd", "timeout-ms", "poll-interval-ms"], + booleanOptions: ["json"] + }); + + const reference = positionals[0] ?? ""; + if (!reference) { + throw new Error("`await` requires a job id."); + } + + const cwd = resolveCommandCwd(options); + await awaitJobResult(cwd, reference, { + json: options.json, + timeoutMs: options["timeout-ms"], + pollIntervalMs: options["poll-interval-ms"] + }); +} + async function handleTaskWorker(argv) { const { options } = parseCommandInput(argv, { valueOptions: ["cwd", "job-id"] @@ -1006,6 +1102,9 @@ async function main() { case "status": await handleStatus(argv); break; + case "await": + await handleAwait(argv); + break; case "result": handleResult(argv); break; diff --git a/plugins/codex/skills/codex-cli-runtime/SKILL.md b/plugins/codex/skills/codex-cli-runtime/SKILL.md index 0e91bfb5..febd1c90 100644 --- a/plugins/codex/skills/codex-cli-runtime/SKILL.md +++ b/plugins/codex/skills/codex-cli-runtime/SKILL.md @@ -25,7 +25,8 @@ Execution rules: Command selection: - Use exactly one `task` invocation per rescue handoff. -- If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only. Strip it before calling `task`, and do not treat it as part of the natural-language task text. +- If the forwarded request includes `--background`, strip it from the natural-language task text and call `task --background --await`. +- If the forwarded request includes `--wait`, treat that as Claude-side execution control only. Strip it before calling `task`, and do not treat it as part of the natural-language task text. - If the forwarded request includes `--model`, normalize `spark` to `gpt-5.3-codex-spark` and pass it through to `task`. - If the forwarded request includes `--effort`, pass it through to `task`. - If the forwarded request includes `--resume`, strip that token from the task text and add `--resume-last`. @@ -38,6 +39,6 @@ Command selection: Safety rules: - Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior. - Preserve the user's task text as-is apart from stripping routing flags. -- Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own. +- Do not inspect the repository, read files, grep, cancel jobs, summarize output, or do any follow-up work of your own. - Return the stdout of the `task` command exactly as-is. - If the Bash call fails or Codex cannot be invoked, return nothing. diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index 3724ffa4..92e14c7c 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -110,7 +110,7 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /Start a new Codex thread/); assert.match(rescue, /run the `codex:codex-rescue` subagent in the background/i); assert.match(rescue, /default to foreground/i); - assert.match(rescue, /Do not forward them to `task`/i); + assert.match(rescue, /Do not treat them as part of the natural-language task text/i); assert.match(rescue, /`--model` and `--effort` are runtime-selection flags/i); assert.match(rescue, /Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort/i); assert.match(rescue, /If they ask for `spark`, map it to `gpt-5\.3-codex-spark`/i); @@ -119,6 +119,7 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /If the user chooses continue, add `--resume`/i); assert.match(rescue, /If the user chooses a new thread, add `--fresh`/i); assert.match(rescue, /thin forwarder only/i); + assert.match(rescue, /task --background --await/i); assert.match(rescue, /Return the Codex companion stdout verbatim to the user/i); assert.match(rescue, /Do not paraphrase, summarize, rewrite, or add commentary before or after it/i); assert.match(rescue, /return that command's stdout as-is/i); @@ -127,9 +128,10 @@ test("rescue command absorbs continue semantics", () => { assert.match(agent, /--fresh/); assert.match(agent, /thin forwarding wrapper/i); assert.match(agent, /prefer foreground for a small, clearly bounded rescue request/i); - assert.match(agent, /If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution/i); + assert.match(agent, /If the user explicitly chose `--background`, call `task --background --await`/i); + assert.match(agent, /If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer `task --background --await`/i); assert.match(agent, /Use exactly one `Bash` call/i); - assert.match(agent, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); + assert.match(agent, /Do not inspect the repository, read files, grep, cancel jobs, summarize output, or do any follow-up work of your own/i); assert.match(agent, /Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`/i); assert.match(agent, /Leave `--effort` unset unless the user explicitly requests a specific reasoning effort/i); assert.match(agent, /Leave model unset by default/i); @@ -147,10 +149,10 @@ test("rescue command absorbs continue semantics", () => { assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i); assert.match(runtimeSkill, /Leave model unset by default/i); assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i); - assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i); - assert.match(runtimeSkill, /Strip it before calling `task`/i); + assert.match(runtimeSkill, /If the forwarded request includes `--background`, strip it from the natural-language task text and call `task --background --await`/i); + assert.match(runtimeSkill, /If the forwarded request includes `--wait`, treat that as Claude-side execution control only/i); assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i); - assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); + assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, cancel jobs, summarize output, or do any follow-up work of your own/i); assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i); assert.match(readme, /`codex:codex-rescue` subagent/i); assert.match(readme, /if you do not pass `--model` or `--effort`, Codex chooses its own defaults/i); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..d2da9d37 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -28,6 +28,74 @@ async function waitFor(predicate, { timeoutMs = 5000, intervalMs = 50 } = {}) { throw new Error("Timed out waiting for condition."); } +function runAsync(command, args, options = {}) { + return new Promise((resolve) => { + const child = spawn(command, args, { + cwd: options.cwd, + env: options.env, + windowsHide: true + }); + let stdout = ""; + let stderr = ""; + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (chunk) => { + stdout += chunk; + }); + child.stderr.on("data", (chunk) => { + stderr += chunk; + }); + child.on("close", (status, signal) => { + resolve({ status: status ?? 0, signal, stdout, stderr }); + }); + }); +} + +function writeTrackedJob(workspace, jobPatch, storedPatch = {}) { + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + const timestamp = "2026-03-24T20:00:00.000Z"; + const job = { + id: jobPatch.id, + kind: "task", + kindLabel: "rescue", + status: "completed", + title: "Codex Task", + jobClass: "task", + summary: "Test task", + createdAt: timestamp, + updatedAt: timestamp, + ...jobPatch + }; + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [job] + }, + null, + 2 + )}\n`, + "utf8" + ); + fs.writeFileSync( + path.join(jobsDir, `${job.id}.json`), + `${JSON.stringify( + { + ...job, + ...storedPatch + }, + null, + 2 + )}\n`, + "utf8" + ); + return job; +} + test("setup reports ready when fake codex is installed and authenticated", () => { const binDir = makeTempDir(); installFakeCodex(binDir); @@ -833,6 +901,141 @@ test("task --background enqueues a detached worker and exposes per-job status", assert.match(resultPayload.storedJob.rendered, /Handled the requested task/); }); +test("await prints a completed job result", () => { + const workspace = makeTempDir(); + writeTrackedJob( + workspace, + { + id: "task-complete", + status: "completed", + threadId: "thr_done" + }, + { + result: { + status: 0, + rawOutput: "Completed from stored result." + } + } + ); + + const result = run("node", [SCRIPT, "await", "task-complete"], { + cwd: workspace + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Completed from stored result\./); + assert.match(result.stdout, /Codex session ID: thr_done/); +}); + +test("await prints a failed job result and exits non-zero", () => { + const workspace = makeTempDir(); + writeTrackedJob( + workspace, + { + id: "task-failed", + status: "failed", + threadId: "thr_failed" + }, + { + result: { + status: 1, + rawOutput: "Failure details from Codex." + } + } + ); + + const result = run("node", [SCRIPT, "await", "task-failed"], { + cwd: workspace + }); + + assert.equal(result.status, 1); + assert.match(result.stdout, /Failure details from Codex\./); + assert.match(result.stdout, /Codex session ID: thr_failed/); +}); + +test("await waits for a running job to complete before printing the result", async () => { + const workspace = makeTempDir(); + writeTrackedJob(workspace, { + id: "task-running", + status: "running", + threadId: "thr_running", + startedAt: "2026-03-24T20:00:00.000Z" + }); + + const awaited = runAsync("node", [SCRIPT, "await", "task-running", "--timeout-ms", "5000", "--poll-interval-ms", "100"], { + cwd: workspace + }); + + setTimeout(() => { + writeTrackedJob( + workspace, + { + id: "task-running", + status: "completed", + threadId: "thr_running", + completedAt: "2026-03-24T20:00:01.000Z" + }, + { + result: { + status: 0, + rawOutput: "Finished after polling." + } + } + ); + }, 200); + + const result = await awaited; + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Finished after polling\./); +}); + +test("await times out with the current status and exit 124", () => { + const workspace = makeTempDir(); + writeTrackedJob(workspace, { + id: "task-timeout", + status: "running", + threadId: "thr_timeout", + startedAt: "2026-03-24T20:00:00.000Z" + }); + + const result = run("node", [SCRIPT, "await", "task-timeout", "--timeout-ms", "1", "--poll-interval-ms", "100", "--json"], { + cwd: workspace + }); + + assert.equal(result.status, 124); + const payload = JSON.parse(result.stdout); + assert.equal(payload.job.id, "task-timeout"); + assert.equal(payload.job.status, "running"); + assert.equal(payload.waitTimedOut, true); + assert.equal(payload.timeoutMs, 1); +}); + +test("task --background --await prints the final result and exits with worker status", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run( + "node", + [SCRIPT, "task", "--background", "--await", "--poll-interval-ms", "100", "investigate the failing test"], + { + cwd: repo, + env: buildEnv(binDir) + } + ); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n\nCodex session ID: thr_1\nResume in Codex: codex resume thr_1\n"); + + const state = JSON.parse(fs.readFileSync(path.join(resolveStateDir(repo), "state.json"), "utf8")); + assert.equal(state.jobs[0].status, "completed"); +}); + test("review rejects focus text because it is native-review only", () => { const repo = makeTempDir(); const binDir = makeTempDir();