diff --git a/electron/ipc/channels.ts b/electron/ipc/channels.ts index b6a8e3d8..547703bf 100644 --- a/electron/ipc/channels.ts +++ b/electron/ipc/channels.ts @@ -157,6 +157,7 @@ export enum IPC { MCP_TaskCreated = 'mcp_task_created', MCP_TaskClosed = 'mcp_task_closed', MCP_TaskStateSync = 'mcp_task_state_sync', + MCP_TaskLandingReviewCleared = 'mcp_task_landing_review_cleared', MCP_ControlChanged = 'mcp_control_changed', // Coordinator notifications (main → renderer) MCP_CoordinatorNotificationStaged = 'mcp_coordinator_notification_staged', diff --git a/electron/ipc/pty.test.ts b/electron/ipc/pty.test.ts index 52f42304..696fc32c 100644 --- a/electron/ipc/pty.test.ts +++ b/electron/ipc/pty.test.ts @@ -755,6 +755,30 @@ describe('spawnAgent session reattach', () => { ).not.toThrow(); expect(mockPtySpawn).toHaveBeenCalledTimes(1); }); + + it('replaces an existing same-id PTY when attachExisting is explicitly false', () => { + const win = createMockWindow(); + const agentId = 'agent-replace'; + const args = buildSpawnArgs({ + agentId, + command: 'claude', + args: [], + dockerMode: false, + onOutput: { __CHANNEL_ID__: 'channel-1' }, + }); + + spawnAgent(win, args); + const oldProc = mockPtySpawn.mock.results[0].value as ReturnType; + + spawnAgent(win, { + ...args, + attachExisting: false, + onOutput: { __CHANNEL_ID__: 'channel-2' }, + }); + + expect(oldProc.kill).toHaveBeenCalled(); + expect(mockPtySpawn).toHaveBeenCalledTimes(2); + }); }); describe('validateCommand', () => { diff --git a/electron/ipc/register-mcp.test.ts b/electron/ipc/register-mcp.test.ts index 2e495475..0f5d952a 100644 --- a/electron/ipc/register-mcp.test.ts +++ b/electron/ipc/register-mcp.test.ts @@ -310,6 +310,7 @@ const VALID_ARGS = { coordinatorTaskId: TEST_COORDINATOR_ID, projectId: 'proj-1', projectRoot: '/absolute/project', + coordinatorBranch: 'task/coordinator-work', worktreePath: '/absolute/worktree', agentArgs: ['--flag', 'value'], dockerContainerName: 'my-container', @@ -356,6 +357,18 @@ describe('Layer 4 — StartMCPServer input validation', () => { expect(copyFileSpy).not.toHaveBeenCalled(); }); + it('rejects invalid coordinatorBranch', () => { + const writeFileSpy = vi.spyOn(fs, 'writeFileSync'); + const copyFileSpy = vi.spyOn(fs, 'copyFileSync'); + + expect(() => + validateStartMCPServerArgs({ ...VALID_ARGS, coordinatorBranch: 'bad branch' }), + ).toThrow('coordinatorBranch'); + + expect(writeFileSpy).not.toHaveBeenCalled(); + expect(copyFileSpy).not.toHaveBeenCalled(); + }); + it('rejects agentArgs containing a non-string element', () => { const writeFileSpy = vi.spyOn(fs, 'writeFileSync'); const copyFileSpy = vi.spyOn(fs, 'copyFileSync'); diff --git a/electron/ipc/register.ts b/electron/ipc/register.ts index f6b4151c..5ebeb7ea 100644 --- a/electron/ipc/register.ts +++ b/electron/ipc/register.ts @@ -177,6 +177,9 @@ export function validateStartMCPServerArgs(args: Record): void assertString(args.projectId, 'projectId'); validatePath(args.projectRoot, 'projectRoot'); if (args.worktreePath !== undefined) validatePath(args.worktreePath, 'worktreePath'); + if (args.coordinatorBranch !== undefined) { + validateBranchName(args.coordinatorBranch, 'coordinatorBranch'); + } if (args.agentCommand !== undefined) assertString(args.agentCommand, 'agentCommand'); if (args.agentArgs !== undefined) assertStringArray(args.agentArgs, 'agentArgs'); assertOptionalBoolean(args.skipPermissions, 'skipPermissions'); @@ -1180,10 +1183,22 @@ export function registerAllHandlers(win: BrowserWindow): void { ipcMain.handle( IPC.MCP_CoordinatorRegistered, - (_e, args: { coordinatorTaskId: string; projectId: string; worktreePath?: string }) => { + ( + _e, + args: { + coordinatorTaskId: string; + projectId: string; + coordinatorBranch?: string; + worktreePath?: string; + }, + ) => { assertString(args.coordinatorTaskId, 'coordinatorTaskId'); assertString(args.projectId, 'projectId'); + if (args.coordinatorBranch !== undefined) { + validateBranchName(args.coordinatorBranch, 'coordinatorBranch'); + } coordinator?.registerCoordinator(args.coordinatorTaskId, args.projectId, { + branchName: args.coordinatorBranch, worktreePath: args.worktreePath, }); }, @@ -1254,6 +1269,11 @@ export function registerAllHandlers(win: BrowserWindow): void { }, ); + ipcMain.handle(IPC.MCP_TaskLandingReviewCleared, (_e, args: { taskId: string }) => { + assertString(args.taskId, 'taskId'); + coordinator?.markTaskReviewed(args.taskId); + }); + ipcMain.handle( IPC.MCP_CoordinatedTaskClosed, (_e, args: { taskId: string; coordinatorTaskId: string }) => { @@ -1280,7 +1300,13 @@ export function registerAllHandlers(win: BrowserWindow): void { agentId?: string; signalDoneAt?: string; signalDoneConsumed?: boolean; + verification?: import('../mcp/types.js').SubtaskVerification; + landingState?: import('../mcp/types.js').LandingState; + landingReason?: string; + landingSummary?: string; + landedMetadata?: import('../mcp/types.js').LandedMetadata; mcpConfigPath?: string; + agentCommand?: string; preambleFileExistedBefore?: boolean; }, ) => { @@ -1296,7 +1322,8 @@ export function registerAllHandlers(win: BrowserWindow): void { assertString(args.coordinatorTaskId, 'coordinatorTaskId'); validateUUID(args.coordinatorTaskId, 'coordinatorTaskId'); if (!coordinator) throw new Error('coordinator mode not initialized'); - coordinator.hydrateTask({ + if (args.agentCommand !== undefined) assertString(args.agentCommand, 'agentCommand'); + const result = coordinator.hydrateTask({ id: args.id, name: args.name, projectId: args.projectId, @@ -1309,13 +1336,20 @@ export function registerAllHandlers(win: BrowserWindow): void { controlledBy: args.controlledBy, signalDoneAt: args.signalDoneAt, signalDoneConsumed: args.signalDoneConsumed, + verification: args.verification, + landingState: args.landingState, + landingReason: args.landingReason, + landingSummary: args.landingSummary, + landedMetadata: args.landedMetadata, mcpConfigPath: args.mcpConfigPath, + agentCommand: args.agentCommand, preambleFileExistedBefore: args.preambleFileExistedBefore, }); // Signal to renderer that MCP hydration is complete — gates TerminalView auto-spawn. if (!win.isDestroyed()) { win.webContents.send(IPC.MCP_TaskHydrated, { taskId: args.id }); } + return result; }, ); } @@ -1361,6 +1395,7 @@ export function registerAllHandlers(win: BrowserWindow): void { coordinatorTaskId: string; projectId: string; projectRoot: string; + coordinatorBranch?: string; worktreePath?: string; skipPermissions?: boolean; propagateSkipPermissions?: boolean; @@ -1405,6 +1440,7 @@ export function registerAllHandlers(win: BrowserWindow): void { // so create_task / list_tasks know about it. Idempotent — safe to call on restore. coordinator.setDefaultProject(args.projectId, args.projectRoot, args.coordinatorTaskId); coordinator.registerCoordinator(args.coordinatorTaskId, args.projectId, { + branchName: args.coordinatorBranch, worktreePath: args.worktreePath, skipPermissions: Boolean(args.skipPermissions && args.propagateSkipPermissions), }); diff --git a/electron/mcp/client.ts b/electron/mcp/client.ts index b7aeab3d..d44ab7ea 100644 --- a/electron/mcp/client.ts +++ b/electron/mcp/client.ts @@ -8,6 +8,8 @@ import type { ApiDiffResult, ApiMergeResult, ApiReviewAndMergeResult, + ApiLandSelfResult, + LandSelfInput, WaitForSignalDoneResult, } from './types.js'; @@ -106,7 +108,19 @@ export class MCPClient { } async signalDone(taskId: string): Promise { - const url = `${this.baseUrl}/api/tasks/${encodeURIComponent(taskId)}/done`; + await this.taskOwnerRequest('POST', `/api/tasks/${encodeURIComponent(taskId)}/done`, {}); + } + + async landSelf(taskId: string, input: LandSelfInput): Promise { + return this.taskOwnerRequest( + 'POST', + `/api/tasks/${encodeURIComponent(taskId)}/land`, + input, + ); + } + + private async taskOwnerRequest(method: string, path: string, body: unknown): Promise { + const url = `${this.baseUrl}${path}`; const headers: Record = { Authorization: `Bearer ${this.token}`, 'Content-Type': 'application/json', @@ -114,11 +128,12 @@ export class MCPClient { // Per-task done token is sent as X-Done-Token so the server can verify task ownership // without needing per-task bearer token classification. if (this.doneToken) headers['X-Done-Token'] = this.doneToken; - const res = await fetch(url, { method: 'POST', headers, body: '{}' }); + const res = await fetch(url, { method, headers, body: JSON.stringify(body) }); if (!res.ok) { const text = await res.text().catch(() => ''); - throw new Error(`API POST /api/tasks/.../done failed (${res.status}): ${text}`); + throw new Error(`API ${method} ${path} failed (${res.status}): ${text}`); } + return (await res.json()) as T; } async waitForSignalDone( diff --git a/electron/mcp/coordinator.test.ts b/electron/mcp/coordinator.test.ts index 180f92f1..5b630c9b 100644 --- a/electron/mcp/coordinator.test.ts +++ b/electron/mcp/coordinator.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import os from 'os'; import { join, dirname } from 'path'; -import { getChangedFiles, getAllFileDiffs, getDiffBaseSha } from '../ipc/git.js'; +import { getChangedFiles, getAllFileDiffs, getDiffBaseSha, mergeTask } from '../ipc/git.js'; // --- fs / child_process mocks (must come before dynamic import) --- const mockExecFile = vi.fn( @@ -138,6 +138,11 @@ function encode(s: string): string { return Buffer.from(s).toString('base64'); } +function emitWorkThenIdle(outputCb: (encoded: string) => void): void { + outputCb(encode('Working...\n')); + outputCb(encode('Done ❯ ')); +} + const mockWin = { isDestroyed: () => false, webContents: { send: mockNotifyRenderer }, @@ -200,6 +205,24 @@ describe('Coordinator registerCoordinator — idempotency', () => { expect.objectContaining({ name: 'test' }), ); }); + + it('defaults sub-task baseBranch to the coordinator branch', async () => { + coordinator.registerCoordinator('coord-1', 'proj-1', { + branchName: 'task/coordinator-work', + worktreePath: '/tmp/project', + }); + + await coordinator.createTask({ name: 'child', prompt: 'do', coordinatorTaskId: 'coord-1' }); + + expect(mockCreateBackendTask).toHaveBeenCalledWith( + 'child', + '/tmp/project', + ['.claude', 'node_modules'], + 'task', + 'task/coordinator-work', + ); + expect(coordinator.getTask('task-1')?.baseBranch).toBe('task/coordinator-work'); + }); }); // ─── coordinator notification tests ─────────────────────────────────────────── @@ -248,7 +271,7 @@ describe('Coordinator coordinator notifications', () => { ); }); - it('notifies coordinator when sub-task idles after prompt delivery', async () => { + it('suppresses the stale idle prompt immediately after prompt delivery', async () => { coordinator.registerCoordinator('coord-1', 'proj-1'); await coordinator.createTask({ name: 'test', @@ -259,7 +282,14 @@ describe('Coordinator coordinator notifications', () => { coordinator.markPromptDelivered('task-1'); - outputCb(encode('Working... ❯ ')); + outputCb(encode('❯ ')); + expect(mockNotifyRenderer).not.toHaveBeenCalledWith( + 'mcp_coordinator_notification_staged', + expect.anything(), + ); + + outputCb(encode('Working...')); + outputCb(encode('Done ❯ ')); expect(mockNotifyRenderer).toHaveBeenCalledWith( 'mcp_coordinator_notification_staged', expect.objectContaining({ @@ -291,7 +321,7 @@ describe('Coordinator coordinator notifications', () => { const outputCb = getOutputCb(); const agentId = getAgentId(); const exitHandler = getExitHandler(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); mockNotifyRenderer.mockClear(); exitHandler(agentId, { exitCode: 0 }); const stagedCalls = mockNotifyRenderer.mock.calls.filter( @@ -307,7 +337,7 @@ describe('Coordinator coordinator notifications', () => { await coordinator.createTask({ name: 'task-a', prompt: 'do', coordinatorTaskId: 'coord-1' }); coordinator.markPromptDelivered('task-1'); const outputCb = getOutputCb(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); const stagedCallAck = mockNotifyRenderer.mock.calls.find( (c) => c[0] === 'mcp_coordinator_notification_staged', @@ -325,7 +355,7 @@ describe('Coordinator coordinator notifications', () => { await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); coordinator.markPromptDelivered('task-1'); const outputCb = getOutputCb(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); const stagedCallIdempotent = mockNotifyRenderer.mock.calls.find( (c) => c[0] === 'mcp_coordinator_notification_staged', ); @@ -370,7 +400,7 @@ describe('Coordinator coordinator notifications', () => { await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); coordinator.markPromptDelivered('task-1'); const outputCb = getOutputCb(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); expect(mockNotifyRenderer).toHaveBeenCalledWith( 'mcp_coordinator_notification_staged', @@ -464,6 +494,322 @@ describe('Coordinator signal_done', () => { }); }); +// ─── land_self tests ────────────────────────────────────────────────────────── + +describe('Coordinator land_self', () => { + let coordinator: InstanceType; + + const verification = { + checks: [{ name: 'typecheck', command: 'npm run typecheck', result: 'passed' as const }], + }; + + function mockGit(statusOut = '') { + mockExecFile.mockImplementation( + ( + _cmd: string, + args: string[], + _opts: unknown, + cb: (err: Error | null, stdout: string, stderr: string) => void, + ) => { + if (args.join(' ') === 'rev-parse --abbrev-ref HEAD') { + cb(null, 'task/test\n', ''); + return; + } + if (args[0] === 'status') { + cb(null, statusOut, ''); + return; + } + if (args.join(' ') === 'rev-parse HEAD') { + cb(null, 'landed-sha\n', ''); + return; + } + cb(null, '', ''); + }, + ); + } + + beforeEach(async () => { + vi.clearAllMocks(); + mockExistsSync.mockReturnValue(false); + mockFsReadFile.mockResolvedValue('# existing\n'); + mockReadFileSync.mockReturnValue('# existing\n'); + mockCreateBackendTask.mockResolvedValue({ + id: 'task-1', + branch_name: 'task/test', + worktree_path: '/tmp/test', + }); + vi.mocked(mergeTask).mockResolvedValue({ + main_branch: 'main', + lines_added: 3, + lines_removed: 1, + }); + const { deleteTask: mockDeleteTask } = await import('../ipc/tasks.js'); + vi.mocked(mockDeleteTask).mockResolvedValue(undefined); + mockGit(); + coordinator = new Coordinator(); + coordinator.setWindow(mockWin); + coordinator.setDefaultProject('proj-1', '/tmp/project'); + coordinator.registerCoordinator('coord-1', 'proj-1', { worktreePath: '/tmp/project' }); + await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); + mockNotifyRenderer.mockClear(); + }); + + it('lands, cleans up resources, and closes the task record', async () => { + const { deleteTask: mockDeleteTask } = await import('../ipc/tasks.js'); + + const result = await coordinator.landSelf('task-1', { verification, summary: 'done' }); + + expect(result).toMatchObject({ + mainBranch: 'main', + linesAdded: 3, + linesRemoved: 1, + landingState: 'reviewed', + }); + expect(vi.mocked(mergeTask)).toHaveBeenCalled(); + expect(vi.mocked(mockDeleteTask)).toHaveBeenCalled(); + expect(coordinator.getTask('task-1')).toBeUndefined(); + expect(mockNotifyRenderer).toHaveBeenCalledWith( + 'mcp_task_closed', + expect.objectContaining({ taskId: 'task-1' }), + ); + }); + + it('rejects missing verification before merging', async () => { + await expect(coordinator.landSelf('task-1', { verification: { checks: [] } })).rejects.toThrow( + 'verification', + ); + + expect(vi.mocked(mergeTask)).not.toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landing_escalated'); + }); + + it('rejects dirty non-preamble worktrees before merging', async () => { + mockGit(' M src/file.ts\n'); + + await expect(coordinator.landSelf('task-1', { verification })).rejects.toThrow( + 'uncommitted changes', + ); + + expect(vi.mocked(mergeTask)).not.toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landing_escalated'); + }); + + it('rejects dirty preamble files that contain user-authored changes', async () => { + const preambleWithUserChange = + '\nrules\n\n\n# User-authored change\n'; + const fakeDiff = + 'diff --git a/AGENTS.md b/AGENTS.md\n--- a/AGENTS.md\n+++ b/AGENTS.md\n@@ -0,0 +1 @@\n+# User-authored change\n'; + mockFsReadFile.mockImplementation(async (p: unknown) => { + if (String(p).includes('AGENTS.md')) return preambleWithUserChange; + throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' }); + }); + mockExistsSync.mockImplementation((p?: unknown) => String(p).includes('AGENTS.md')); + mockReadFileSync.mockReturnValue(preambleWithUserChange); + mockExecFile.mockImplementation( + ( + _cmd: string, + args: string[], + optsOrCb: unknown, + cbMaybe?: (err: Error | null, stdout: string, stderr: string) => void, + ) => { + const cb = (typeof optsOrCb === 'function' ? optsOrCb : cbMaybe) as ( + err: Error | null, + stdout: string, + stderr: string, + ) => void; + if (args.join(' ') === 'rev-parse --abbrev-ref HEAD') { + cb(null, 'task/test\n', ''); + return; + } + if (args[0] === 'status') { + cb(null, ' M AGENTS.md\n', ''); + return; + } + if (args[0] === 'show') { + cb(null, '', ''); + return; + } + if (args[0] === 'diff') { + cb(Object.assign(new Error('diff'), { code: 1, stdout: fakeDiff }), fakeDiff, ''); + return; + } + cb(null, '', ''); + }, + ); + + await expect(coordinator.landSelf('task-1', { verification })).rejects.toThrow( + 'uncommitted changes in preamble files', + ); + + expect(vi.mocked(mergeTask)).not.toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landing_escalated'); + }); + + it('allows dirty settings.local.json when only injected systemPrompt is stripped', async () => { + const settingsContent = JSON.stringify({ + systemPrompt: '\nrules\n', + }); + mockFsReadFile.mockImplementation(async (p: unknown) => { + if (String(p).includes('settings.local.json')) return settingsContent; + throw Object.assign(new Error('ENOENT'), { code: 'ENOENT' }); + }); + mockExistsSync.mockImplementation((p?: unknown) => String(p).includes('settings.local.json')); + mockReadFileSync.mockReturnValue(settingsContent); + let statusCalls = 0; + mockExecFile.mockImplementation( + ( + _cmd: string, + args: string[], + optsOrCb: unknown, + cbMaybe?: (err: Error | null, stdout: string, stderr: string) => void, + ) => { + const cb = (typeof optsOrCb === 'function' ? optsOrCb : cbMaybe) as ( + err: Error | null, + stdout: string, + stderr: string, + ) => void; + if (args.join(' ') === 'rev-parse --abbrev-ref HEAD') { + cb(null, 'task/test\n', ''); + return; + } + if (args[0] === 'status') { + statusCalls += 1; + cb(null, statusCalls <= 2 ? ' M .claude/settings.local.json\n' : '', ''); + return; + } + if (args[0] === 'show') { + cb(null, '', ''); + return; + } + if (args[0] === 'diff') { + cb(null, '', ''); + return; + } + if (args.join(' ') === 'rev-parse HEAD') { + cb(null, 'landed-sha\n', ''); + return; + } + cb(null, '', ''); + }, + ); + + const result = await coordinator.landSelf('task-1', { verification }); + + expect(result.landingState).toBe('reviewed'); + expect(vi.mocked(mergeTask)).toHaveBeenCalled(); + }); + + it('rejects orphaned tasks before merging', async () => { + coordinator.deregisterCoordinator('coord-1'); + + await expect(coordinator.landSelf('task-1', { verification })).rejects.toThrow('orphaned task'); + + expect(vi.mocked(mergeTask)).not.toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landing_escalated'); + }); + + it('rejects branch mismatch before merging', async () => { + mockExecFile.mockImplementation( + ( + _cmd: string, + args: string[], + _opts: unknown, + cb: (err: Error | null, stdout: string, stderr: string) => void, + ) => { + if (args.join(' ') === 'rev-parse --abbrev-ref HEAD') { + cb(null, 'other-branch\n', ''); + return; + } + cb(null, '', ''); + }, + ); + + await expect(coordinator.landSelf('task-1', { verification })).rejects.toThrow( + 'Branch mismatch', + ); + + expect(vi.mocked(mergeTask)).not.toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landing_escalated'); + }); + + it('records merge conflict as landing_escalated without cleanup', async () => { + const { deleteTask: mockDeleteTask } = await import('../ipc/tasks.js'); + vi.mocked(mergeTask).mockRejectedValueOnce(new Error('Merge failed: conflict')); + + await expect(coordinator.landSelf('task-1', { verification })).rejects.toThrow('conflict'); + + expect(vi.mocked(mockDeleteTask)).not.toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landing_escalated'); + }); + + it('keeps landed state visible if landed commit resolution fails after merge', async () => { + const { deleteTask: mockDeleteTask } = await import('../ipc/tasks.js'); + mockExecFile.mockImplementation( + ( + _cmd: string, + args: string[], + _opts: unknown, + cb: (err: Error | null, stdout: string, stderr: string) => void, + ) => { + if (args.join(' ') === 'rev-parse --abbrev-ref HEAD') { + cb(null, 'task/test\n', ''); + return; + } + if (args[0] === 'status') { + cb(null, '', ''); + return; + } + if (args[0] === 'rev-parse') { + cb(new Error('rev-parse failed'), '', 'fatal'); + return; + } + cb(null, '', ''); + }, + ); + + await expect(coordinator.landSelf('task-1', { verification })).rejects.toThrow( + 'landed commit could not be resolved', + ); + + expect(vi.mocked(mergeTask)).toHaveBeenCalled(); + expect(vi.mocked(mockDeleteTask)).not.toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landed_cleanup_failed'); + expect(coordinator.getTask('task-1')?.landedMetadata?.landedCommit).toBe('unresolved'); + }); + + it('records cleanup failure without hiding the landed merge', async () => { + const { deleteTask: mockDeleteTask } = await import('../ipc/tasks.js'); + vi.mocked(mockDeleteTask).mockRejectedValueOnce(new Error('delete failed')); + + await expect(coordinator.landSelf('task-1', { verification })).rejects.toThrow( + 'cleanup failed', + ); + + expect(vi.mocked(mergeTask)).toHaveBeenCalled(); + expect(coordinator.getTask('task-1')?.landingState).toBe('landed_cleanup_failed'); + expect(coordinator.getTask('task-1')?.landedMetadata?.landedCommit).toBe('landed-sha'); + }); + + it('marks legacy landed pending-review task summaries as reviewed', () => { + const task = coordinator.getTask('task-1'); + if (!task) throw new Error('test task missing'); + task.landingState = 'landed_pending_review'; + + coordinator.markTaskReviewed('task-1'); + + expect(coordinator.getTask('task-1')?.landingState).toBe('reviewed'); + expect(coordinator.listTasks()[0]?.landingState).toBe('reviewed'); + expect(mockNotifyRenderer).toHaveBeenCalledWith( + 'mcp_task_state_sync', + expect.objectContaining({ + taskId: 'task-1', + landingState: 'reviewed', + needsReview: false, + }), + ); + }); +}); + // ─── spawn defaults / skipPermissions tests ─────────────────────────────────── describe('Coordinator sub-agent spawn settings', () => { @@ -848,7 +1194,7 @@ describe('Coordinator waitForSignalDone', () => { const waitPromise = coordinator.waitForSignalDone('coord-1'); const task2OutputCb = mockSubscribeToAgent.mock.calls[1][1] as (encoded: string) => void; - task2OutputCb(encode('Done ❯ ')); + emitWorkThenIdle(task2OutputCb); expect(mockNotifyRenderer).not.toHaveBeenCalledWith( 'mcp_coordinator_notification_staged', @@ -870,7 +1216,7 @@ describe('Coordinator waitForSignalDone', () => { await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); coordinator.markPromptDelivered('task-1'); const outputCb = getOutputCb(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); expect(mockNotifyRenderer).toHaveBeenCalledWith( 'mcp_coordinator_notification_staged', @@ -968,6 +1314,52 @@ describe('Coordinator sendPrompt', () => { }); }); +// ─── mergeTask active ownership guard ───────────────────────────────────────── + +describe('Coordinator mergeTask active ownership guard', () => { + let coordinator: InstanceType; + + beforeEach(() => { + vi.clearAllMocks(); + mockExistsSync.mockReturnValue(false); + mockCreateBackendTask.mockResolvedValue({ + id: 'task-1', + branch_name: 'task/test', + worktree_path: '/tmp/test', + }); + vi.mocked(mergeTask).mockResolvedValue({ + main_branch: 'main', + lines_added: 1, + lines_removed: 0, + }); + coordinator = new Coordinator(); + coordinator.setWindow(mockWin); + coordinator.setDefaultProject('proj-1', '/tmp/project'); + coordinator.registerCoordinator('coord-1', 'proj-1'); + }); + + it('rejects merge_task for a running task that has not signaled manual review', async () => { + await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); + + await expect(coordinator.mergeTask('task-1')).rejects.toThrow('still running'); + + expect(vi.mocked(mergeTask)).not.toHaveBeenCalled(); + }); + + it('allows legacy signal_done manual-review tasks even if the agent process is still running', async () => { + await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); + coordinator.signalDone('task-1'); + + await expect(coordinator.mergeTask('task-1')).resolves.toEqual({ + mainBranch: 'main', + linesAdded: 1, + linesRemoved: 0, + }); + + expect(vi.mocked(mergeTask)).toHaveBeenCalled(); + }); +}); + // ─── deregisterCoordinator tests ────────────────────────────────────────────── describe('Coordinator deregisterCoordinator', () => { @@ -1011,7 +1403,7 @@ describe('Coordinator deregisterCoordinator', () => { await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); coordinator.markPromptDelivered('task-1'); const outputCb = getOutputCb(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); expect(mockNotifyRenderer).toHaveBeenCalledWith( 'mcp_coordinator_notification_staged', @@ -1200,6 +1592,27 @@ describe('Coordinator MCP_TaskCreated spawn settings', () => { expect(payload.agentArgs).not.toContain('--dangerously-skip-permissions'); }); + it('includes subtask MCP launch args in MCP_TaskCreated payload', async () => { + coordinator.setMCPServerInfo( + 'coord-1', + 'http://localhost:3001', + 'coordinator-token', + 'subtask-token', + '/tmp/mcp-server.cjs', + ); + coordinator.setCoordinatorSpawnDefaults('coord-1', 'codex', []); + + await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); + + const payload = mockNotifyRenderer.mock.calls.find((c) => c[0] === 'mcp_task_created')?.[1] as { + mcpLaunchArgs: string[]; + }; + expect(payload.mcpLaunchArgs).toEqual([ + '--config', + expect.stringContaining('mcp_servers.parallel-code'), + ]); + }); + it('includes skipPermissions true in MCP_TaskCreated payload when coordinator has propagateSkipPermissions', async () => { // skipPermissions is now inherited from the coordinator's propagateSkipPermissions, // not from createTask opts. Re-register with skipPermissions: true. @@ -1736,7 +2149,7 @@ describe('Coordinator waitForSignalDone — notification lifecycle', () => { await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' }); coordinator.markPromptDelivered('task-1'); const outputCb = getOutputCb(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); // Confirm a notification was staged expect(mockNotifyRenderer).toHaveBeenCalledWith( @@ -1787,7 +2200,7 @@ describe('Coordinator waitForSignalDone — notification lifecycle', () => { // Trigger an idle so a notification is queued const outputCb = getOutputCb(); - outputCb(encode('Done ❯ ')); + emitWorkThenIdle(outputCb); // Start wait — this clears staged notifications const waitPromise = coordinator.waitForSignalDone('coord-1', 100); @@ -2628,6 +3041,101 @@ describe('Coordinator restart round-trip integration', () => { expect(writtenToken).not.toBe('new-coordinator-secret'); }); + it('hydrateTask returns fresh Codex MCP launch args after rewriting config', () => { + coordinator.setMCPServerInfo( + 'coord-1', + 'http://localhost:3002', + 'new-coordinator-secret', + 'new-subtask-secret', + '/path/server.js', + ); + + const taskId = 'hydrated-restart-codex'; + const configPath = join(os.tmpdir(), `parallel-code-subtask-${taskId}.json`); + + const result = coordinator.hydrateTask({ + id: taskId, + name: 'hydrated-task', + projectId: 'proj-1', + projectRoot: '/tmp/project', + branchName: 'task/hydrated', + worktreePath: '/tmp/hydrated', + agentId: 'agent-restart-codex', + coordinatorTaskId: 'coord-1', + mcpConfigPath: configPath, + agentCommand: 'codex', + }); + + expect(result.mcpLaunchArgs).toEqual([ + '--config', + expect.stringContaining('new-subtask-secret'), + ]); + }); + + it('hydrateTask returns Codex MCP launch args even without a persisted config path', () => { + coordinator.setMCPServerInfo( + 'coord-1', + 'http://localhost:3002', + 'new-coordinator-secret', + 'new-subtask-secret', + '/path/server.js', + ); + + mockAtomicWriteFileSync.mockClear(); + const result = coordinator.hydrateTask({ + id: 'hydrated-restart-codex-inline', + name: 'hydrated-task', + projectId: 'proj-1', + projectRoot: '/tmp/project', + branchName: 'task/hydrated', + worktreePath: '/tmp/hydrated', + agentId: 'agent-restart-codex-inline', + coordinatorTaskId: 'coord-1', + agentCommand: 'codex', + }); + + expect(mockAtomicWriteFileSync).not.toHaveBeenCalled(); + expect(result.mcpLaunchArgs).toEqual([ + '--config', + expect.stringContaining('new-subtask-secret'), + ]); + expect(result.mcpLaunchArgs?.[1]).toContain('--task-id'); + expect(result.mcpLaunchArgs?.[1]).toContain('hydrated-restart-codex-inline'); + }); + + it('hydrateTask returns launch args when the task is already hydrated', () => { + coordinator.setMCPServerInfo( + 'coord-1', + 'http://localhost:3002', + 'new-coordinator-secret', + 'new-subtask-secret', + '/path/server.js', + ); + + const taskId = 'hydrated-restart-existing'; + const configPath = join(os.tmpdir(), `parallel-code-subtask-${taskId}.json`); + const args = { + id: taskId, + name: 'hydrated-task', + projectId: 'proj-1', + projectRoot: '/tmp/project', + branchName: 'task/hydrated', + worktreePath: '/tmp/hydrated', + agentId: 'agent-restart-existing', + coordinatorTaskId: 'coord-1', + mcpConfigPath: configPath, + agentCommand: 'codex', + }; + + coordinator.hydrateTask(args); + const result = coordinator.hydrateTask(args); + + expect(result.mcpLaunchArgs).toEqual([ + '--config', + expect.stringContaining('new-subtask-secret'), + ]); + }); + it('waitForIdle resolves after agent output fires post-hydration', async () => { coordinator.setMCPServerInfo( 'coord-1', diff --git a/electron/mcp/coordinator.ts b/electron/mcp/coordinator.ts index 647f1d0b..5553bf6e 100644 --- a/electron/mcp/coordinator.ts +++ b/electron/mcp/coordinator.ts @@ -56,6 +56,10 @@ import type { ApiTaskSummary, ApiTaskDetail, ApiDiffResult, + ApiLandSelfResult, + LandSelfInput, + LandingState, + SubtaskVerification, WaitForSignalDoneResult, } from './types.js'; import { IPC } from '../ipc/channels.js'; @@ -63,6 +67,45 @@ import { IPC } from '../ipc/channels.js'; const DEFAULT_WAIT_TIMEOUT_MS = 300_000; // 5 minutes const PROMPT_WRITE_DELAY_MS = 50; const REST_COORDINATOR_SENTINEL = 'api'; +const PREAMBLE_ARTIFACT_PATHS = new Set([ + 'AGENTS.md', + 'GEMINI.md', + '.agent.md', + '.claude/settings.local.json', +]); +const UNRESOLVED_LANDED_COMMIT = 'unresolved'; + +function isPassedVerification(verification: SubtaskVerification | undefined): boolean { + return Boolean( + verification?.checks.length && verification.checks.every((check) => check.result === 'passed'), + ); +} + +function verificationFailureReason(verification: SubtaskVerification | undefined): string { + if (!verification?.checks.length) return 'land_self requires at least one verification check'; + const failed = verification.checks.find((check) => check.result !== 'passed'); + if (!failed) return 'verification passed'; + return `verification ${failed.result}: ${failed.name}${failed.reason ? ` — ${failed.reason}` : ''}`; +} + +function parsePorcelainPaths(statusOut: string): string[] { + return statusOut + .split('\n') + .map((line) => line.trimEnd()) + .filter(Boolean) + .map((line) => { + const renameMarker = ' -> '; + const rawPath = line.slice(3); + if (!rawPath.includes(renameMarker)) return rawPath; + const parts = rawPath.split(renameMarker); + return parts[parts.length - 1] ?? rawPath; + }); +} + +function execStdout(result: Awaited>): string { + const stdout = typeof result === 'string' ? result : result.stdout; + return typeof stdout === 'string' ? stdout : stdout.toString('utf8'); +} export class Coordinator { private tasks = new Map(); @@ -83,6 +126,7 @@ export class Coordinator { private projectRoot: string | null = null; private projectId: string | null = null; private defaultCoordinatorTaskId: string | null = null; + private landedOrderCounters = new Map(); private coordinatorSpawnDefaults: { command: string; args: string[] } = { command: 'claude', args: [], @@ -110,6 +154,7 @@ export class Coordinator { for (const resolve of resolvers) resolve({ reason: 'exited' }); this.idleResolvers.delete(task.id); } + if (this.closingTaskIds.has(task.id)) break; // Resolve any signal waiters so wait_for_signal_done doesn't hang // when the last sub-task exits without calling signal_done. const coordinatorId = task.coordinatorTaskId; @@ -130,17 +175,15 @@ export class Coordinator { }); this.finishSignalWait(coordinatorId); } - if (this.closingTaskIds.has(task.id)) break; this.maybeQueueReviewNotification(task, 'exited', exitCode ?? null); break; } } }); - // Re-subscribe our output callback when the renderer respawns a managed agent. - // TerminalView kills the existing PTY (clearing all subscribers) then spawns a - // new one with the same agentId. Without this, our outputCb is lost and we - // can never detect idle for that sub-task. + // Re-subscribe our output callback when the renderer reattaches to, or explicitly + // replaces, a managed agent. Without this, our outputCb is lost and we can never + // detect idle for that sub-task. onPtyEvent('spawn', (agentId) => { const outputCb = this.subscribers.get(agentId); if (!outputCb) return; // not a coordinated agent, or initial spawn (not yet subscribed) @@ -277,6 +320,11 @@ export class Coordinator { // Always notify for exits — a task killed before prompt delivery still needs to be // reported so the coordinator doesn't think it's still running. if (!task.assignedPromptDelivered && state !== 'exited') return; + if (state === 'idle' && task.suppressNextIdleNotification) { + task.suppressNextIdleNotification = false; + this.tailBuffers.set(task.agentId, ''); + return; + } const coordinator = this.coordinators.get(task.coordinatorTaskId); if (!coordinator) { @@ -422,13 +470,16 @@ export class Coordinator { ); } - if (opts.baseBranch !== undefined) { - validateBranchName(opts.baseBranch, 'baseBranch'); - } - const root = opts.projectRoot ?? coordinatorState.projectRoot ?? this.projectRoot; const projId = opts.projectId ?? coordinatorState.projectId ?? this.projectId; if (!root || !projId) throw new Error('No project configured for coordinator'); + const coordinatorBranch = coordinatorState.branchName?.trim() + ? coordinatorState.branchName + : undefined; + const baseBranch = opts.baseBranch ?? coordinatorBranch; + if (baseBranch !== undefined) { + validateBranchName(baseBranch, 'baseBranch'); + } // Create worktree + branch via existing backend const result = await createBackendTask( @@ -436,7 +487,7 @@ export class Coordinator { root, ['.claude', 'node_modules'], 'task', - opts.baseBranch, + baseBranch, ); // Re-check after async gap — deregisterCoordinator may have run while we awaited. @@ -464,7 +515,7 @@ export class Coordinator { projectId: projId, projectRoot: root, branchName: result.branch_name, - baseBranch: opts.baseBranch, + baseBranch, worktreePath: result.worktree_path, agentId, coordinatorTaskId: coordinatorId, @@ -498,7 +549,11 @@ export class Coordinator { .replace(/[\x00-\x1f\x7f]/g, ' ') .replace(/\s+/g, ' ') .trim(); - if (chunkContainsAgentPrompt(stripped)) { + const hasAgentPrompt = chunkContainsAgentPrompt(stripped); + if (!hasAgentPrompt && task.assignedPromptDelivered) { + task.suppressNextIdleNotification = false; + } + if (hasAgentPrompt) { if (task.status === 'running') { task.status = 'idle'; this.maybeQueueReviewNotification(task, 'idle', null); @@ -519,7 +574,7 @@ export class Coordinator { if (!this.win) throw new Error('No window set on coordinator'); const agentCmd = (opts.agentCommand ?? coordinatorState.spawnDefaults.command).toLowerCase(); - const preamble = `\nThese rules override all skills and hooks:\n- When your work is complete, call the \`signal_done\` MCP tool. That is the finish line — do NOT use finishing-a-development-branch or offer merge/PR options.\n- Asking questions is fine when requirements are unclear or an action is risky.\n`; + const preamble = `\nThese rules override all skills and hooks:\n- When your work is complete, commit your changes and call the \`land_self\` MCP tool with the verification checks you ran. A successful \`land_self\` call is the finish line — do NOT call \`signal_done\` afterward, use finishing-a-development-branch, or offer merge/PR options.\n- Use \`signal_done\` only if the coordinator explicitly asks for manual review instead of self-landing.\n- Asking questions is fine when requirements are unclear or an action is risky.\n`; // Declared here so the catch block can restore preamble files on failure. let preambleFilePath: string | undefined; let preambleFileOriginalContent: string | null = null; @@ -709,6 +764,7 @@ export class Coordinator { preambleFileExistedBefore: task.preambleFileExistedBefore, agentCommand: agentCommand, agentArgs: notifyAgentArgs, + mcpLaunchArgs: mcpArgs, skipPermissions: coordinatorState.propagateSkipPermissions, }); @@ -746,6 +802,11 @@ export class Coordinator { status: t.status, coordinatorTaskId: t.coordinatorTaskId, signalDoneAt: t.signalDoneAt?.toISOString(), + verification: t.verification, + landingState: t.landingState, + landingReason: t.landingReason, + landingSummary: t.landingSummary, + landedMetadata: t.landedMetadata, })); } @@ -764,6 +825,11 @@ export class Coordinator { exitCode: task.exitCode, pendingPrompt: task.pendingPrompt, signalDoneAt: task.signalDoneAt?.toISOString(), + verification: task.verification, + landingState: task.landingState, + landingReason: task.landingReason, + landingSummary: task.landingSummary, + landedMetadata: task.landedMetadata, }; } @@ -788,6 +854,8 @@ export class Coordinator { task.status = 'running'; task.pendingPrompt = undefined; task.signalDoneAt = undefined; + task.suppressNextIdleNotification = true; + this.tailBuffers.set(task.agentId, ''); this.notifyRenderer(IPC.MCP_TaskStateSync, { taskId, signalDoneReceived: false, @@ -909,47 +977,138 @@ export class Coordinator { return stripAnsi(this.tailBuffers.get(task.agentId) ?? ''); } - async mergeTask( - taskId: string, - opts?: { squash?: boolean; message?: string; cleanup?: boolean }, - ): Promise<{ mainBranch: string; linesAdded: number; linesRemoved: number }> { - const task = this.tasks.get(taskId); - if (!task) throw new Error(`Task not found: ${taskId}`); + private async currentBranch(worktreePath: string): Promise { + const result = await execAsync('git', ['rev-parse', '--abbrev-ref', 'HEAD'], { + cwd: worktreePath, + }); + const branch = execStdout(result).trim(); + return branch === 'HEAD' ? null : branch; + } - const root = task.projectRoot; + private async statusPaths(worktreePath: string): Promise { + const result = await execAsync('git', ['status', '--porcelain'], { cwd: worktreePath }); + return parsePorcelainPaths(execStdout(result)); + } - // Strip injected preamble files before staging so they don't land in history, - // then auto-commit any uncommitted changes in the task worktree before merging. - if (task.worktreePath) { - await stripPreambleFromBranch(task); + private nextLandedOrder(coordinatorTaskId: string): number { + const next = (this.landedOrderCounters.get(coordinatorTaskId) ?? 0) + 1; + this.landedOrderCounters.set(coordinatorTaskId, next); + return next; + } + + private syncLandingState(task: CoordinatedTask): void { + this.notifyRenderer(IPC.MCP_TaskStateSync, { + taskId: task.id, + verification: task.verification, + landingState: task.landingState, + landingReason: task.landingReason ?? null, + landingSummary: task.landingSummary ?? null, + landedMetadata: task.landedMetadata ?? null, + needsReview: + task.landingState === 'landed_pending_review' || + task.landingState === 'landed_cleanup_failed' || + task.landingState === 'landing_escalated' || + task.landingState === 'landing_failed', + controlledBy: + task.landingState === 'landed_pending_review' || + task.landingState === 'landed_cleanup_failed' + ? null + : undefined, + mcpConfigPath: + task.landingState === 'landed_pending_review' || + task.landingState === 'landed_cleanup_failed' + ? null + : undefined, + mcpStartupStatus: + task.landingState === 'landed_pending_review' || + task.landingState === 'landed_cleanup_failed' + ? null + : undefined, + }); + } + + private escalateLanding(task: CoordinatedTask, state: LandingState, reason: string): void { + task.landingState = state; + task.landingReason = reason; + this.syncLandingState(task); + } + + private async prepareCleanSelfLandingWorktree(task: CoordinatedTask): Promise { + const actualBranch = await this.currentBranch(task.worktreePath); + if (actualBranch === null) { + throw new Error( + `The worktree for '${task.branchName}' has a detached HEAD. Check out '${task.branchName}' before landing.`, + ); + } + if (actualBranch !== task.branchName) { + throw new Error( + `Branch mismatch: the worktree is on '${actualBranch}' but the task expects '${task.branchName}'.`, + ); + } + + const preambleFiles = await detectPreambleFiles(task.worktreePath); + const dirtyPathsBeforeStrip = await this.statusPaths(task.worktreePath); + const dirtyPreamblePaths = dirtyPathsBeforeStrip.filter( + (pathName) => preambleFiles.has(pathName) && PREAMBLE_ARTIFACT_PATHS.has(pathName), + ); + const dirtyPreambleUserPaths: string[] = []; + for (const pathName of dirtyPreamblePaths) { + const normalizedDiff = await buildNormalizedPreambleFileDiff( + pathName, + task.worktreePath, + 'HEAD', + ); + if (normalizedDiff.trim()) dirtyPreambleUserPaths.push(pathName); + } + if (dirtyPreambleUserPaths.length > 0) { + throw new Error( + `Task worktree has uncommitted changes in preamble files: ${dirtyPreambleUserPaths.join(', ')}. Commit or discard them before calling land_self.`, + ); + } + + await stripPreambleFromBranch(task); + + const dirtyPaths = await this.statusPaths(task.worktreePath); + const nonPreamblePaths = dirtyPaths.filter( + (pathName) => !preambleFiles.has(pathName) || !PREAMBLE_ARTIFACT_PATHS.has(pathName), + ); + if (nonPreamblePaths.length > 0) { + throw new Error( + `Task worktree has uncommitted changes: ${nonPreamblePaths.join(', ')}. Commit or discard them before calling land_self.`, + ); + } + + if (dirtyPaths.length > 0) { + await execAsync('git', ['add', '-A', '--', ...dirtyPaths], { cwd: task.worktreePath }); try { - await execAsync('git', ['add', '-A'], { cwd: task.worktreePath }); - await execAsync('git', ['commit', '-m', 'WIP: auto-commit before merge'], { + await execAsync('git', ['commit', '-m', 'Remove Parallel Code sub-task preamble'], { cwd: task.worktreePath, }); } catch { - // Commit failed — check if uncommitted changes still exist - const { stdout: statusOut } = await execAsync('git', ['status', '--porcelain'], { - cwd: task.worktreePath, - }); - if (statusOut.trim()) { - throw new Error( - `Auto-commit failed and the task worktree still has uncommitted changes. ` + - `Please commit or discard changes in ${task.worktreePath} before merging.`, - ); - } - // Nothing to commit — swallow silently + /* no staged preamble cleanup */ } } + const remainingDirtyPaths = await this.statusPaths(task.worktreePath); + if (remainingDirtyPaths.length > 0) { + throw new Error( + `Task worktree is still dirty after injected artifacts were handled: ${remainingDirtyPaths.join(', ')}`, + ); + } + } + + private async runGitMerge( + task: CoordinatedTask, + opts?: { squash?: boolean; message?: string }, + ): Promise<{ mainBranch: string; linesAdded: number; linesRemoved: number }> { const coordinatorState = this.coordinators.get(task.coordinatorTaskId); const runMerge = () => gitMergeTask( - root, + task.projectRoot, task.branchName, opts?.squash ?? false, opts?.message ?? null, - false, // worktree removal is handled by cleanupTask below, not gitMergeTask + false, task.baseBranch, task.worktreePath, coordinatorState?.worktreePath, @@ -960,7 +1119,6 @@ export class Coordinator { } catch (err) { const msg = err instanceof Error ? err.message : String(err); if (msg.includes('Another git process') || msg.includes('index.lock')) { - // Stale git lock — wait for it to clear then retry once await new Promise((r) => setTimeout(r, 2000)); result = await runMerge(); } else { @@ -968,17 +1126,255 @@ export class Coordinator { } } + return { + mainBranch: result.main_branch, + linesAdded: result.lines_added, + linesRemoved: result.lines_removed, + }; + } + + private async resolveLandedCommit(task: CoordinatedTask, targetBranch: string): Promise { + const coordinatorState = this.coordinators.get(task.coordinatorTaskId); + const attempts: Array<{ cwd: string; rev: string }> = []; + if (coordinatorState?.worktreePath) { + attempts.push({ cwd: coordinatorState.worktreePath, rev: 'HEAD' }); + } + attempts.push({ cwd: task.projectRoot, rev: targetBranch }); + + let lastError: unknown; + for (const attempt of attempts) { + try { + const result = await execAsync('git', ['rev-parse', attempt.rev], { cwd: attempt.cwd }); + return execStdout(result).trim(); + } catch (err) { + lastError = err; + } + } + throw lastError instanceof Error ? lastError : new Error(String(lastError)); + } + + private async cleanupLandedTaskResources(task: CoordinatedTask): Promise { + this.closingTaskIds.add(task.id); + try { + this.suppressPendingNotificationForTask(task); + + const cb = this.subscribers.get(task.agentId); + if (cb) { + unsubscribeFromAgent(task.agentId, cb); + this.subscribers.delete(task.agentId); + } + + try { + killAgent(task.agentId); + } catch { + /* already dead */ + } + + await deleteTask({ + agentIds: [task.agentId], + branchName: task.branchName, + deleteBranch: true, + projectRoot: task.projectRoot, + }); + + const idleResolvers = this.idleResolvers.get(task.id); + if (idleResolvers?.length) { + for (const resolve of idleResolvers) resolve({ reason: 'exited' }); + } + this.idleResolvers.delete(task.id); + this.tailBuffers.delete(task.agentId); + this.decoders.delete(task.agentId); + if (task.mcpConfigPath) { + try { + unlinkSync(task.mcpConfigPath); + } catch { + /* already gone */ + } + } + task.mcpConfigPath = undefined; + task.status = 'exited'; + task.exitCode = 0; + this.tasks.delete(task.id); + this.controlMap.delete(task.id); + this.blockedByHumanControl.delete(task.id); + this.notifyRenderer(IPC.MCP_TaskClosed, { taskId: task.id }); + } finally { + this.closingTaskIds.delete(task.id); + } + } + + async landSelf(taskId: string, input: LandSelfInput): Promise { + const task = this.tasks.get(taskId); + if (!task) throw new Error(`Task not found: ${taskId}`); + + task.verification = input.verification; + task.landingSummary = input.summary; + + if (!this.coordinators.has(task.coordinatorTaskId)) { + const reason = `Cannot self-land orphaned task: coordinator ${task.coordinatorTaskId} is not registered`; + this.escalateLanding(task, 'landing_escalated', reason); + throw new Error(reason); + } + + if (!isPassedVerification(input.verification)) { + const reason = verificationFailureReason(input.verification); + this.escalateLanding(task, 'landing_escalated', reason); + throw new Error(reason); + } + + try { + await this.prepareCleanSelfLandingWorktree(task); + } catch (err) { + const reason = err instanceof Error ? err.message : String(err); + this.escalateLanding(task, 'landing_escalated', reason); + throw err; + } + + let mergeResult: { mainBranch: string; linesAdded: number; linesRemoved: number }; + try { + mergeResult = await this.runGitMerge(task, { squash: false }); + } catch (err) { + const reason = err instanceof Error ? err.message : String(err); + const state = + reason.toLowerCase().includes('conflict') || reason.includes('Merge failed') + ? 'landing_escalated' + : 'landing_failed'; + this.escalateLanding(task, state, reason); + throw err; + } + + let landedCommit: string; + try { + landedCommit = await this.resolveLandedCommit(task, mergeResult.mainBranch); + } catch (err) { + const reason = err instanceof Error ? err.message : String(err); + const metadata = { + taskId: task.id, + taskName: task.name, + coordinatorTaskId: task.coordinatorTaskId, + targetBranch: mergeResult.mainBranch, + landedCommit: UNRESOLVED_LANDED_COMMIT, + landedAt: new Date().toISOString(), + landedOrder: this.nextLandedOrder(task.coordinatorTaskId), + summary: input.summary, + verification: input.verification, + }; + task.landedMetadata = metadata; + task.landingState = 'landed_cleanup_failed'; + task.landingReason = `Self-landing merged but landed commit could not be resolved: ${reason}`; + this.syncLandingState(task); + throw new Error(task.landingReason); + } + + const metadata = { + taskId: task.id, + taskName: task.name, + coordinatorTaskId: task.coordinatorTaskId, + targetBranch: mergeResult.mainBranch, + landedCommit, + landedAt: new Date().toISOString(), + landedOrder: this.nextLandedOrder(task.coordinatorTaskId), + summary: input.summary, + verification: input.verification, + }; + task.landedMetadata = metadata; + task.landingState = 'reviewed'; + task.landingReason = undefined; + + try { + await this.cleanupLandedTaskResources(task); + } catch (err) { + const reason = err instanceof Error ? err.message : String(err); + task.landingState = 'landed_cleanup_failed'; + task.landingReason = reason; + this.syncLandingState(task); + throw new Error(`Self-landing merged but cleanup failed: ${reason}`); + } + + return { + mainBranch: mergeResult.mainBranch, + linesAdded: mergeResult.linesAdded, + linesRemoved: mergeResult.linesRemoved, + landingState: 'reviewed', + landedMetadata: metadata, + }; + } + + markTaskReviewed(taskId: string): void { + const task = this.tasks.get(taskId); + if (!task || task.landingState !== 'landed_pending_review') return; + task.landingState = 'reviewed'; + task.landingReason = undefined; + this.syncLandingState(task); + } + + async mergeTask( + taskId: string, + opts?: { squash?: boolean; message?: string; cleanup?: boolean }, + ): Promise<{ mainBranch: string; linesAdded: number; linesRemoved: number }> { + const task = this.tasks.get(taskId); + if (!task) throw new Error(`Task not found: ${taskId}`); + this.assertTaskCanBeMerged(task); + + // Strip injected preamble files before staging so they don't land in history, + // then auto-commit any uncommitted changes in the task worktree before merging. + if (task.worktreePath) { + await stripPreambleFromBranch(task); + try { + await execAsync('git', ['add', '-A'], { cwd: task.worktreePath }); + await execAsync('git', ['commit', '-m', 'WIP: auto-commit before merge'], { + cwd: task.worktreePath, + }); + } catch { + // Commit failed — check if uncommitted changes still exist + const { stdout: statusOut } = await execAsync('git', ['status', '--porcelain'], { + cwd: task.worktreePath, + }); + if (statusOut.trim()) { + throw new Error( + `Auto-commit failed and the task worktree still has uncommitted changes. ` + + `Please commit or discard changes in ${task.worktreePath} before merging.`, + ); + } + // Nothing to commit — swallow silently + } + } + + const result = await this.runGitMerge(task, opts); + if (opts?.cleanup) { await this.cleanupTask(taskId); } return { - mainBranch: result.main_branch, - linesAdded: result.lines_added, - linesRemoved: result.lines_removed, + mainBranch: result.mainBranch, + linesAdded: result.linesAdded, + linesRemoved: result.linesRemoved, }; } + private assertTaskCanBeMerged(task: CoordinatedTask): void { + if ( + task.landingState === 'landed_pending_review' || + task.landingState === 'landed_cleanup_failed' || + task.landingState === 'reviewed' + ) { + throw new Error(`Task ${task.id} has already landed; review or repair cleanup instead.`); + } + + const hasManualReviewSignal = task.signalDoneAt !== undefined; + const hasLandingEscalation = + task.landingState === 'landing_escalated' || task.landingState === 'landing_failed'; + if (task.status === 'running' && !hasManualReviewSignal && !hasLandingEscalation) { + throw new Error( + `Task ${task.id} is still running. Use send_prompt for follow-up, or wait until the task signals manual review or reaches a terminal state before calling merge_task.`, + ); + } + if (task.status === 'creating') { + throw new Error(`Task ${task.id} is still starting and cannot be merged yet.`); + } + } + async reviewAndMergeTask( taskId: string, opts?: { squash?: boolean; message?: string }, @@ -1144,13 +1540,48 @@ export class Coordinator { controlledBy?: 'coordinator' | 'human'; signalDoneAt?: string; signalDoneConsumed?: boolean; + verification?: SubtaskVerification; + landingState?: LandingState; + landingReason?: string; + landingSummary?: string; + landedMetadata?: CoordinatedTask['landedMetadata']; mcpConfigPath?: string; + agentCommand?: string; preambleFileExistedBefore?: boolean; - }): void { + }): { mcpLaunchArgs?: string[] } { if (!this.coordinators.has(opts.coordinatorTaskId)) { throw new Error(`coordinator ${opts.coordinatorTaskId} is not registered`); } - if (this.tasks.has(opts.id)) return; + + // Validate the persisted mcpConfigPath is exactly one of the two paths that + // getSubTaskMcpConfigPath generates — basename-only is too permissive and would + // allow a crafted state file to direct the token write to an arbitrary location. + // Host mode: os.tmpdir()/parallel-code-subtask-{id}.json + // Docker mode: dirname(serverPath)/subtask-{id}.json (looked up from live coordinator state) + const serverInfo = this.coordinators.get(opts.coordinatorTaskId)?.mcpServerInfo; + const expectedHostPath = join(os.tmpdir(), `parallel-code-subtask-${opts.id}.json`); + const expectedDockerPath = serverInfo + ? join(dirname(serverInfo.serverPath), `subtask-${opts.id}.json`) + : null; + const safeMcpConfigPath = + opts.mcpConfigPath && + (opts.mcpConfigPath === expectedHostPath || + (expectedDockerPath !== null && opts.mcpConfigPath === expectedDockerPath)) + ? opts.mcpConfigPath + : undefined; + + const existingTask = this.tasks.get(opts.id); + if (existingTask) { + if (safeMcpConfigPath) existingTask.mcpConfigPath = safeMcpConfigPath; + const mcpLaunchArgs = this.rewriteHydratedSubtaskMcpConfig( + existingTask, + opts.coordinatorTaskId, + safeMcpConfigPath ?? existingTask.mcpConfigPath, + opts.agentCommand, + ); + return { mcpLaunchArgs }; + } + const task: CoordinatedTask = { id: opts.id, name: opts.name, @@ -1165,29 +1596,25 @@ export class Coordinator { exitCode: null, signalDoneAt: opts.signalDoneAt ? new Date(opts.signalDoneAt) : undefined, signalDoneConsumed: opts.signalDoneConsumed, + verification: opts.verification, + landingState: opts.landingState, + landingReason: opts.landingReason, + landingSummary: opts.landingSummary, + landedMetadata: opts.landedMetadata, preambleFileExistedBefore: opts.preambleFileExistedBefore, }; this.tasks.set(task.id, task); + if (opts.landedMetadata) { + const current = this.landedOrderCounters.get(opts.coordinatorTaskId) ?? 0; + this.landedOrderCounters.set( + opts.coordinatorTaskId, + Math.max(current, opts.landedMetadata.landedOrder), + ); + } if (opts.controlledBy === 'human') { this.controlMap.set(task.id, 'human'); } - // Validate the persisted mcpConfigPath is exactly one of the two paths that - // getSubTaskMcpConfigPath generates — basename-only is too permissive and would - // allow a crafted state file to direct the token write to an arbitrary location. - // Host mode: os.tmpdir()/parallel-code-subtask-{id}.json - // Docker mode: dirname(serverPath)/subtask-{id}.json (looked up from live coordinator state) - const serverInfo = this.coordinators.get(opts.coordinatorTaskId)?.mcpServerInfo; - const expectedHostPath = join(os.tmpdir(), `parallel-code-subtask-${opts.id}.json`); - const expectedDockerPath = serverInfo - ? join(dirname(serverInfo.serverPath), `subtask-${opts.id}.json`) - : null; - const safeMcpConfigPath = - opts.mcpConfigPath && - (opts.mcpConfigPath === expectedHostPath || - (expectedDockerPath !== null && opts.mcpConfigPath === expectedDockerPath)) - ? opts.mcpConfigPath - : undefined; task.mcpConfigPath = safeMcpConfigPath; // Set up output monitoring so wait_for_idle and idle detection work after restart. @@ -1198,24 +1625,12 @@ export class Coordinator { // If StartMCPServer already ran before this hydration call (the normal restart path), // rewrite the config file immediately with the current port/token so the respawned // agent gets fresh credentials instead of the stale pre-restart values. - if (safeMcpConfigPath && serverInfo) { - const { serverUrl, subtaskToken, serverPath } = serverInfo; - if (!task.doneToken) task.doneToken = randomBytes(24).toString('base64url'); - const mcpConfig = { - mcpServers: { - 'parallel-code': { - type: 'stdio' as const, - command: 'node', - args: [serverPath, '--url', serverUrl, '--task-id', task.id], - env: { - PARALLEL_CODE_MCP_TOKEN: subtaskToken, - PARALLEL_CODE_MCP_DONE_TOKEN: task.doneToken, - }, - }, - }, - }; - atomicWriteFileSync(safeMcpConfigPath, JSON.stringify(mcpConfig, null, 2), { mode: 0o600 }); - } + const mcpLaunchArgs = this.rewriteHydratedSubtaskMcpConfig( + task, + opts.coordinatorTaskId, + safeMcpConfigPath, + opts.agentCommand, + ); this.tailBuffers.set(agentId, ''); this.decoders.set(agentId, new TextDecoder()); @@ -1235,7 +1650,11 @@ export class Coordinator { .replace(/[\x00-\x1f\x7f]/g, ' ') .replace(/\s+/g, ' ') .trim(); - if (chunkContainsAgentPrompt(stripped)) { + const hasAgentPrompt = chunkContainsAgentPrompt(stripped); + if (!hasAgentPrompt && task.assignedPromptDelivered) { + task.suppressNextIdleNotification = false; + } + if (hasAgentPrompt) { if (task.status === 'running') { task.status = 'idle'; this.maybeQueueReviewNotification(task, 'idle', null); @@ -1257,6 +1676,7 @@ export class Coordinator { } catch { /* agent not yet spawned — onPtyEvent('spawn') will subscribe when it starts */ } + return { mcpLaunchArgs }; } catch (err) { // Clean up partial map entries so the agentId doesn't linger in state. this.tailBuffers.delete(agentId); @@ -1267,6 +1687,35 @@ export class Coordinator { } } + private rewriteHydratedSubtaskMcpConfig( + task: CoordinatedTask, + coordinatorTaskId: string, + mcpConfigPath: string | undefined, + agentCommand: string | undefined, + ): string[] | undefined { + const serverInfo = this.coordinators.get(coordinatorTaskId)?.mcpServerInfo; + if (!serverInfo) return undefined; + const { serverUrl, subtaskToken, serverPath } = serverInfo; + if (!task.doneToken) task.doneToken = randomBytes(24).toString('base64url'); + const mcpConfig = { + mcpServers: { + 'parallel-code': { + type: 'stdio' as const, + command: 'node', + args: [serverPath, '--url', serverUrl, '--task-id', task.id], + env: { + PARALLEL_CODE_MCP_TOKEN: subtaskToken, + PARALLEL_CODE_MCP_DONE_TOKEN: task.doneToken, + }, + }, + }, + }; + if (mcpConfigPath) { + atomicWriteFileSync(mcpConfigPath, JSON.stringify(mcpConfig, null, 2), { mode: 0o600 }); + } + return buildMcpLaunchArgs(agentCommand ?? 'claude', mcpConfigPath, mcpConfig); + } + isRegisteredCoordinator(coordinatorTaskId: string): boolean { return this.coordinators.has(coordinatorTaskId); } @@ -1274,9 +1723,14 @@ export class Coordinator { registerCoordinator( coordinatorTaskId: string, projectId: string, - opts?: { worktreePath?: string; skipPermissions?: boolean }, + opts?: { branchName?: string; worktreePath?: string; skipPermissions?: boolean }, ): void { - if (this.coordinators.has(coordinatorTaskId)) return; + const existing = this.coordinators.get(coordinatorTaskId); + if (existing) { + if (opts?.branchName) existing.branchName = opts.branchName; + if (opts?.worktreePath) existing.worktreePath = opts.worktreePath; + return; + } // Snapshot the current global project root and defaults so each coordinator gets // the values that were active when IT registered, not whatever a later coordinator sets. this.coordinators.set(coordinatorTaskId, { @@ -1284,6 +1738,7 @@ export class Coordinator { lifecycle: 'starting', projectId, projectRoot: this.projectRoot ?? '', + branchName: opts?.branchName, worktreePath: opts?.worktreePath, mcpServerInfo: null, spawnDefaults: { ...this.coordinatorSpawnDefaults }, @@ -1414,6 +1869,8 @@ export class Coordinator { // or exit fires the expected orphaned notification for the user to act on. if (!task.assignedPromptDelivered) { task.reviewNotificationQueued = true; + } else { + task.suppressNextIdleNotification = false; } // Transfer control to human so the user can decide what to do with orphaned tasks @@ -1445,7 +1902,11 @@ export class Coordinator { markPromptDelivered(taskId: string): void { const task = this.tasks.get(taskId); - if (task) task.assignedPromptDelivered = true; + if (!task) return; + task.assignedPromptDelivered = true; + task.suppressNextIdleNotification = true; + this.tailBuffers.set(task.agentId, ''); + if (task.status !== 'exited' && task.status !== 'error') task.status = 'running'; } rescheduleRestageTimer(coordinatorTaskId: string): void { @@ -1525,6 +1986,7 @@ export class Coordinator { const task = this.tasks.get(taskId); if (!task) return false; task.assignedPromptDelivered = true; + task.suppressNextIdleNotification = false; task.signalDoneAt = new Date(); task.signalDoneConsumed = false; diff --git a/electron/mcp/mcp-tool-list.test.ts b/electron/mcp/mcp-tool-list.test.ts index 78a2c501..1436f6de 100644 --- a/electron/mcp/mcp-tool-list.test.ts +++ b/electron/mcp/mcp-tool-list.test.ts @@ -2,10 +2,10 @@ import { describe, expect, it } from 'vitest'; import { selectTools, SUBTASK_TOOLS, COORDINATOR_TOOLS, type ToolDef } from './mcp-tool-list.js'; describe('selectTools — role-based tool list', () => { - it('sub-task (taskId set, no coordinatorId) gets only signal_done', () => { + it('sub-task (taskId set, no coordinatorId) gets only sub-task tools', () => { const tools = selectTools('task-abc', ''); expect(tools).toEqual(SUBTASK_TOOLS); - expect(tools.map((t: ToolDef) => t.name)).toStrictEqual(['signal_done']); + expect(tools.map((t: ToolDef) => t.name)).toStrictEqual(['land_self', 'signal_done']); }); it('coordinator (coordinatorId set, no taskId) gets coordinator tools', () => { @@ -41,6 +41,23 @@ describe('selectTools — role-based tool list', () => { expect(tools).toEqual(COORDINATOR_TOOLS); }); + it('coordinator tool descriptions warn against resending assignments from startup placeholders', () => { + const byName = new Map(COORDINATOR_TOOLS.map((tool) => [tool.name, tool.description])); + expect(byName.get('create_task')).toContain('startup/default placeholder'); + expect(byName.get('send_prompt')).toContain('Do not resend the full original assignment'); + expect(byName.get('get_task_output')).toContain('Improve documentation in @filename'); + }); + + it('create_task documents the coordinator branch as the default base branch', () => { + const createTask = COORDINATOR_TOOLS.find((tool) => tool.name === 'create_task'); + const properties = createTask?.inputSchema.properties as + | Record + | undefined; + expect(properties?.baseBranch?.description).toContain( + 'Defaults to the coordinator task branch', + ); + }); + it('sub-task tools do NOT include any coordinator lifecycle tools', () => { const names = selectTools('task-abc', '').map((t: ToolDef) => t.name); for (const forbidden of ['create_task', 'merge_task', 'close_task', 'wait_for_signal_done']) { diff --git a/electron/mcp/mcp-tool-list.ts b/electron/mcp/mcp-tool-list.ts index 5f09cc4c..ab72373a 100644 --- a/electron/mcp/mcp-tool-list.ts +++ b/electron/mcp/mcp-tool-list.ts @@ -7,10 +7,45 @@ export interface ToolDef { } export const SUBTASK_TOOLS: ToolDef[] = [ + { + name: 'land_self', + description: + 'Land your own completed sub-task through the Parallel Code backend. Call this only after committing your work and running verification successfully. A successful call is terminal; do not call signal_done afterward.', + inputSchema: { + type: 'object', + properties: { + verification: { + type: 'object', + description: 'Structured verification showing the checks you ran and their results.', + properties: { + checks: { + type: 'array', + items: { + type: 'object', + properties: { + name: { type: 'string' }, + command: { type: 'string' }, + result: { type: 'string', enum: ['passed', 'blocked', 'failed'] }, + reason: { type: 'string' }, + }, + required: ['name', 'command', 'result'], + }, + }, + }, + required: ['checks'], + }, + summary: { + type: 'string', + description: 'Optional concise summary of what landed.', + }, + }, + required: ['verification'], + }, + }, { name: 'signal_done', description: - 'Signal that your assigned work is complete and ready for the coordinator to review. Call this when you have finished your task — do not call it mid-task.', + 'Legacy/manual-review completion signal. Use land_self for normal self-landing; call signal_done only when the coordinator asked to review and land manually.', inputSchema: { type: 'object', properties: {}, required: [] }, }, ]; @@ -19,7 +54,7 @@ export const COORDINATOR_TOOLS: ToolDef[] = [ { name: 'create_task', description: - 'Create a new task with its own git worktree and AI agent. The agent starts automatically and the prompt is delivered once the agent is ready.', + 'Create a new task with its own git worktree and AI agent. The agent starts automatically and the prompt is delivered once the agent is ready. A startup/default placeholder prompt in get_task_output is not evidence that delivery failed; wait and re-check before sending follow-up instructions.', inputSchema: { type: 'object', properties: { @@ -31,7 +66,7 @@ export const COORDINATOR_TOOLS: ToolDef[] = [ baseBranch: { type: 'string', description: - 'Git branch to base the worktree on. Defaults to the project default branch. Use this to ensure sub-agents start with the right code (e.g. a feature branch).', + 'Git branch to base the worktree on. Defaults to the coordinator task branch. Only set this when deliberately overriding that default.', }, }, required: ['name'], @@ -53,7 +88,8 @@ export const COORDINATOR_TOOLS: ToolDef[] = [ }, { name: 'send_prompt', - description: "Send a prompt/instruction to a task's AI agent.", + description: + "Send a follow-up instruction to a task's AI agent. Do not resend the full original assignment merely because a newly created task is idle or get_task_output shows a startup/default placeholder prompt; wait briefly and re-check unless the agent clearly asks for input, starts unrelated work, or prompt delivery clearly failed.", inputSchema: { type: 'object', properties: { @@ -90,7 +126,8 @@ export const COORDINATOR_TOOLS: ToolDef[] = [ }, { name: 'get_task_output', - description: "Get recent terminal output from a task's agent (stripped of ANSI codes).", + description: + "Get recent terminal output from a task's agent (stripped of ANSI codes). A startup/default placeholder prompt, such as 'Improve documentation in @filename', can be stale while the dispatched create_task prompt is queued or being processed; do not treat it alone as a reason to resend the task.", inputSchema: { type: 'object', properties: { taskId: { type: 'string', description: 'Task ID' } }, @@ -156,7 +193,7 @@ export const COORDINATOR_TOOLS: ToolDef[] = [ /** * Returns the tool list for a given role. - * Sub-tasks (taskId set, no coordinatorId) get only signal_done. + * Sub-tasks (taskId set, no coordinatorId) get only sub-task scoped tools. * Coordinators (and plain agents) get the full coordinator set — which does NOT include signal_done. */ export function selectTools(taskId: string, coordinatorId: string): ToolDef[] { diff --git a/electron/mcp/preamble.ts b/electron/mcp/preamble.ts index 12d2d7c3..f5a270f7 100644 --- a/electron/mcp/preamble.ts +++ b/electron/mcp/preamble.ts @@ -103,7 +103,7 @@ export async function buildNormalizedPreambleFileDiff( delete s.systemPrompt; } } - normalizedContent = JSON.stringify(s, null, 2); + normalizedContent = Object.keys(s).length === 0 ? '' : JSON.stringify(s, null, 2); } catch { return ''; } diff --git a/electron/mcp/prompt-detect.test.ts b/electron/mcp/prompt-detect.test.ts index 58f224aa..2180fae9 100644 --- a/electron/mcp/prompt-detect.test.ts +++ b/electron/mcp/prompt-detect.test.ts @@ -57,6 +57,12 @@ describe('PROMPT_PATTERNS — last-line detection', () => { expect(matchesAny('❯ ')).toBe(true); }); + it('matches Codex CLI prompt ›', () => { + expect(matchesAny('›')).toBe(true); + expect(matchesAny(' › ')).toBe(true); + expect(matchesAny('› ')).toBe(true); + }); + it('matches bash $ prompt', () => { expect(matchesAny('user@host:~ $ ')).toBe(true); }); @@ -90,21 +96,15 @@ describe('chunkContainsAgentPrompt', () => { expect(chunkContainsAgentPrompt('❯')).toBe(true); }); - it('returns true for ❯ in the last 50 chars', () => { + it('returns true for ❯ in the last 300 chars', () => { const prefix = 'A'.repeat(200); expect(chunkContainsAgentPrompt(`${prefix}❯`)).toBe(true); }); it('returns false when ❯ appears only in the body (TUI selection menu), not the tail', () => { // Claude Code TUI selection: ❯ next to an option, then more text follows - const chunk = '❯ Option A\n Option B\n Option C\n Option D\n\nChoose with arrow keys: '; - // The tail (last 50 chars) must NOT contain ❯ for this to return false - const tail = chunk.slice(-50); - // Sanity-check our test data: ❯ must be absent from the tail - if (tail.includes('❯')) { - // If ❯ leaked into the tail, the test premise is wrong — skip assertion - return; - } + const chunk = `❯ Option A\n${'more menu text '.repeat(30)}`; + expect(chunk.slice(-300).includes('❯')).toBe(false); expect(chunkContainsAgentPrompt(chunk)).toBe(false); }); @@ -116,6 +116,28 @@ describe('chunkContainsAgentPrompt', () => { expect(chunkContainsAgentPrompt('› ')).toBe(true); }); + it('returns true for Codex CLI › prompt above footer/status text', () => { + const footer = '\n\ngpt-5.5 default · ~/repo/worktree'; + expect(chunkContainsAgentPrompt(`›${footer}`)).toBe(true); + }); + + it('does not treat Codex startup screens as ready', () => { + expect( + chunkContainsAgentPrompt('Starting MCP servers (0/2): codex_apps, parallel-code\n›'), + ).toBe(false); + expect(chunkContainsAgentPrompt('model: loading /model to change\n›')).toBe(false); + }); + + it('does not treat Codex trust dialogs as ready', () => { + const dialog = [ + 'Do you trust the contents of this directory?', + '› 1. Yes, continue', + '2. No, quit', + 'Press enter to continue', + ].join('\n'); + expect(chunkContainsAgentPrompt(dialog)).toBe(false); + }); + it('handles ANSI-stripped Claude Code prompt (❯ after stripping)', () => { // The caller strips ANSI before passing to chunkContainsAgentPrompt. // Simulate what the coordinator sees after stripAnsi(). diff --git a/electron/mcp/prompt-detect.ts b/electron/mcp/prompt-detect.ts index 8930f52a..bd2ecd13 100644 --- a/electron/mcp/prompt-detect.ts +++ b/electron/mcp/prompt-detect.ts @@ -16,6 +16,7 @@ export function stripAnsi(text: string): string { */ export const PROMPT_PATTERNS: RegExp[] = [ /❯\s*$/, // Claude Code prompt + /›\s*$/, // Codex CLI prompt /(?:^|\s)\$\s*$/, // bash/zsh dollar prompt (preceded by whitespace or BOL) /(?:^|\s)%\s*$/, // zsh percent prompt /(?:^|\s)#\s*$/, // root prompt @@ -33,12 +34,22 @@ export const AGENT_READY_TAIL_PATTERNS: RegExp[] = [ /›/, // Codex CLI ]; +const AGENT_STARTUP_OR_DIALOG_PATTERNS: RegExp[] = [ + /\bDo\s+you\s+trust\b/i, + /\bPress\s+enter\s+to\s+continue\b/i, + /\bmodel:\s*loading\b/i, + /\bBooting\s+MCP\s+server\b/i, + /\bStarting\s+MCP\s+servers?\b/i, +]; + /** Check stripped output for known agent prompt characters. - * Only checks the tail of the chunk — the agent's main prompt renders as - * the last visible element, while TUI selection UIs place ❯ earlier in - * the render followed by option text and other choices. */ + * Only checks the tail of the chunk — the agent's main prompt renders near + * the end of the visible content, while TUI selection UIs place ❯/› earlier + * in the render followed by option text and other choices. + * 300 chars covers Codex's footer/status lines below the input prompt. */ export function chunkContainsAgentPrompt(stripped: string): boolean { if (stripped.length === 0) return false; - const tail = stripped.slice(-50); + const tail = stripped.slice(-300); + if (AGENT_STARTUP_OR_DIALOG_PATTERNS.some((re) => re.test(tail))) return false; return AGENT_READY_TAIL_PATTERNS.some((re) => re.test(tail)); } diff --git a/electron/mcp/server.ts b/electron/mcp/server.ts index cf38ea54..9229a527 100644 --- a/electron/mcp/server.ts +++ b/electron/mcp/server.ts @@ -8,6 +8,7 @@ import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprot import { MCPClient } from './client.js'; import { selectTools } from './mcp-tool-list.js'; import { validateBranchName } from './validation.js'; +import type { LandSelfInput } from './types.js'; // Parse CLI args const args = process.argv.slice(2); @@ -65,13 +66,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: params } = request.params; - // Sub-tasks may only call signal_done - if (taskId && !coordinatorId && name !== 'signal_done') { + // Sub-tasks may only call sub-task scoped terminal tools. + if (taskId && !coordinatorId && name !== 'signal_done' && name !== 'land_self') { return { content: [ { type: 'text', - text: `Error: '${name}' is not available to sub-tasks. Only signal_done is permitted.`, + text: `Error: '${name}' is not available to sub-tasks. Only land_self and signal_done are permitted.`, }, ], isError: true, @@ -234,6 +235,22 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { }; } + case 'land_self': { + if (!taskId) { + return { + content: [ + { + type: 'text', + text: 'Error: land_self is only available to sub-tasks (no --task-id configured).', + }, + ], + isError: true, + }; + } + const result = await client.landSelf(taskId, params as unknown as LandSelfInput); + return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] }; + } + default: return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], diff --git a/electron/mcp/sub-task-preamble.ts b/electron/mcp/sub-task-preamble.ts index 3cdc01bb..dd70a19e 100644 --- a/electron/mcp/sub-task-preamble.ts +++ b/electron/mcp/sub-task-preamble.ts @@ -1,16 +1,17 @@ export const SUB_TASK_PREAMBLE = `[SUB-TASK MODE] You are a coordinated sub-task inside Parallel Code. A coordinator agent dispatched you to complete specific work. -You have one special MCP tool available via the parallel-code server: +You have two sub-task MCP tools available via the parallel-code server: -- signal_done — Call this when your assigned work is fully complete and the coordinator should review it. +- land_self — Happy-path finish line. Call this after committing your work and passing verification. The backend will merge your branch into the coordinator branch and clean up your task. +- signal_done — Legacy/manual-review finish line. Use this only if the coordinator explicitly asks to review and land your branch manually. RULES: -1. Complete your assigned work fully before calling signal_done. Before signaling: +1. Complete your assigned work fully before calling land_self. Before landing: - Commit all changes (git add -A && git commit) with a meaningful message. - Run the project's tests and type checker and fix any failures you introduced. \ -signal_done means "I verified it passes" — do not call it if tests or typecheck are failing. +land_self requires structured verification — do not call it if tests or typecheck are failing, blocked, or unknown. 2. Ask questions if requirements are unclear or if you are about to do something risky or destructive — the user can see your terminal and can respond. -3. When your work is done, call signal_done. Do NOT ask "what would you like to do?" or offer merge/PR options — signal_done is your finish line. Do NOT use finishing-a-development-branch or similar workflow skills. +3. When your work is done, call land_self with the checks you ran. Do NOT call signal_done after a successful land_self, ask "what would you like to do?", or offer merge/PR options. Do NOT use finishing-a-development-branch or similar workflow skills. --- `; diff --git a/electron/mcp/types.ts b/electron/mcp/types.ts index 681e9813..a017d07b 100644 --- a/electron/mcp/types.ts +++ b/electron/mcp/types.ts @@ -18,8 +18,15 @@ export interface CoordinatedTask { preambleFileExistedBefore?: boolean; // true if the preamble file existed before injection (even if empty) signalDoneAt?: Date; // set when sub-task explicitly calls signal_done signalDoneConsumed?: boolean; // true after wait_for_signal_done returns this task's signal + verification?: SubtaskVerification; + landingState?: LandingState; + landingReason?: string; + landingSummary?: string; + landedMetadata?: LandedMetadata; // Coordinator notification lifecycle flags assignedPromptDelivered?: boolean; + // Ignore the prompt that was already visible when a coordinator-delivered prompt was sent. + suppressNextIdleNotification?: boolean; reviewNotificationQueued?: boolean; /** Coordinator Docker container name. Set when the coordinator runs in Docker mode. * Sub-tasks each get their own `docker run` container; this is not used for spawning @@ -53,6 +60,7 @@ export interface CoordinatorState { lifecycle: CoordinatorLifecycle; projectId: string; projectRoot: string; + branchName?: string; worktreePath?: string; /** Docker container name for this coordinator (used for identification/cleanup, not for spawning sub-tasks). */ dockerContainerName?: string | null; @@ -85,6 +93,36 @@ export interface CoordinatorState { writtenMcpParallelCode?: unknown; } +export interface SubtaskVerificationCheck { + name: string; + command: string; + result: 'passed' | 'blocked' | 'failed'; + reason?: string; +} + +export interface SubtaskVerification { + checks: SubtaskVerificationCheck[]; +} + +export type LandingState = + | 'landing_escalated' + | 'landing_failed' + | 'landed_pending_review' + | 'landed_cleanup_failed' + | 'reviewed'; + +export interface LandedMetadata { + taskId: string; + taskName: string; + coordinatorTaskId: string; + targetBranch: string; + landedCommit: string; + landedAt: string; + landedOrder: number; + summary?: string; + verification: SubtaskVerification; +} + // --- MCP tool input schemas --- export interface CreateTaskInput { @@ -128,6 +166,11 @@ export interface CloseTaskInput { taskId: string; } +export interface LandSelfInput { + verification: SubtaskVerification; + summary?: string; +} + // --- API request/response types --- export interface ApiTaskSummary { @@ -137,6 +180,11 @@ export interface ApiTaskSummary { status: string; coordinatorTaskId: string; signalDoneAt?: string; // ISO timestamp, set when sub-task called signal_done + verification?: SubtaskVerification; + landingState?: LandingState; + landingReason?: string; + landingSummary?: string; + landedMetadata?: LandedMetadata; } export interface ApiTaskDetail extends ApiTaskSummary { @@ -170,3 +218,8 @@ export interface ApiReviewAndMergeResult { diff: ApiDiffResult; merge: ApiMergeResult; } + +export interface ApiLandSelfResult extends ApiMergeResult { + landingState: LandingState; + landedMetadata: LandedMetadata; +} diff --git a/electron/preload.cjs b/electron/preload.cjs index 036a9aec..00c7e18b 100644 --- a/electron/preload.cjs +++ b/electron/preload.cjs @@ -141,6 +141,7 @@ const ALLOWED_CHANNELS = new Set([ 'mcp_task_created', 'mcp_task_closed', 'mcp_task_state_sync', + 'mcp_task_landing_review_cleared', 'mcp_control_changed', 'mcp_coordinator_notification_staged', 'mcp_coordinator_notification_cleared', diff --git a/electron/remote/coordinator-scoping.test.ts b/electron/remote/coordinator-scoping.test.ts index 129046ca..fc1775b1 100644 --- a/electron/remote/coordinator-scoping.test.ts +++ b/electron/remote/coordinator-scoping.test.ts @@ -93,6 +93,22 @@ function makeMockCoordinator(): Coordinator { getTaskOutput: vi.fn().mockReturnValue('output'), mergeTask: vi.fn().mockResolvedValue({ mainBranch: 'main', linesAdded: 0, linesRemoved: 0 }), closeTask: vi.fn().mockResolvedValue(undefined), + landSelf: vi.fn().mockResolvedValue({ + mainBranch: 'main', + linesAdded: 1, + linesRemoved: 0, + landingState: 'reviewed', + landedMetadata: { + taskId: taskA.id, + taskName: taskA.name, + coordinatorTaskId: COORD_A, + targetBranch: 'main', + landedCommit: 'abc123', + landedAt: new Date().toISOString(), + landedOrder: 1, + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + }, + }), reviewAndMergeTask: vi.fn().mockResolvedValue({ diff: { files: [], diff: '' }, merge: { mainBranch: 'main', linesAdded: 0, linesRemoved: 0 }, @@ -435,7 +451,7 @@ describe('coordinator scoping', () => { // ─── Subtask token access control ──────────────────────────────────────────── -describe('subtask token — restricted to signal_done only', () => { +describe('subtask token — restricted to sub-task terminal tools', () => { let subtaskToken = ''; let stop: () => Promise; @@ -508,6 +524,97 @@ describe('subtask token — restricted to signal_done only', () => { expect(res.status).toBe(403); }); + it('POST /api/tasks/{id}/land is allowed with correct X-Done-Token header', async () => { + const res = await subtaskRequest( + 'POST', + `/api/tasks/${taskA.id}/land`, + { + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + }, + DONE_TOKENS[taskA.id], + ); + expect(res.status).toBe(200); + }); + + it('POST /api/tasks/{id}/land returns 403 without X-Done-Token header', async () => { + const res = await subtaskRequest('POST', `/api/tasks/${taskA.id}/land`, { + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + }); + expect(res.status).toBe(403); + }); + + it('POST /api/tasks/{id}/land returns 403 when X-Done-Token is for wrong task', async () => { + const res = await subtaskRequest( + 'POST', + `/api/tasks/${taskA.id}/land`, + { + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + }, + DONE_TOKENS[taskB.id], + ); + expect(res.status).toBe(403); + }); + + it('POST /api/tasks/{id}/land rejects missing verification', async () => { + const res = await subtaskRequest( + 'POST', + `/api/tasks/${taskA.id}/land`, + {}, + DONE_TOKENS[taskA.id], + ); + expect(res.status).toBe(400); + }); + + it.each([ + ['empty checks', { verification: { checks: [] } }], + [ + 'too many checks', + { + verification: { + checks: Array.from({ length: 51 }, (_, i) => ({ + name: `check-${i}`, + command: 'npm test', + result: 'passed', + })), + }, + }, + ], + [ + 'empty check name', + { verification: { checks: [{ name: '', command: 'npm test', result: 'passed' }] } }, + ], + [ + 'empty check command', + { verification: { checks: [{ name: 'test', command: '', result: 'passed' }] } }, + ], + [ + 'invalid check result', + { verification: { checks: [{ name: 'test', command: 'npm test', result: 'unknown' }] } }, + ], + [ + 'non-string summary', + { + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + summary: 123, + }, + ], + [ + 'too-long summary', + { + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + summary: 'x'.repeat(20_001), + }, + ], + ])('POST /api/tasks/{id}/land rejects invalid body: %s', async (_label, body) => { + const res = await subtaskRequest( + 'POST', + `/api/tasks/${taskA.id}/land`, + body, + DONE_TOKENS[taskA.id], + ); + expect(res.status).toBe(400); + }); + it('GET /api/tasks returns 403 with subtaskToken', async () => { const res = await subtaskRequest('GET', '/api/tasks'); expect(res.status).toBe(403); diff --git a/electron/remote/server.ts b/electron/remote/server.ts index 93f6c2e1..c2f9390c 100644 --- a/electron/remote/server.ts +++ b/electron/remote/server.ts @@ -21,6 +21,7 @@ import { import { parseClientMessage, type ServerMessage, type RemoteAgent } from './protocol.js'; import type { Coordinator } from '../mcp/coordinator.js'; import { validateBranchName } from '../mcp/validation.js'; +import type { LandSelfInput, SubtaskVerification } from '../mcp/types.js'; // --- MCP log ring buffer --- export interface MCPLogEntry { @@ -44,6 +45,48 @@ export function getMCPLogs(): MCPLogEntry[] { return mcpLogs.slice(); } +function parseLandSelfInput(body: Record): LandSelfInput | string { + const summary = body.summary; + if (summary !== undefined && typeof summary !== 'string') return 'summary must be a string'; + if (summary !== undefined && summary.length > 20_000) + return 'summary must be 20000 characters or fewer'; + + const verification = body.verification as { checks?: unknown } | undefined; + if (!verification || typeof verification !== 'object') { + return 'verification must be an object'; + } + if (!Array.isArray(verification.checks) || verification.checks.length === 0) { + return 'verification.checks must be a non-empty array'; + } + if (verification.checks.length > 50) return 'verification.checks must contain 50 checks or fewer'; + + const checks: SubtaskVerification['checks'] = []; + for (const rawCheck of verification.checks) { + if (!rawCheck || typeof rawCheck !== 'object') return 'verification checks must be objects'; + const check = rawCheck as Record; + if (typeof check.name !== 'string' || !check.name.trim()) { + return 'verification check name must be a non-empty string'; + } + if (typeof check.command !== 'string' || !check.command.trim()) { + return 'verification check command must be a non-empty string'; + } + if (check.result !== 'passed' && check.result !== 'blocked' && check.result !== 'failed') { + return 'verification check result must be passed, blocked, or failed'; + } + if (check.reason !== undefined && typeof check.reason !== 'string') { + return 'verification check reason must be a string'; + } + checks.push({ + name: check.name, + command: check.command, + result: check.result, + reason: check.reason, + }); + } + + return { verification: { checks }, summary }; +} + /** Strip the token query param before logging or displaying a server URL. */ export function redactServerUrl(rawUrl: string): string { try { @@ -195,7 +238,8 @@ export function startRemoteServer(opts: { return; } if (tokenClass === 'subtask') { - const allowed = req.method === 'POST' && /^\/api\/tasks\/[^/]+\/done$/.test(url.pathname); + const allowed = + req.method === 'POST' && /^\/api\/tasks\/[^/]+\/(?:done|land)$/.test(url.pathname); if (!allowed) { res.writeHead(403, { ...SECURITY_HEADERS, 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: 'forbidden' })); @@ -308,6 +352,17 @@ export function startRemoteServer(opts: { const ownedByCallerOrUnscoped = (taskCoordinatorId: string): boolean => !callerCoordinatorId || taskCoordinatorId === callerCoordinatorId; + const hasMatchingDoneToken = (taskId: string): boolean => { + const expected = orch.getTaskDoneToken(taskId); + const incoming = req.headers['x-done-token']; + return Boolean( + expected && + typeof incoming === 'string' && + incoming.length === expected.length && + timingSafeEqual(Buffer.from(incoming), Buffer.from(expected)), + ); + }; + const taskIdMatch = url.pathname.match(/^\/api\/tasks\/([^/]+)(?:\/(.+))?$/); if (url.pathname === '/api/wait-signal' && req.method === 'POST') { @@ -565,14 +620,7 @@ export function startRemoteServer(opts: { // intended authority model. The done-token is a sub-task ownership proof, not // a coordinator authority proof. if (tokenClass === 'subtask') { - const expected = orch.getTaskDoneToken(taskId); - const incoming = req.headers['x-done-token']; - if ( - !expected || - typeof incoming !== 'string' || - incoming.length !== expected.length || - !timingSafeEqual(Buffer.from(incoming), Buffer.from(expected)) - ) { + if (!hasMatchingDoneToken(taskId)) { return jsonReply(403, { error: 'forbidden' }); } } @@ -582,6 +630,30 @@ export function startRemoteServer(opts: { return; } + if (taskIdMatch && taskIdMatch[2] === 'land' && req.method === 'POST') { + readBody() + .then(async (body) => { + const taskId = decodeURIComponent(taskIdMatch[1]); + const landDetail = orch.getTaskStatus(taskId); + if (!landDetail) return jsonReply(404, { error: 'task not found' }); + if (tokenClass !== 'subtask') return jsonReply(403, { error: 'forbidden' }); + if (!hasMatchingDoneToken(taskId)) return jsonReply(403, { error: 'forbidden' }); + + const parsed = parseLandSelfInput(body); + if (typeof parsed === 'string') return jsonReply(400, { error: parsed }); + + mcpLog('info', `land_self id=${taskId}`); + const result = await orch.landSelf(taskId, parsed); + mcpLog('info', `land_self OK id=${taskId}`); + jsonReply(200, result); + }) + .catch((err) => { + mcpLog('error', `land_self FAIL: ${String(err)}`); + jsonReply(500, { error: String(err) }); + }); + return; + } + if (taskIdMatch && taskIdMatch[2] === 'merge' && req.method === 'POST') { readBody() .then(async (body) => { diff --git a/openspec/changes/subtask-owned-landing/design.md b/openspec/changes/subtask-owned-landing/design.md new file mode 100644 index 00000000..e076342c --- /dev/null +++ b/openspec/changes/subtask-owned-landing/design.md @@ -0,0 +1,193 @@ +# Design - Sub-Task Owned Landing + +## Ownership Model + +Self-landing splits ownership between the sub-task and the backend: + +- The sub-task decides that its work is ready to land. It owns committing its + changes, reporting verification, and asking for help when it cannot safely + land. +- The backend performs all landing mechanics. It validates task ownership, + checks task state, merges in the coordinator worktree, updates task state, + and cleans up successful tasks. + +The sub-task never runs raw `git merge` against the coordinator branch. The +coordinator branch may already be checked out in another worktree, and the +backend already has a merge path that handles that case under the repository +lock. + +## Happy Path + +1. The coordinator creates a sub-task with a specific assignment. +2. The sub-task implements the requested change. +3. The sub-task runs required verification checks. +4. The sub-task commits its work locally. +5. The sub-task calls `land_self` with verification and an optional summary. +6. The backend validates the caller, task ownership, live coordinator, branch, + and worktree state. +7. The backend verifies that the task worktree is clean after excluding + backend-owned injected artifacts. +8. The backend merges the task branch into the coordinator branch in the + coordinator worktree under the existing repository lock. +9. The backend records landed metadata, strips injected preamble/config + artifacts, removes the sub-task worktree and branch, deletes MCP config, and + closes the child task. +10. The child task disappears from the active task list. Cleanup failures + remain visible because they still require attention. + +`land_self` is terminal from the sub-task agent's perspective. The agent does +not call `signal_done` after a successful `land_self`, because cleanup may have +closed the task before another tool call can run. + +## Authentication And Ownership + +`land_self` should use the same per-task ownership proof as `signal_done`. +The sub-task MCP server already has: + +- its task ID from `--task-id`. +- the per-task done token from `PARALLEL_CODE_MCP_DONE_TOKEN`. + +The MCP client should send the done token to the remote route, and the remote +route should validate that it matches the target task before allowing the task +to land itself. Coordinator tokens can continue to use coordinator-scoped routes +for manual landing, but a sub-task token should only be able to self-land its +own task. + +## Orphan Handling + +Orphaned sub-tasks cannot self-land. If the coordinator has been deregistered, +there is no live owner to receive landed state, review follow-up fallout, or +coordinate later reversions. `land_self` should reject orphaned tasks and +surface an escalation state for the user. + +Existing orphan behavior for `signal_done` remains unchanged for older +sub-tasks and manual-review flows. + +## Verification + +`land_self` requires structured verification: + +```ts +type SubtaskVerification = { + checks: Array<{ + name: string; + command: string; + result: 'passed' | 'blocked' | 'failed'; + reason?: string; + }>; +}; +``` + +The backend rejects `land_self` when verification is missing, blocked, or +failed. "Unknown" verification exists only for legacy `signal_done` +completions; it is never sufficient for `land_self`. + +Verification state should be persisted and shown in the renderer so the +coordinator can distinguish passed, blocked, failed, and unknown completions. + +## Clean Worktree Validation + +The sub-task owns the decision to commit or discard its own changes. Before +landing, the backend should verify that the sub-task worktree is clean after +backend-owned injected artifacts are stripped or ignored. + +If the worktree is dirty, `land_self` is rejected and the task enters an +escalated state. The backend should not auto-commit or discard arbitrary +sub-task changes as part of `land_self`. + +## Merge And Cleanup + +The backend should reuse the existing merge path that merges in the coordinator +worktree and serializes with the repository lock. No explicit +`acquire_land_lock` or `release_land_lock` tools are added. + +On a successful merge, cleanup is backend-owned: + +- strip injected sub-task preamble/config artifacts. +- remove the sub-task worktree and branch. +- delete the per-task MCP config. +- persist landed metadata. +- emit renderer state. + +If the merge succeeds but cleanup fails, the task should enter a distinct +landed-cleanup-failed state so the coordinator/user can repair cleanup without +losing the fact that the branch already landed. + +## Escalation + +There is no separate `escalate_land` tool in the first pass. + +If the sub-task knows it cannot land safely, it does not call `land_self`. It +asks its question in the terminal and waits under the existing +human/coordinator control flow. + +If the backend discovers a problem during `land_self`, the failed response and +renderer event carry the escalation reason. Escalation cases include: + +- failed, blocked, missing, or unknown verification. +- dirty sub-task worktree. +- invalid ownership or token. +- orphaned coordinator. +- branch/worktree mismatch. +- merge conflict. +- cleanup failure. + +## Completion State + +Successful self-landings are terminal and close the child task. After the +backend merges the branch and cleans up the child agent, worktree, branch, and +MCP config, it removes the child task from the active task list and tells the +renderer to close the child pane. The absence of the child task is the +completion signal; the user should not need to clear a successful task by hand. + +Cleanup failures remain visible because they require attention. The first pass +does not need automatic rollback UI, but successful landing should record +enough metadata before closure for logs, responses, and future audit trails: + +- sub-task ID and name. +- coordinator ID. +- target branch. +- landed commit. +- verification record. +- landing summary. +- landed order within the coordinator run. + +When reverting or fixing a landed task, later landed tasks in the same +coordinator run should be re-reviewed for dependency on the reverted change. + +## Active Task Ownership + +The coordinator should not become the implementer for a sub-task that is still +running. Completion notifications and terminal text are hints, not ownership +transfers. `get_task_status` is authoritative: if it reports the task is still +running, the coordinator should treat the task as active. Before manual review +or merge work, the coordinator should verify the task state with +`get_task_status`. + +If a task is still running and can receive prompts, the coordinator should send +specific findings back with `send_prompt` and let the assigned sub-task fix, +test, commit, and call `land_self` or return for manual review. +The backend also rejects `merge_task` for a still-running task unless that task +has already entered a manual-review or landing-escalation path, so the prompt +rule is enforced at the tool boundary. + +Manual coordinator edits are reserved for genuinely terminal/manual-review/ +blocked tasks, or for an explicit takeover. When taking over, the coordinator +should state the reason and evidence, such as an exited agent, a blocked agent, +a manual-review task state, a backend merge failure that needs conflict +resolution, or an explicit user request to take over. + +Coordinator-side normalization is narrower than takeover. It is acceptable only +after `merge_task` fails, only to resolve mechanical conflicts caused by +already-landed shared fixes, and only when it makes no behavioral changes. The +coordinator should state that reason before editing. + +## Base-Branch Policy + +Self-landing does not relax the existing base-branch policy. Sub-tasks should +still branch from the coordinator task's branch by default. + +Backend-mediated landing reduces coordinator merge work, but it does not make +stale spawn-time context safe. A sub-task that starts from `main` may miss the +coordinator's in-progress APIs, produce noisy diffs, or implement against +outdated assumptions. diff --git a/openspec/changes/subtask-owned-landing/proposal.md b/openspec/changes/subtask-owned-landing/proposal.md new file mode 100644 index 00000000..bde31016 --- /dev/null +++ b/openspec/changes/subtask-owned-landing/proposal.md @@ -0,0 +1,65 @@ +# Sub-Task Owned Landing + +## Why + +Coordinator mode is limited less by task execution than by coordinator +attention. Around three concurrent sub-tasks, the coordinator stops being a +dispatcher and becomes a manual integrator: it has to inspect each completion, +decide whether verification was real, merge branches, close tasks, and +reconstruct queue state from interleaved messages. + +Landing is the cost that grows worst as more sub-tasks complete. Each sibling +branch may have to reconcile with work that landed after it was spawned, and +the coordinator often has less implementation context than the sub-task that +made the change. + +The goal is to let one coordinator manage a larger batch of routine sub-tasks, +including 10 or more at a time, without requiring one coordinator interaction +per task on the happy path. + +## What Changes + +Sub-tasks get one new sub-task-scoped MCP tool, `land_self`, for the happy path. +The tool asks the backend to land the calling sub-task into its coordinator +branch. + +The ownership split is: + +- sub-tasks own the landing decision. +- the backend owns the landing mechanics. + +A sub-task knows why it made its change, which checks it ran, and whether it is +ready to land. The backend knows how to merge safely: it validates ownership, +merges in the coordinator worktree, serializes with the repository lock, +updates task state, and cleans up the worktree. + +`land_self` is terminal from the sub-task agent's perspective. A successful +self-landed task does not call `signal_done` afterward. After the backend +merges and cleans up the child task, the child task closes; only failures or +escalations remain visible for coordinator or user action. + +`signal_done` remains available for older sub-tasks and for manual-review flows +where the coordinator still wants to review and land a task itself. + +Guarded automerge is the default for successful `land_self` calls. This is an +intentional product tradeoff: if every task still waits for pre-merge +coordinator review, the coordinator remains the bottleneck. Unsafe cases still +stop for coordinator or human input. + +## Impact + +- Affected capability: `subtask-owned-landing` (new capability spec). +- Extends existing capability: `coordinator-mcp-backend`. +- New sub-task MCP tool: `land_self`. +- New task states: landing escalated, landing failed, landed cleanup failed, + and reviewed for legacy persisted/review-clearing compatibility. +- Existing `merge_task` remains available for coordinator-driven manual + landings. +- Existing base-branch policy remains: sub-tasks should branch from the + coordinator task's branch by default. + +## Out Of Scope + +- Launch babysitting, trust prompts, and delayed initial prompt delivery. +- Full rollback UI for landed tasks. The first pass preserves enough metadata + for review and manual follow-up. diff --git a/openspec/changes/subtask-owned-landing/specs/subtask-owned-landing/spec.md b/openspec/changes/subtask-owned-landing/specs/subtask-owned-landing/spec.md new file mode 100644 index 00000000..56544196 --- /dev/null +++ b/openspec/changes/subtask-owned-landing/specs/subtask-owned-landing/spec.md @@ -0,0 +1,215 @@ +# Sub-Task Owned Landing Specification + +## ADDED Requirements + +### Requirement: Sub-tasks can request backend-mediated self-landing + +The app SHALL expose a sub-task-scoped self-landing capability that lets a +coordinated child task ask the backend to land its own branch into the owning +coordinator branch. + +#### Scenario: Sub-task self-lands successfully + +- **GIVEN** a coordinated child task with a live owning coordinator +- **AND** the child task has committed work and passed verification +- **WHEN** the child task requests self-landing +- **THEN** the backend merges the child branch into the coordinator branch +- **AND** the child task is recorded as landed +- **AND** the coordinator does not need to send a merge or close command for + that child + +#### Scenario: Coordinator manual merge remains available + +- **GIVEN** a coordinator wants to land a child task manually +- **WHEN** the coordinator uses the existing coordinator merge flow +- **THEN** the app still supports that manual landing path + +### Requirement: Self-landing is authorized by the owning sub-task + +The app SHALL allow self-landing only when the caller proves ownership of the +target sub-task and SHALL reject attempts to land another task. + +#### Scenario: Matching sub-task token self-lands + +- **GIVEN** a coordinated child task has a per-task done token +- **WHEN** the child task requests self-landing with its matching token +- **THEN** the backend accepts the caller as the owner of that child task + +#### Scenario: Missing or wrong sub-task token is rejected + +- **GIVEN** a coordinated child task has a per-task done token +- **WHEN** a self-landing request omits the token or presents another task's + token +- **THEN** the backend rejects the request +- **AND** no merge or cleanup is performed + +#### Scenario: Orphaned child cannot self-land + +- **GIVEN** a coordinated child task whose owning coordinator has deregistered +- **WHEN** the child task requests self-landing +- **THEN** the backend rejects the request +- **AND** the child task is surfaced for coordinator or user attention + +### Requirement: Self-landing requires passed verification + +The app SHALL require structured verification with passed checks before +self-landing and SHALL reject missing, blocked, failed, or unknown verification. + +#### Scenario: Passed verification allows landing + +- **GIVEN** a coordinated child task reports structured verification +- **AND** every verification check is passed +- **WHEN** the child task requests self-landing +- **THEN** verification does not block the landing request + +#### Scenario: Missing verification is rejected + +- **GIVEN** a coordinated child task has no structured verification record +- **WHEN** the child task requests self-landing +- **THEN** the backend rejects the request +- **AND** the task is surfaced for coordinator or user attention + +#### Scenario: Blocked or failed verification is rejected + +- **GIVEN** a coordinated child task reports at least one blocked or failed + verification check +- **WHEN** the child task requests self-landing +- **THEN** the backend rejects the request +- **AND** the task is surfaced for coordinator or user attention + +#### Scenario: Legacy done signal has unknown verification + +- **GIVEN** an older coordinated child task signals completion without + structured verification +- **WHEN** the renderer shows that completion +- **THEN** the verification state is shown as unknown +- **AND** unknown verification does not satisfy self-landing verification + +### Requirement: Self-landing requires a clean registered task worktree + +The app SHALL reject self-landing when the target child task worktree is dirty +after backend-owned injected artifacts are handled, or when the worktree no +longer matches the registered task branch. + +#### Scenario: Dirty worktree is rejected + +- **GIVEN** a coordinated child task has uncommitted user-authored changes +- **WHEN** the child task requests self-landing +- **THEN** the backend rejects the request +- **AND** it does not auto-commit or discard those changes + +#### Scenario: Registered branch no longer matches the worktree + +- **GIVEN** a coordinated child task's worktree is no longer on the registered + task branch +- **WHEN** the child task requests self-landing +- **THEN** the backend rejects the request +- **AND** no merge or cleanup is performed + +### Requirement: Backend owns merge serialization and cleanup + +The app SHALL perform self-landing merges in the coordinator branch worktree +under backend serialization, and SHALL clean up successfully landed child tasks. + +#### Scenario: Backend lands and cleans up child task + +- **GIVEN** a child task is eligible for self-landing +- **WHEN** the backend completes the self-landing merge +- **THEN** the backend removes the child worktree and branch +- **AND** removes the child task's MCP config +- **AND** strips injected sub-task preamble artifacts when present +- **AND** records landed metadata for review + +#### Scenario: Merge conflict escalates without landing + +- **GIVEN** a child task is eligible for self-landing +- **AND** merging the child branch into the coordinator branch conflicts +- **WHEN** the backend attempts self-landing +- **THEN** the backend aborts the merge +- **AND** records an escalated landing state +- **AND** no cleanup removes the child worktree + +#### Scenario: Cleanup fails after successful merge + +- **GIVEN** the backend successfully merges a child task +- **AND** cleanup of the child task fails +- **WHEN** the renderer displays the child task state +- **THEN** the task is shown as landed with cleanup failed +- **AND** the landed merge is not hidden or treated as unlanded + +### Requirement: Successful self-landings close completed child tasks + +The app SHALL remove successfully self-landed child tasks from the active task +list after the backend has merged the branch and cleaned up the child task +agent, worktree, branch, and MCP config. + +#### Scenario: Successful landing closes the child task + +- **WHEN** a child task self-lands successfully +- **THEN** the backend records landed metadata before cleanup +- **AND** the child task is removed from the backend task list +- **AND** the renderer removes the child task pane from the UI +- **AND** the renderer no longer sends prompt, terminal input, or terminal + resize commands to the removed child task agent + +#### Scenario: Cleanup failure remains visible + +- **GIVEN** a child task branch was merged by self-landing +- **WHEN** cleanup of the child task fails +- **THEN** the child task remains visible with landed cleanup-failed state +- **AND** the task requires user or coordinator attention + +### Requirement: Coordinator instructions preserve active sub-task ownership + +The app SHALL instruct coordinators to treat `get_task_status` status `running` +as authoritative and to send follow-up prompts to running sub-tasks instead of +manually editing, verifying, merging, or closing those sub-tasks. +The app SHALL reject coordinator-driven `merge_task` calls for still-running +sub-tasks that have not entered manual-review or landing-escalation state. + +#### Scenario: Running child receives follow-up + +- **GIVEN** a child task appears complete from terminal output or notification +- **AND** `get_task_status` reports the child task is still running +- **WHEN** the coordinator finds an issue in that child task's work +- **THEN** the coordinator instructions tell it to use `send_prompt` with + specific findings +- **AND** the instructions tell it not to manually edit, verify, merge, or close + the running child task + +#### Scenario: Backend rejects active merge + +- **GIVEN** a child task is still running +- **AND** the child task has not called `signal_done` +- **AND** the child task is not in a landing escalation state +- **WHEN** the coordinator calls `merge_task` for that child task +- **THEN** the backend rejects the merge request +- **AND** the child branch is not merged or cleaned up + +#### Scenario: Manual takeover requires explicit reason and evidence + +- **GIVEN** a child task needs coordinator-owned manual intervention +- **WHEN** the coordinator takes over implementation, normalization, merge, or + cleanup work +- **THEN** the coordinator instructions require stating the takeover reason and + evidence + +#### Scenario: Coordinator-side normalization is limited + +- **GIVEN** `merge_task` fails for a child task +- **AND** the conflict is mechanical fallout from an already-landed shared fix +- **WHEN** the coordinator performs normalization to unblock the backend merge +- **THEN** the coordinator instructions limit the edit to non-behavioral + normalization +- **AND** the instructions require stating the reason before editing + +### Requirement: Sub-tasks keep using the coordinator branch as default base + +The app SHALL continue guiding coordinators to create sub-tasks from the +coordinator task's branch by default. + +#### Scenario: Coordinator instructions describe default base branch + +- **WHEN** a coordinator task receives its coordination instructions +- **THEN** those instructions tell the coordinator to use its own branch as the + default base branch for sub-tasks diff --git a/openspec/changes/subtask-owned-landing/tasks.md b/openspec/changes/subtask-owned-landing/tasks.md new file mode 100644 index 00000000..cb6a6eb3 --- /dev/null +++ b/openspec/changes/subtask-owned-landing/tasks.md @@ -0,0 +1,58 @@ +# Tasks - Sub-Task Owned Landing + +- [x] Add `land_self` to the sub-task MCP tool list with required structured + verification and optional summary input. +- [x] Update the MCP server sub-task tool guard so sub-task tokens may call + `signal_done` and `land_self`, while other coordinator routes remain + forbidden. +- [x] Add MCP client support for `land_self`, sending the task ID and per-task + done token to the remote route. +- [x] Add a remote API route for self-landing that validates the matching + sub-task done token, task ownership, live coordinator state, and request + body. +- [x] Add backend task fields for landing state, verification state, landed + metadata, escalation reason, landed order, and cleanup-failed state. +- [x] Persist and hydrate the new landing and verification fields. +- [x] Implement backend `land_self` handling using the existing coordinator + merge path and repository lock. +- [x] Reject `land_self` when verification is missing, blocked, failed, or + unknown. +- [x] Reject `land_self` for orphaned sub-tasks. +- [x] Reject `land_self` when the task branch/worktree no longer matches the + registered task. +- [x] Validate the sub-task worktree is clean after backend-owned injected + artifacts are stripped or ignored; reject dirty worktrees without + auto-committing or discarding sub-task changes. +- [x] On successful self-landing, strip injected preamble/config artifacts, + delete the per-task MCP config, remove the worktree/branch, record landed + metadata, and close the completed child task. +- [x] Add explicit landing escalation events/state for verification rejection, + dirty worktree, orphaned coordinator, invalid ownership, branch/worktree + mismatch, merge conflict, and cleanup failure. +- [x] Represent cleanup failure as landed-cleanup-failed so a successful merge + is not hidden by failed cleanup. +- [x] Add renderer display for verification status, landing escalated, landing + failed, landed cleanup failed, and legacy landed pending-review states. +- [x] Prevent landed/reviewed child task terminals and prompt sends from + writing to the removed backend agent after self-landing cleanup. +- [x] Add a user action to clear legacy landed pending-review state. +- [x] Update sub-task preamble text so new sub-tasks call `land_self` on the + happy path and reserve `signal_done` for legacy/manual-review flows. +- [x] Keep coordinator preamble base-branch guidance: sub-tasks branch from the + coordinator task's branch by default. +- [x] Reject coordinator-driven `merge_task` for still-running tasks that have + not entered manual-review or landing-escalation state. +- [x] Add tests for sub-task tool exposure and forbidden cross-route access. +- [x] Add remote-route tests for matching/missing/wrong done tokens. +- [x] Add backend tests for clean self-landing, dirty worktree rejection, + orphan rejection, branch/worktree mismatch, merge conflict escalation, + cleanup failure state, and landed metadata persistence. +- [x] Add renderer/store tests for successful close, legacy landed + pending-review clearing, verification display, and escalation states. +- [x] Run `openspec validate --all --strict`, `npm run compile`, + `npm run typecheck`, `git diff --check`, and focused tests covering + `electron/mcp/coordinator.test.ts`, `electron/mcp/mcp-tool-list.test.ts`, + `electron/remote/coordinator-scoping.test.ts`, `src/store/persistence.test.ts`, + and `src/store/tasks.test.ts`. + Note: repo-wide OpenSpec strict validation is still blocked by pre-existing + `custom-themes` issues; `subtask-owned-landing` validates on its own. diff --git a/src/App.tsx b/src/App.tsx index d9ea047a..1f104aad 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -51,8 +51,7 @@ import { toggleTaskFocusMode, initMCPListeners, markTaskMcpPending, - markTaskMcpReady, - setTaskMcpLaunchArgs, + applyTaskMcpLaunchResult, markTaskMcpError, } from './store/store'; import { isGitHubUrl } from './lib/github-url'; @@ -395,6 +394,7 @@ function App() { coordinatorTaskId: task.id, projectId: task.projectId, projectRoot, + coordinatorBranch: task.branchName || undefined, worktreePath: task.gitIsolation === 'worktree' ? task.worktreePath : undefined, skipPermissions: task.skipPermissions ?? false, propagateSkipPermissions: task.propagateSkipPermissions ?? false, @@ -404,8 +404,7 @@ function App() { dockerImage: task.dockerMode ? task.dockerImage : undefined, }) .then((result) => { - setTaskMcpLaunchArgs(taskId, result?.mcpLaunchArgs); - markTaskMcpReady(taskId); + applyTaskMcpLaunchResult(taskId, result); }) .catch((err) => { console.warn(`[MCP] Failed to restore MCP server for coordinator task ${taskId}:`, err); @@ -423,6 +422,12 @@ function App() { for (const taskId of [...store.taskOrder, ...store.collapsedTaskOrder]) { const task = store.tasks[taskId]; if (!task?.coordinatedBy) continue; + if ( + task.landingState === 'landed_pending_review' || + task.landingState === 'landed_cleanup_failed' || + task.landingState === 'reviewed' + ) + continue; // Skip if coordinator restore failed — hydrating into a broken coordinator leaves // children with stale MCP wiring and misleading 'ready' status. if (store.tasks[task.coordinatedBy]?.mcpStartupStatus !== 'ready') continue; @@ -430,7 +435,7 @@ function App() { if (!projectRoot) continue; markTaskMcpPending(task.id); hydratePromises.push( - invoke(IPC.MCP_HydrateCoordinatedTask, { + invoke<{ mcpLaunchArgs?: string[] }>(IPC.MCP_HydrateCoordinatedTask, { id: task.id, name: task.name, projectId: task.projectId, @@ -443,10 +448,18 @@ function App() { agentId: task.agentIds[0], signalDoneAt: task.signalDoneAt, signalDoneConsumed: task.signalDoneConsumed, + verification: task.verification, + landingState: task.landingState, + landingReason: task.landingReason, + landingSummary: task.landingSummary, + landedMetadata: task.landedMetadata, mcpConfigPath: task.mcpConfigPath, + agentCommand: store.agents[task.agentIds[0]]?.def.command ?? 'claude', preambleFileExistedBefore: task.preambleFileExistedBefore, }) - .then(() => markTaskMcpReady(task.id)) + .then((result) => { + applyTaskMcpLaunchResult(task.id, result); + }) .catch((err) => { console.warn(`[MCP] Failed to hydrate coordinated task ${taskId}:`, err); markTaskMcpError(task.id, String(err)); diff --git a/src/components/PromptInput.tsx b/src/components/PromptInput.tsx index ad242a14..4dfbf2e2 100644 --- a/src/components/PromptInput.tsx +++ b/src/components/PromptInput.tsx @@ -17,6 +17,7 @@ import { isTrustQuestionAutoHandled, isAutoTrustSettling, isAgentAskingQuestion, + isAgentIdle, getTaskFocusedPanel, setTaskFocusedPanel, setTaskLastInputAt, @@ -25,8 +26,10 @@ import { showNotification, } from '../store/store'; import { clearStagedNotification, setStagedNotificationUserEdited } from '../store/tasks'; +import { isLandedTaskState } from '../store/landing'; import { processAutoFireTick } from './autofire-tick'; -import { shouldHandoffCoordinatorQuestion } from './prompt-control'; +import { shouldAckInitialPromptDelivery, shouldHandoffCoordinatorQuestion } from './prompt-control'; +import { isStartupBlockingAutoSend } from './prompt-autosend-readiness'; import type { StagedNotification } from '../store/types'; import { debug, warn as logWarn } from '../lib/log'; import { theme } from '../lib/theme'; @@ -77,6 +80,7 @@ const AUTOSEND_MAX_WAIT_MS = 45_000; const PROMPT_VERIFY_TIMEOUT_MS = 5_000; const PROMPT_VERIFY_POLL_MS = 250; const PROMPT_MARKER_SCAN_CHARS = 500; +const PROMPT_ECHO_HANDOFF_SUPPRESS_MS = 5_000; /** True when auto-send should be blocked by a question in the output. * Trust-dialog questions are NOT blocking when auto-trust handles them. */ @@ -181,6 +185,7 @@ export function PromptInput(props: PromptInputProps) { // Don't tear down the auto-send mechanism if we can't send yet — // the quiescence timer needs to stay alive to retry after settling. if (isAutoTrustSettling(agentId)) return; + if (isStartupBlockingAutoSend(getAgentOutputTail(agentId))) return; cleanup(); void handleSend('auto'); } @@ -203,6 +208,10 @@ export function PromptInput(props: PromptInputProps) { onAgentReady(agentId, onReady); return; } + if (isStartupBlockingAutoSend(getAgentOutputTail(agentId))) { + onAgentReady(agentId, onReady); + return; + } // Start a series of stability checks. Some agents (e.g. Claude Code) // render ❯ before fully initializing — the marker persists while the @@ -228,6 +237,10 @@ export function PromptInput(props: PromptInputProps) { pendingSendTimer = undefined; if (cancelled) return; const tail = getAgentOutputTail(agentId); + if (isStartupBlockingAutoSend(tail)) { + onAgentReady(agentId, onReady); + return; + } if (isQuestionBlockingAutoSend(tail)) { onAgentReady(agentId, onReady); return; @@ -284,6 +297,11 @@ export function PromptInput(props: PromptInputProps) { const tail = getAgentOutputTail(agentId); if (!tail) return; + if (isStartupBlockingAutoSend(tail)) { + lastRawTail = tail; + stableSince = 0; + return; + } // If a prompt marker is visible, use the fast path's stability checks // instead of pure quiescence — they verify ❯ persists AND output is stable. @@ -555,6 +573,16 @@ export function PromptInput(props: PromptInputProps) { // When the agent shows a question/dialog, focus the terminal so the user // can interact with the TUI directly. const questionActive = () => isAgentAskingQuestion(props.agentId); + const isRecentPromptEcho = (tail: string): boolean => { + const task = store.tasks[props.taskId]; + const lastPrompt = task?.lastPrompt?.trim(); + if (!lastPrompt || !task?.lastInputAt) return false; + const lastInputAt = Date.parse(task.lastInputAt); + if (!Number.isFinite(lastInputAt)) return false; + if (Date.now() - lastInputAt > PROMPT_ECHO_HANDOFF_SUPPRESS_MS) return false; + const snippet = stripAnsi(lastPrompt).slice(0, 40); + return Boolean(snippet && stripAnsi(tail).includes(snippet)); + }; createEffect(() => { if (questionActive() && getTaskFocusedPanel(props.taskId) === 'prompt') { setTaskFocusedPanel(props.taskId, 'ai-terminal'); @@ -571,9 +599,17 @@ export function PromptInput(props: PromptInputProps) { // is still true — that fires when the user explicitly clicks Release Control // but the tail buffer hasn't cleared yet, which immediately overrides them. const questionJustActivated = active && !prevActive; + const tail = getAgentOutputTail(props.agentId); if ( questionJustActivated && - shouldHandoffCoordinatorQuestion({ controlledBy, questionActive: active }) + shouldHandoffCoordinatorQuestion({ + controlledBy, + questionActive: active, + agentIdle: isAgentIdle(props.agentId), + startupBlocking: isStartupBlockingAutoSend(tail), + autoTrustSettling: isAutoTrustSettling(props.agentId), + recentPromptEcho: isRecentPromptEcho(tail), + }) ) { setTaskControl(props.taskId, 'human'); setTaskFocusedPanel(props.taskId, 'ai-terminal'); @@ -629,6 +665,7 @@ export function PromptInput(props: PromptInputProps) { async function handleSend(mode: 'manual' | 'auto' = 'manual') { if (sending()) return; + if (isLandedTaskState(store.tasks[props.taskId]?.landingState)) return; if (mode === 'manual' && props.controlledBy === 'coordinator') return; // Block sends while the agent is showing a question/dialog. // For auto-sends, use a fresh tail-buffer check instead of the reactive @@ -636,6 +673,9 @@ export function PromptInput(props: PromptInputProps) { // the callers (onReady, quiescence timer) already verified with fresh data. if (mode === 'auto') { const tail = getAgentOutputTail(props.agentId); + if (isStartupBlockingAutoSend(tail)) { + return; + } if (isQuestionBlockingAutoSend(tail)) { return; } @@ -668,6 +708,12 @@ export function PromptInput(props: PromptInputProps) { setSending(true); try { + const initialPromptSnapshot = props.initialPrompt?.trim(); + const shouldAckInitialPrompt = shouldAckInitialPromptDelivery({ + coordinatedBy: props.coordinatedBy, + initialPrompt: initialPromptSnapshot, + sentText: val, + }); // Snapshot tail before send for verification comparison. const preSendTail = getAgentOutputTail(props.agentId); await sendPrompt(props.taskId, props.agentId, val); @@ -679,11 +725,11 @@ export function PromptInput(props: PromptInputProps) { if (signal.aborted) return; - if (props.initialPrompt?.trim()) { - setAutoSentInitialPrompt(props.initialPrompt.trim()); - if (props.coordinatedBy) { - invoke(IPC.MCP_CoordinatedTaskPromptDelivered, { taskId: props.taskId }).catch(() => {}); - } + if (initialPromptSnapshot && val === initialPromptSnapshot) { + setAutoSentInitialPrompt(initialPromptSnapshot); + } + if (shouldAckInitialPrompt) { + invoke(IPC.MCP_CoordinatedTaskPromptDelivered, { taskId: props.taskId }).catch(() => {}); } // If the user manually sent the staged notification text exactly, ack it const staged = props.stagedNotification; diff --git a/src/components/SubTaskStrip.tsx b/src/components/SubTaskStrip.tsx index d4aaf18c..28829723 100644 --- a/src/components/SubTaskStrip.tsx +++ b/src/components/SubTaskStrip.tsx @@ -152,6 +152,23 @@ export function SubTaskStrip(props: SubTaskStripProps) { return [...active, ...collapsed].map((id) => store.tasks[id]).filter(Boolean); }); + const taskTone = (task: (typeof store.tasks)[string]) => { + if (task.landingState === 'landed_pending_review' || task.landingState === 'reviewed') { + return { color: '#22c55e', label: 'landed' }; + } + if ( + task.landingState === 'landed_cleanup_failed' || + task.landingState === 'landing_escalated' + ) { + return { color: theme.warning, label: task.landingState }; + } + if (task.landingState === 'landing_failed') { + return { color: theme.error, label: 'landing failed' }; + } + if (task.signalDoneReceived) return { color: '#22c55e', label: 'signalled done' }; + return null; + }; + return ( <> @@ -190,17 +207,17 @@ export function SubTaskStrip(props: SubTaskStripProps) { } setActiveTask(task.id); }} - title={task.signalDoneReceived ? `${task.name} — signalled done` : task.name} + title={taskTone(task) ? `${task.name} — ${taskTone(task)?.label}` : task.name} style={{ display: 'inline-flex', 'align-items': 'center', gap: '4px', padding: '2px 8px', 'border-radius': '10px', - background: task.signalDoneReceived - ? `color-mix(in srgb, #22c55e 12%, transparent)` + background: taskTone(task) + ? `color-mix(in srgb, ${taskTone(task)?.color} 12%, transparent)` : `color-mix(in srgb, ${theme.fgSubtle} 8%, transparent)`, - border: `1px solid ${task.signalDoneReceived ? '#22c55e44' : theme.border}`, + border: `1px solid ${taskTone(task) ? `${taskTone(task)?.color}44` : theme.border}`, color: theme.fgMuted, 'font-size': sf(11), 'font-family': "'JetBrains Mono', monospace", @@ -213,10 +230,10 @@ export function SubTaskStrip(props: SubTaskStripProps) { }} > } > - + {(tone) => } {task.name} diff --git a/src/components/TaskAITerminal.tsx b/src/components/TaskAITerminal.tsx index 64e40aee..7b554a7e 100644 --- a/src/components/TaskAITerminal.tsx +++ b/src/components/TaskAITerminal.tsx @@ -607,7 +607,7 @@ function AgentTerminalPane(props: { ) } attachExisting={a().attachExisting} - preserveOnWindowUnload + preserveSessionOnCleanup onExit={(code) => markAgentExited(a().id, code)} onData={(data) => markAgentOutput(a().id, data, props.task.id)} onFileLink={props.onFileLink} diff --git a/src/components/TaskPanel.tsx b/src/components/TaskPanel.tsx index c1f14aab..e770aa5f 100644 --- a/src/components/TaskPanel.tsx +++ b/src/components/TaskPanel.tsx @@ -78,6 +78,10 @@ export function TaskPanel(props: TaskPanelProps) { const [showPushConfirm, setShowPushConfirm] = createSignal(false); const [pushSuccess, setPushSuccess] = createSignal(false); const [pushing, setPushing] = createSignal(false); + const isLandedTask = () => + props.task.landingState === 'landed_pending_review' || + props.task.landingState === 'landed_cleanup_failed' || + props.task.landingState === 'reviewed'; let pushSuccessTimer: ReturnType | undefined; onCleanup(() => clearTimeout(pushSuccessTimer)); const [diffScrollTarget, setDiffScrollTarget] = createSignal(null); @@ -194,10 +198,10 @@ export function TaskPanel(props: TaskPanelProps) { setShowCloseConfirm(true); break; case 'merge': - if (props.task.gitIsolation === 'worktree') setShowMergeConfirm(true); + if (props.task.gitIsolation === 'worktree' && !isLandedTask()) setShowMergeConfirm(true); break; case 'push': - if (props.task.gitIsolation === 'worktree') setShowPushConfirm(true); + if (props.task.gitIsolation === 'worktree' && !isLandedTask()) setShowPushConfirm(true); break; } }); @@ -210,6 +214,7 @@ export function TaskPanel(props: TaskPanelProps) { const worktreePath = props.task.worktreePath; const baseBranch = props.task.baseBranch; const isolation = props.task.gitIsolation; + if (isLandedTask()) return; if (isolation !== 'worktree' && isolation !== 'direct') return; let cancelled = false; @@ -396,7 +401,7 @@ export function TaskPanel(props: TaskPanelProps) { content: () => promptInputEl, }; - const isNoneGit = () => props.task.gitIsolation === 'none'; + const isGitUnavailable = () => props.task.gitIsolation === 'none' || isLandedTask(); // Notes and changed-files children reused across stack and split trees. // In the stack-mode inner horizontal split, both children absorb (50/50 default). @@ -424,7 +429,7 @@ export function TaskPanel(props: TaskPanelProps) { absorberWeight: 0.5, content: () => (
- {isNoneGit() ? ( + {isGitUnavailable() ? ( notesBodyEl ) : ( setShowCloseConfirm(false)} /> - + getTaskDockerBadgeLabel(props.task.dockerSource); + const isLandedTask = () => + props.task.landingState === 'landed_pending_review' || + props.task.landingState === 'landed_cleanup_failed' || + props.task.landingState === 'reviewed'; + const landingBadge = () => { + switch (props.task.landingState) { + case 'landed_pending_review': + return { label: 'Landed', color: theme.success, title: 'Landed — pending review' }; + case 'landed_cleanup_failed': + return { + label: 'Cleanup failed', + color: theme.warning, + title: props.task.landingReason ?? 'Landed, but cleanup failed', + }; + case 'landing_escalated': + return { + label: 'Landing blocked', + color: theme.warning, + title: props.task.landingReason ?? 'Landing needs attention', + }; + case 'landing_failed': + return { + label: 'Landing failed', + color: theme.error, + title: props.task.landingReason ?? 'Landing failed', + }; + case 'reviewed': + return { label: 'Reviewed', color: theme.fgMuted, title: 'Landing reviewed' }; + default: + return null; + } + }; + const verificationBadge = () => { + const checks = props.task.verification?.checks; + if (!checks?.length) return null; + if (checks.every((check) => check.result === 'passed')) { + return { + label: `Verified ${checks.length}`, + color: theme.success, + title: checks.map((check) => `${check.name}: ${check.command}`).join('\n'), + }; + } + const failed = checks.find((check) => check.result !== 'passed'); + return { + label: failed?.result === 'blocked' ? 'Verification blocked' : 'Verification failed', + color: theme.warning, + title: failed + ? `${failed.name}: ${failed.command}${failed.reason ? `\n${failed.reason}` : ''}` + : 'Verification did not pass', + }; + }; const titleLabel = () => { const initialPrompt = props.task.savedInitialPrompt?.trim(); if ( @@ -104,11 +156,41 @@ export function TaskTitleBar(props: TaskTitleBarProps) { Review + + {(badge) => ( + + )} + + + {(badge) => ( + + {badge().label} + + )} + updateTaskName(props.task.id, v)} @@ -118,7 +200,7 @@ export function TaskTitleBar(props: TaskTitleBarProps) { />
- + diff --git a/src/components/TerminalView.tsx b/src/components/TerminalView.tsx index 7a0e7906..adb9974f 100644 --- a/src/components/TerminalView.tsx +++ b/src/components/TerminalView.tsx @@ -13,11 +13,12 @@ import { isMac } from '../lib/platform'; import { resolvedBindings } from '../store/keybindings'; import { matchesKeyEvent } from '../lib/keybindings'; import { store, setTaskLastInputAt, retryTaskMcpStartup } from '../store/store'; +import { isLandedTaskState } from '../store/landing'; import { warn as logWarn } from '../lib/log'; import { registerTerminal, unregisterTerminal, markDirty } from '../lib/terminalFitManager'; import { dataTransferToShellArgs, escapePath } from '../lib/terminalDrop'; import { cleanCopiedTerminalText } from '../lib/copy-text'; -import { computeDisableStdin } from '../lib/terminalDisableStdin'; +import { computeDisableStdin, shouldForwardTerminalInput } from '../lib/terminalDisableStdin'; import type { PtyOutput } from '../ipc/types'; let windowUnloading = false; @@ -72,7 +73,7 @@ interface TerminalViewProps { dockerImage?: string; spawnDelayMs?: number; attachExisting?: boolean; - preserveOnWindowUnload?: boolean; + preserveSessionOnCleanup?: boolean; dockerMountWorktreeParent?: boolean; onExit?: (exitInfo: { exit_code: number | null; @@ -127,8 +128,17 @@ export function TerminalView(props: TerminalViewProps) { const taskId = props.taskId; const agentId = props.agentId; const initialFontSize = props.fontSize ?? 13; - const attachExisting = props.attachExisting; - const preserveOnWindowUnload = props.preserveOnWindowUnload === true; + const attachExisting = props.attachExisting ?? true; + const preserveSessionOnCleanup = props.preserveSessionOnCleanup === true; + let ptyDetachedByLanding = false; + + function taskPtyDetached(): boolean { + return ptyDetachedByLanding || isLandedTaskState(store.tasks[taskId]?.landingState); + } + + function canForwardInput(): boolean { + return shouldForwardTerminalInput(store.tasks[taskId]?.controlledBy, taskPtyDetached()); + } term = new Terminal({ cursorBlink: true, @@ -137,6 +147,7 @@ export function TerminalView(props: TerminalViewProps) { theme: activeTerminalTheme(), allowProposedApi: true, scrollback: TERMINAL_SCROLLBACK_LINES, + disableStdin: computeDisableStdin(store.tasks[taskId]?.controlledBy) || taskPtyDetached(), }); fitAddon = new FitAddon(); @@ -158,13 +169,16 @@ export function TerminalView(props: TerminalViewProps) { term.open(containerRef); - // Block direct PTY keyboard input when the task is coordinator-controlled. + // Block direct PTY keyboard input when the task is coordinator-controlled or + // after self-landing has removed the backend PTY. // disableStdin prevents xterm from forwarding keystrokes to the process; - // copy/paste/scrollback still work. The effect re-runs when controlledBy - // changes so Take Control / Release Control works without a remount. + // copy/paste/scrollback still work. The effect re-runs when control or + // landing state changes, without remounting the terminal. createEffect(() => { - const controlledBy = store.tasks[props.taskId]?.controlledBy; - if (term) term.options.disableStdin = computeDisableStdin(controlledBy); + const task = store.tasks[taskId]; + if (isLandedTaskState(task?.landingState)) ptyDetachedByLanding = true; + if (term) + term.options.disableStdin = computeDisableStdin(task?.controlledBy) || taskPtyDetached(); }); // File path link provider — makes file paths clickable in terminal output @@ -463,6 +477,7 @@ export function TerminalView(props: TerminalViewProps) { // Resume PTY reader when xterm.js has caught up if (watermark < FLOW_LOW && ptyPaused) { ptyPaused = false; + if (taskPtyDetached()) return; invoke(IPC.ResumeAgent, { agentId }).catch((err: unknown) => { logWarn('terminal.flow', 'ResumeAgent failed', { err }); ptyPaused = false; @@ -496,7 +511,7 @@ export function TerminalView(props: TerminalViewProps) { watermark += chunk.length; // Pause PTY reader when xterm.js falls behind - if (watermark > FLOW_HIGH && !ptyPaused) { + if (watermark > FLOW_HIGH && !ptyPaused && !taskPtyDetached()) { ptyPaused = true; invoke(IPC.PauseAgent, { agentId }).catch((err: unknown) => { logWarn('terminal.flow', 'PauseAgent failed', { err }); @@ -545,6 +560,7 @@ export function TerminalView(props: TerminalViewProps) { clearTimeout(inputFlushTimer); inputFlushTimer = undefined; } + if (!canForwardInput()) return; fireAndForget(IPC.WriteToAgent, { agentId, data }); if (!props.isShell && (data.includes('\r') || data.includes('\n'))) { setTaskLastInputAt(props.taskId); @@ -552,6 +568,7 @@ export function TerminalView(props: TerminalViewProps) { } function enqueueInput(data: string) { + if (!canForwardInput()) return; pendingInput += data; if (pendingInput.length >= 2048) { flushPendingInput(); @@ -567,6 +584,7 @@ export function TerminalView(props: TerminalViewProps) { // eslint-disable-next-line solid/reactivity -- event handler reads current prop values intentionally term.onData((data) => { + if (!canForwardInput()) return; if (props.onPromptDetected) { for (const ch of data) { if (ch === '\r') { @@ -597,6 +615,7 @@ export function TerminalView(props: TerminalViewProps) { if (!pendingResize) return; const { cols, rows } = pendingResize; pendingResize = null; + if (taskPtyDetached()) return; if (cols === lastSentCols && rows === lastSentRows) return; lastSentCols = cols; lastSentRows = rows; @@ -637,6 +656,13 @@ export function TerminalView(props: TerminalViewProps) { function startSpawn() { if (!term || spawnStarted) return; + const landingState = store.tasks[taskId]?.landingState; + if ( + landingState === 'landed_pending_review' || + landingState === 'landed_cleanup_failed' || + landingState === 'reviewed' + ) + return; spawnStarted = true; invoke(IPC.SpawnAgent, { taskId, @@ -702,7 +728,7 @@ export function TerminalView(props: TerminalViewProps) { } onCleanup(() => { - const preserveSession = windowUnloading && preserveOnWindowUnload; + const preserveSession = preserveSessionOnCleanup; if (!windowUnloading || preserveSession) { flushPendingInput(); flushPendingResize(); @@ -715,11 +741,11 @@ export function TerminalView(props: TerminalViewProps) { webglAddon?.dispose(); webglAddon = undefined; unregisterTerminal(agentId); - if (preserveSession && ptyPaused) { + if (ptyPaused && !taskPtyDetached()) { fireAndForget(IPC.ResumeAgent, { agentId }); ptyPaused = false; } - if (!preserveSession && spawnStarted) { + if (!preserveSession && spawnStarted && !taskPtyDetached()) { fireAndForget(IPC.KillAgent, { agentId }); } term?.dispose(); diff --git a/src/components/prompt-autosend-readiness.test.ts b/src/components/prompt-autosend-readiness.test.ts new file mode 100644 index 00000000..3495d101 --- /dev/null +++ b/src/components/prompt-autosend-readiness.test.ts @@ -0,0 +1,25 @@ +import { describe, expect, it } from 'vitest'; +import { isStartupBlockingAutoSend } from './prompt-autosend-readiness'; + +describe('isStartupBlockingAutoSend', () => { + it('blocks while Codex is still loading the model', () => { + expect(isStartupBlockingAutoSend('model: loading /model to change\n›')).toBe(true); + }); + + it('blocks while Codex is starting MCP servers', () => { + expect( + isStartupBlockingAutoSend( + 'Starting MCP servers (0/2): codex_apps, parallel-code\n› Explain this codebase', + ), + ).toBe(true); + }); + + it('blocks while Codex is booting a single MCP server', () => { + expect(isStartupBlockingAutoSend('Booting MCP server: parallel-code\n›')).toBe(true); + }); + + it('ignores stale startup text before the latest screen clear', () => { + const tail = 'Starting MCP servers (0/2): parallel-code\x1b[2J\x1b[H›'; + expect(isStartupBlockingAutoSend(tail)).toBe(false); + }); +}); diff --git a/src/components/prompt-autosend-readiness.ts b/src/components/prompt-autosend-readiness.ts new file mode 100644 index 00000000..a1d3b67a --- /dev/null +++ b/src/components/prompt-autosend-readiness.ts @@ -0,0 +1,13 @@ +import { normalizeCurrentFrame } from '../store/taskStatus'; + +const STARTUP_BLOCKING_PATTERNS: RegExp[] = [ + /\bmodel:\s*loading\b/i, + /\bBooting\s+MCP\s+server\b/i, + /\bStarting\s+MCP\s+servers?\b/i, +]; + +export function isStartupBlockingAutoSend(tail: string): boolean { + const frame = normalizeCurrentFrame(tail); + if (!frame) return false; + return STARTUP_BLOCKING_PATTERNS.some((re) => re.test(frame)); +} diff --git a/src/components/prompt-control.test.ts b/src/components/prompt-control.test.ts index c6fca1b7..efbb8b83 100644 --- a/src/components/prompt-control.test.ts +++ b/src/components/prompt-control.test.ts @@ -1,28 +1,96 @@ import { describe, expect, it } from 'vitest'; -import { shouldHandoffCoordinatorQuestion } from './prompt-control'; +import { shouldAckInitialPromptDelivery, shouldHandoffCoordinatorQuestion } from './prompt-control'; + +const handoffDefaults = { + controlledBy: 'coordinator' as const, + questionActive: true, + agentIdle: true, + startupBlocking: false, + autoTrustSettling: false, + recentPromptEcho: false, +}; describe('shouldHandoffCoordinatorQuestion', () => { it('hands off when a coordinator-controlled task is asking a question', () => { - expect( - shouldHandoffCoordinatorQuestion({ controlledBy: 'coordinator', questionActive: true }), - ).toBe(true); + expect(shouldHandoffCoordinatorQuestion(handoffDefaults)).toBe(true); }); it('does not hand off when already under human control', () => { - expect(shouldHandoffCoordinatorQuestion({ controlledBy: 'human', questionActive: true })).toBe( + expect(shouldHandoffCoordinatorQuestion({ ...handoffDefaults, controlledBy: 'human' })).toBe( false, ); }); it('does not hand off when no question is active', () => { + expect(shouldHandoffCoordinatorQuestion({ ...handoffDefaults, questionActive: false })).toBe( + false, + ); + }); + + it('does not hand off when controlledBy is undefined even if questionActive is true', () => { + expect(shouldHandoffCoordinatorQuestion({ ...handoffDefaults, controlledBy: undefined })).toBe( + false, + ); + }); + + it('does not hand off while startup output is still blocking auto-send', () => { + expect(shouldHandoffCoordinatorQuestion({ ...handoffDefaults, startupBlocking: true })).toBe( + false, + ); + }); + + it('does not hand off while auto-trust is still settling', () => { + expect(shouldHandoffCoordinatorQuestion({ ...handoffDefaults, autoTrustSettling: true })).toBe( + false, + ); + }); + + it('does not hand off while the agent is actively producing output', () => { + expect(shouldHandoffCoordinatorQuestion({ ...handoffDefaults, agentIdle: false })).toBe(false); + }); + + it('does not hand off when the question-like text is from a recent prompt echo', () => { + expect(shouldHandoffCoordinatorQuestion({ ...handoffDefaults, recentPromptEcho: true })).toBe( + false, + ); + }); +}); + +describe('shouldAckInitialPromptDelivery', () => { + it('acks an exact coordinated initial-prompt send', () => { + expect( + shouldAckInitialPromptDelivery({ + coordinatedBy: 'coordinator-1', + initialPrompt: 'do the work', + sentText: 'do the work', + }), + ).toBe(true); + }); + + it('uses the pre-send initial prompt snapshot, not later prop state', () => { expect( - shouldHandoffCoordinatorQuestion({ controlledBy: 'coordinator', questionActive: false }), + shouldAckInitialPromptDelivery({ + coordinatedBy: 'coordinator-1', + initialPrompt: undefined, + sentText: 'do the work', + }), ).toBe(false); }); - it('does not hand off when controlledBy is undefined even if questionActive is true', () => { + it('does not ack edited or non-coordinated sends', () => { + expect( + shouldAckInitialPromptDelivery({ + coordinatedBy: 'coordinator-1', + initialPrompt: 'do the work', + sentText: 'different work', + }), + ).toBe(false); expect( - shouldHandoffCoordinatorQuestion({ controlledBy: undefined, questionActive: true }), + shouldAckInitialPromptDelivery({ + coordinatedBy: undefined, + initialPrompt: 'do the work', + sentText: 'do the work', + }), ).toBe(false); }); }); diff --git a/src/components/prompt-control.ts b/src/components/prompt-control.ts index 431cad14..405d0e31 100644 --- a/src/components/prompt-control.ts +++ b/src/components/prompt-control.ts @@ -1,6 +1,26 @@ export function shouldHandoffCoordinatorQuestion(params: { controlledBy: 'coordinator' | 'human' | undefined; questionActive: boolean; + agentIdle: boolean; + startupBlocking: boolean; + autoTrustSettling: boolean; + recentPromptEcho: boolean; }): boolean { - return params.controlledBy === 'coordinator' && params.questionActive; + return ( + params.controlledBy === 'coordinator' && + params.questionActive && + params.agentIdle && + !params.startupBlocking && + !params.autoTrustSettling && + !params.recentPromptEcho + ); +} + +export function shouldAckInitialPromptDelivery(params: { + coordinatedBy: string | undefined; + initialPrompt: string | undefined; + sentText: string; +}): boolean { + const initialPrompt = params.initialPrompt?.trim(); + return Boolean(params.coordinatedBy && initialPrompt && params.sentText.trim() === initialPrompt); } diff --git a/src/lib/terminalDisableStdin.test.ts b/src/lib/terminalDisableStdin.test.ts index 7a79aecb..7b4f8a8d 100644 --- a/src/lib/terminalDisableStdin.test.ts +++ b/src/lib/terminalDisableStdin.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest'; -import { computeDisableStdin } from './terminalDisableStdin'; +import { computeDisableStdin, shouldForwardTerminalInput } from './terminalDisableStdin'; describe('computeDisableStdin', () => { it('disables stdin when task is coordinator-controlled', () => { @@ -14,3 +14,18 @@ describe('computeDisableStdin', () => { expect(computeDisableStdin(undefined)).toBe(false); }); }); + +describe('shouldForwardTerminalInput', () => { + it('drops input while coordinator-controlled', () => { + expect(shouldForwardTerminalInput('coordinator', false)).toBe(false); + }); + + it('drops input after the backend PTY is detached', () => { + expect(shouldForwardTerminalInput('human', true)).toBe(false); + }); + + it('forwards input for human-controlled and uncoordinated live PTYs', () => { + expect(shouldForwardTerminalInput('human', false)).toBe(true); + expect(shouldForwardTerminalInput(undefined, false)).toBe(true); + }); +}); diff --git a/src/lib/terminalDisableStdin.ts b/src/lib/terminalDisableStdin.ts index af9534ab..48213a6a 100644 --- a/src/lib/terminalDisableStdin.ts +++ b/src/lib/terminalDisableStdin.ts @@ -1,3 +1,10 @@ export function computeDisableStdin(controlledBy: 'coordinator' | 'human' | undefined): boolean { return controlledBy === 'coordinator'; } + +export function shouldForwardTerminalInput( + controlledBy: 'coordinator' | 'human' | undefined, + ptyDetached: boolean, +): boolean { + return !ptyDetached && !computeDisableStdin(controlledBy); +} diff --git a/src/store/agents.test.ts b/src/store/agents.test.ts new file mode 100644 index 00000000..48e23752 --- /dev/null +++ b/src/store/agents.test.ts @@ -0,0 +1,136 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockSetStore, mockMarkAgentSpawned } = vi.hoisted(() => ({ + mockSetStore: vi.fn(), + mockMarkAgentSpawned: vi.fn(), +})); + +let mockAgents: Record = {}; + +interface AgentLike { + id: string; + taskId: string; + def: AgentDefLike; + resumed: boolean; + status: 'running' | 'exited'; + exitCode: number | null; + signal: string | null; + lastOutput: string[]; + generation: number; + spawnDelayMs?: number; + attachExisting?: boolean; +} + +interface AgentDefLike { + id: string; + name: string; + command: string; + args: string[]; + resume_args: string[]; + skip_permissions_args: string[]; + description: string; +} + +function applySetStore(...args: unknown[]): void { + if (args.length === 1 && typeof args[0] === 'function') { + (args[0] as (s: { agents: Record }) => void)({ agents: mockAgents }); + } +} + +vi.mock('./core', () => ({ + store: new Proxy({} as Record, { + get(_target, prop) { + if (prop === 'agents') return mockAgents; + return undefined; + }, + }), + setStore: mockSetStore, +})); + +vi.mock('./taskStatus', () => ({ + markAgentSpawned: mockMarkAgentSpawned, + refreshTaskStatus: vi.fn(), + clearAgentActivity: vi.fn(), +})); + +vi.mock('./persistence', () => ({ saveState: vi.fn() })); +vi.mock('../lib/ipc', () => ({ invoke: vi.fn() })); + +import { restartAgent, switchAgent } from './agents'; + +const codexDef: AgentDefLike = { + id: 'codex', + name: 'Codex', + command: 'codex', + args: [], + resume_args: ['resume', '--last'], + skip_permissions_args: [], + description: '', +}; + +function exitedAgent(overrides: Partial = {}): AgentLike { + return { + id: 'agent-1', + taskId: 'task-1', + def: codexDef, + resumed: false, + status: 'exited', + exitCode: 1, + signal: '1', + lastOutput: ['interrupted'], + generation: 2, + spawnDelayMs: 500, + attachExisting: true, + ...overrides, + }; +} + +beforeEach(() => { + vi.clearAllMocks(); + mockSetStore.mockImplementation((...args: unknown[]) => applySetStore(...args)); + mockAgents = { 'agent-1': exitedAgent() }; +}); + +describe('restartAgent', () => { + it('marks the next terminal mount as an explicit process replacement', () => { + restartAgent('agent-1', true); + + expect(mockAgents['agent-1']).toMatchObject({ + status: 'running', + exitCode: null, + signal: null, + lastOutput: [], + resumed: true, + generation: 3, + attachExisting: false, + }); + expect(mockAgents['agent-1'].spawnDelayMs).toBeUndefined(); + expect(mockMarkAgentSpawned).toHaveBeenCalledWith('agent-1'); + }); +}); + +describe('switchAgent', () => { + it('marks the next terminal mount as an explicit process replacement', () => { + const claudeDef: AgentDefLike = { + ...codexDef, + id: 'claude', + name: 'Claude', + command: 'claude', + }; + + switchAgent('agent-1', claudeDef); + + expect(mockAgents['agent-1']).toMatchObject({ + def: claudeDef, + status: 'running', + exitCode: null, + signal: null, + lastOutput: [], + resumed: false, + generation: 3, + attachExisting: false, + }); + expect(mockAgents['agent-1'].spawnDelayMs).toBeUndefined(); + expect(mockMarkAgentSpawned).toHaveBeenCalledWith('agent-1'); + }); +}); diff --git a/src/store/agents.ts b/src/store/agents.ts index 66409c7c..e1854981 100644 --- a/src/store/agents.ts +++ b/src/store/agents.ts @@ -113,7 +113,7 @@ export function restartAgent(agentId: string, useResumeArgs: boolean): void { s.agents[agentId].lastOutput = []; s.agents[agentId].resumed = useResumeArgs; s.agents[agentId].spawnDelayMs = undefined; - s.agents[agentId].attachExisting = undefined; + s.agents[agentId].attachExisting = false; s.agents[agentId].generation += 1; } }), @@ -132,7 +132,7 @@ export function switchAgent(agentId: string, newDef: AgentDef): void { s.agents[agentId].lastOutput = []; s.agents[agentId].resumed = false; s.agents[agentId].spawnDelayMs = undefined; - s.agents[agentId].attachExisting = undefined; + s.agents[agentId].attachExisting = false; s.agents[agentId].generation += 1; } }), diff --git a/src/store/autosave.ts b/src/store/autosave.ts index 90de7854..1acf68e4 100644 --- a/src/store/autosave.ts +++ b/src/store/autosave.ts @@ -68,6 +68,11 @@ function persistedSnapshot(): string { signalDoneAt: t.signalDoneAt, signalDoneConsumed: t.signalDoneConsumed, needsReview: t.needsReview, + verification: t.verification, + landingState: t.landingState, + landingReason: t.landingReason, + landingSummary: t.landingSummary, + landedMetadata: t.landedMetadata, controlledBy: t.controlledBy, }, ]; diff --git a/src/store/coordinator-preamble.test.ts b/src/store/coordinator-preamble.test.ts new file mode 100644 index 00000000..8b84f211 --- /dev/null +++ b/src/store/coordinator-preamble.test.ts @@ -0,0 +1,11 @@ +import { describe, expect, it } from 'vitest'; +import { COORDINATOR_PREAMBLE } from './coordinator-preamble'; + +describe('COORDINATOR_PREAMBLE', () => { + it('warns coordinators not to resend assignments based on startup placeholders', () => { + expect(COORDINATOR_PREAMBLE).toContain('do NOT assume the initial assignment failed'); + expect(COORDINATOR_PREAMBLE).toContain('Improve documentation in @filename'); + expect(COORDINATOR_PREAMBLE).toContain('Wait briefly'); + expect(COORDINATOR_PREAMBLE).toContain('re-sending the full task'); + }); +}); diff --git a/src/store/coordinator-preamble.ts b/src/store/coordinator-preamble.ts index 1d3aebf1..4005504e 100644 --- a/src/store/coordinator-preamble.ts +++ b/src/store/coordinator-preamble.ts @@ -10,7 +10,7 @@ You have MCP tools to coordinate work across isolated git worktree tasks: - list_tasks — List all coordinated tasks with status - get_task_status — Detailed status of a task - send_prompt — Send follow-up instructions to a task's agent -- wait_for_signal_done — Wait for ANY sub-task to call signal_done. Returns { taskId, name, remaining }. +- wait_for_signal_done — Legacy/manual-review wait for ANY sub-task to call signal_done. Normal self-landing tasks do not call it. - wait_for_idle — Wait until an agent is idle at its prompt (use for send_prompt follow-ups) - get_task_diff — Get changed files and diff for a task - get_task_output — Get recent terminal output from a task @@ -19,20 +19,18 @@ You have MCP tools to coordinate work across isolated git worktree tasks: RULES: 1. You MUST NOT use your built-in Agent tool to spawn new Parallel Code tasks — you MUST use \ -create_task for all new work. The sole EXCEPTION is review/landing: after wait_for_signal_done \ -returns a completed taskId, dispatch a native background Agent to run \ -get_task_diff → merge_task → close_task for that taskId. Give the Agent the taskId and all context \ -it needs to be self-contained, including the baseBranch. The Agent must handle merge conflicts, \ -test failures, and retries autonomously. You MUST wait for each landing Agent's result before \ -declaring the overall job complete — inspect the result and escalate clearly if it reports failure. +create_task for all new work. Normal sub-tasks self-land by calling land_self after they commit \ +and verify their work. Do not send merge_task or close_task for a task that has already landed. \ +Use merge_task and close_task only for legacy/manual-review tasks that called signal_done or for \ +tasks you explicitly asked to land manually. 2. If the user's request is ambiguous, the specified work queue file does not exist, or you are \ unsure how to split the work into tasks, STOP and ASK the user before proceeding. Do not improvise \ a work queue from other files or directories — work only from sources explicitly specified in your \ prompt. 3. Assign each sub-agent one specific, concrete task — never point at a list and ask it to "pick one." \ Give complete, self-contained context: file paths, expected behavior, constraints. Sub-agents start \ -with zero memory of this conversation. Always tell sub-agents to run the project's tests and type \ -checker before calling signal_done — signal_done means "verified passing", not "I think I'm done." +with zero memory of this conversation. Always tell sub-agents to commit their work, run the \ +project's tests and type checker, and call land_self with structured verification when done. 4. baseBranch for sub-tasks MUST be your coordinator task's own branch. Run \ \`git rev-parse --abbrev-ref HEAD\` in your worktree to find it. Sub-tasks branch from your commit, \ so they inherit all your in-progress work. Do NOT use main or another shared branch as baseBranch \ @@ -41,21 +39,40 @@ coordinator branch means sub-tasks miss your changes and their diffs bloat with 5. Run at most {{MAX_CONCURRENT}} sub-tasks concurrently. Never exceed this limit. Avoid giving \ parallel sub-tasks work that touches the same files — run those sequentially. 6. THE SLIDING-WINDOW PATTERN — YOU MUST FOLLOW THIS EXACTLY: - a. Pick up to {{MAX_CONCURRENT}} items from your backlog and create a task for each. Track two \ -sets yourself: backlog (items not yet assigned) and landingAgents (dispatched but not yet returned). - b. Call wait_for_signal_done() — no taskId argument — to wait for ANY in-flight task to complete. - c. Immediately dispatch a background Agent to land that task (see rule 1). Add it to landingAgents. \ -Pass the Agent: the taskId, the absolute path to the work queue file in YOUR worktree, and the \ -baseBranch. The Agent runs: get_task_diff → update and commit the work queue file (remove the \ -completed item) → merge_task(taskId, { squash: true }) → close_task(taskId). \ -The Agent must commit the work queue update BEFORE calling merge_task (rule 8). - d. If backlog is non-empty AND in-flight sub-task count < {{MAX_CONCURRENT}}, spawn a replacement \ -task immediately (without waiting for the landing Agent). - e. If remaining > 0 OR backlog is non-empty, go back to step (b) to wait for the next sub-task. - f. When remaining === 0 AND backlog is empty, wait for every Agent in landingAgents to return. \ -Inspect each result. If any reports failure, conflict, or inability to merge/close, report it \ -clearly — do NOT declare the job complete until all landings have succeeded or been escalated. -7. merge_task is REQUIRED before close_task. close_task without a prior successful merge_task \ + a. Pick up to {{MAX_CONCURRENT}} items from your backlog and create a task for each. Track \ +backlog (items not yet assigned), inFlight (created tasks that still appear in list_tasks without a \ +terminal/error state), landed (self-landed tasks that disappeared from list_tasks, plus any legacy \ +landed_pending_review or reviewed tasks), and blocked (landing_escalated, landing_failed, \ +landed_cleanup_failed, exited, or signal_done manual-review tasks). + b. Poll list_tasks to find tasks that completed or need attention. A successful self-landed task \ +is merged, cleaned up, and removed from list_tasks by the backend; do not merge or close it. Legacy \ +tasks with landed_pending_review or reviewed have also already merged and cleaned up. If no task \ +changed state, wait at least 10 seconds before polling list_tasks again. + c. For each task that appears complete, blocked, or manual-review, first inspect get_task_status. \ +Treat status=running as authoritative, even if a notification or terminal output says "completed." \ +If its status is still running and the task can receive prompts, do NOT edit its worktree, verify \ +its work yourself, merge, or close it. Use send_prompt with specific findings and let that sub-agent \ +fix, test, commit, and call land_self or return for review. Then poll list_tasks or use wait_for_idle \ +when you need to wait for the agent's next response. + d. Immediately after create_task, do NOT assume the initial assignment failed just because \ +get_task_status says idle or get_task_output shows a startup/default placeholder such as \ +"Improve documentation in @filename." That placeholder can be stale while the dispatched prompt is \ +queued or being processed. Wait briefly, then check get_task_status and get_task_output again. Only \ +send_prompt if there is clear evidence the agent is asking for input, started unrelated work, or the \ +prompt delivery actually failed; if uncertain, status-check instead of re-sending the full task. + e. Only use the manual get_task_diff → merge_task → close_task path when the task is genuinely \ +terminal/manual-review/blocked, or when you explicitly take ownership. Before taking ownership, \ +state the reason AND the evidence, such as "the agent exited," "the agent is blocked," \ +"the task is under manual-review state," "backend merge failed and needs conflict resolution," \ +or "the user explicitly asked me to take over." \ +Coordinator-side normalization is allowed only after merge_task fails, only for mechanical conflicts \ +caused by already-landed shared fixes, and only with no behavioral changes. State the reason before editing. + f. If backlog is non-empty AND inFlight count < {{MAX_CONCURRENT}}, spawn replacements immediately. + g. Repeat steps (b)-(f) until backlog is empty and inFlight is empty. + h. Before declaring the overall job complete, list landed and blocked outcomes clearly. Do not \ +declare success if any task is blocked, failed, cleanup-failed, or waiting for manual review. +7. For manual-review tasks only, merge_task is REQUIRED before close_task. close_task without a \ +prior successful merge_task \ permanently discards all sub-task work. Direct git operations (git merge, git cherry-pick) do NOT \ substitute for merge_task — the backend cleans up worktrees and branches only when merge_task \ succeeds. If merge_task fails with "uncommitted changes", commit your local edits first (see rule 8) \ @@ -64,7 +81,8 @@ then retry merge_task. any git operation. A dirty working tree will cause merge_task to fail. 9. Before assigning a task, verify it is not already implemented. Read the relevant files rather \ than assuming work is pending. -10. Use send_prompt + wait_for_idle to give follow-up instructions to a running task. +10. Use send_prompt + wait_for_idle to give follow-up instructions to a running task. Do not resend \ +the full original assignment merely because startup output still shows a placeholder prompt. --- `; diff --git a/src/store/desktopNotifications.test.ts b/src/store/desktopNotifications.test.ts new file mode 100644 index 00000000..7e369c3c --- /dev/null +++ b/src/store/desktopNotifications.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from 'vitest'; +import { + clearPendingNotification, + reconcilePendingNotification, + type NotificationType, +} from './desktopNotifications'; + +describe('reconcilePendingNotification', () => { + it('drops a queued notification when the task recovers before flush', () => { + const pending = new Map([['task-1', 'error']]); + + const result = reconcilePendingNotification(pending, 'task-1', 'error', 'active'); + + expect(result).toBeNull(); + expect(pending.has('task-1')).toBe(false); + }); + + it('keeps scheduling real error transitions', () => { + const pending = new Map(); + + const result = reconcilePendingNotification(pending, 'task-1', 'active', 'error'); + + expect(result).toBe('error'); + }); + + it('does not notify during initial population', () => { + const pending = new Map(); + + const result = reconcilePendingNotification(pending, 'task-1', undefined, 'error'); + + expect(result).toBeNull(); + expect(pending.has('task-1')).toBe(false); + }); + + it('drops queued notifications for removed tasks', () => { + const pending = new Map([['task-1', 'error']]); + + clearPendingNotification(pending, 'task-1'); + + expect(pending.has('task-1')).toBe(false); + }); +}); diff --git a/src/store/desktopNotifications.ts b/src/store/desktopNotifications.ts index 285f36ac..32da842b 100644 --- a/src/store/desktopNotifications.ts +++ b/src/store/desktopNotifications.ts @@ -7,7 +7,28 @@ import { IPC } from '../../electron/ipc/channels'; const DEBOUNCE_MS = 3_000; -type NotificationType = 'ready' | 'needs_input' | 'error'; +export type NotificationType = 'ready' | 'needs_input' | 'error'; + +export function reconcilePendingNotification( + pending: Map, + taskId: string, + previous: TaskAttentionState | undefined, + current: TaskAttentionState, +): NotificationType | null { + if (previous === undefined || previous === current) return null; + if (current === 'ready') return 'ready'; + if (current === 'needs_input') return 'needs_input'; + if (current === 'error') return 'error'; + pending.delete(taskId); + return null; +} + +export function clearPendingNotification( + pending: Map, + taskId: string, +): void { + pending.delete(taskId); +} export function startDesktopNotificationWatcher(windowFocused: Accessor): () => void { const previousAttention = new Map(); @@ -83,21 +104,16 @@ export function startDesktopNotificationWatcher(windowFocused: Accessor previousAttention.set(taskId, current); // Skip initial population - if (prev === undefined) continue; - if (prev === current) continue; - - if (current === 'ready' && prev !== 'ready') { - scheduleBatch('ready', taskId); - } else if (current === 'needs_input' && prev !== 'needs_input') { - scheduleBatch('needs_input', taskId); - } else if (current === 'error' && prev !== 'error') { - scheduleBatch('error', taskId); - } + const notificationType = reconcilePendingNotification(pending, taskId, prev, current); + if (notificationType) scheduleBatch(notificationType, taskId); } // Clean up removed tasks for (const taskId of previousAttention.keys()) { - if (!seen.has(taskId)) previousAttention.delete(taskId); + if (!seen.has(taskId)) { + previousAttention.delete(taskId); + clearPendingNotification(pending, taskId); + } } }); diff --git a/src/store/landing.ts b/src/store/landing.ts new file mode 100644 index 00000000..6b2c3896 --- /dev/null +++ b/src/store/landing.ts @@ -0,0 +1,7 @@ +import type { LandingState } from './types'; + +export function isLandedTaskState(state: LandingState | null | undefined): boolean { + return ( + state === 'landed_pending_review' || state === 'landed_cleanup_failed' || state === 'reviewed' + ); +} diff --git a/src/store/persistence.test.ts b/src/store/persistence.test.ts index 734e8da0..a4b9f8d8 100644 --- a/src/store/persistence.test.ts +++ b/src/store/persistence.test.ts @@ -195,6 +195,52 @@ describe('loadState agent definition migrations', () => { }); }); +describe('landing state persistence', () => { + it('hydrates landed metadata and verification fields', async () => { + const def = agentDef(); + mockInvoke.mockResolvedValueOnce( + JSON.stringify({ + projects: [{ id: 'project-1', name: 'Repo', path: '/repo', color: 'hsl(0, 70%, 75%)' }], + lastProjectId: 'project-1', + lastAgentId: null, + taskOrder: ['task-1'], + collapsedTaskOrder: [], + tasks: { + 'task-1': { + ...persistedTask(def), + coordinatedBy: 'coord-1', + needsReview: true, + verification: { + checks: [{ name: 'test', command: 'npm test', result: 'passed' }], + }, + landingState: 'landed_pending_review', + landedMetadata: { + taskId: 'task-1', + taskName: 'Task', + coordinatorTaskId: 'coord-1', + targetBranch: 'main', + landedCommit: 'abc123', + landedAt: '2026-05-24T00:00:00Z', + landedOrder: 1, + verification: { + checks: [{ name: 'test', command: 'npm test', result: 'passed' }], + }, + }, + }, + }, + activeTaskId: 'task-1', + sidebarVisible: true, + }), + ); + + await loadState(); + + expect(store.tasks['task-1'].landingState).toBe('landed_pending_review'); + expect(store.tasks['task-1'].landedMetadata?.landedCommit).toBe('abc123'); + expect(store.tasks['task-1'].verification?.checks[0].result).toBe('passed'); + }); +}); + // Minimal valid payload — no theme fields — used as a base for theme migration tests. function basePayload(overrides: Record = {}): string { return JSON.stringify({ diff --git a/src/store/persistence.ts b/src/store/persistence.ts index 3212b7ce..08879505 100644 --- a/src/store/persistence.ts +++ b/src/store/persistence.ts @@ -207,6 +207,11 @@ export async function saveState(): Promise { signalDoneAt: task.signalDoneAt, signalDoneConsumed: task.signalDoneConsumed, needsReview: task.needsReview, + verification: task.verification, + landingState: task.landingState, + landingReason: task.landingReason, + landingSummary: task.landingSummary, + landedMetadata: task.landedMetadata, }; } @@ -262,6 +267,11 @@ export async function saveState(): Promise { signalDoneAt: task.signalDoneAt, signalDoneConsumed: task.signalDoneConsumed, needsReview: task.needsReview, + verification: task.verification, + landingState: task.landingState, + landingReason: task.landingReason, + landingSummary: task.landingSummary, + landedMetadata: task.landedMetadata, }; } @@ -686,6 +696,11 @@ export async function loadState(): Promise { signalDoneAt: pt.signalDoneAt, signalDoneConsumed: pt.signalDoneConsumed, needsReview: pt.needsReview, + verification: pt.verification, + landingState: pt.landingState, + landingReason: pt.landingReason, + landingSummary: pt.landingSummary, + landedMetadata: pt.landedMetadata, }; s.tasks[taskId] = task; @@ -785,6 +800,11 @@ export async function loadState(): Promise { signalDoneAt: pt.signalDoneAt, signalDoneConsumed: pt.signalDoneConsumed, needsReview: pt.needsReview, + verification: pt.verification, + landingState: pt.landingState, + landingReason: pt.landingReason, + landingSummary: pt.landingSummary, + landedMetadata: pt.landedMetadata, }; s.tasks[taskId] = task; diff --git a/src/store/store.ts b/src/store/store.ts index 4c499b4c..d9a8b228 100644 --- a/src/store/store.ts +++ b/src/store/store.ts @@ -40,6 +40,7 @@ export { setLastPrompt, clearInitialPrompt, clearPrefillPrompt, + clearTaskLandingReview, setPrefillPrompt, reorderTask, reorderTaskVisually, @@ -62,6 +63,7 @@ export { markTaskMcpPending, markTaskMcpReady, setTaskMcpLaunchArgs, + applyTaskMcpLaunchResult, markTaskMcpError, retryTaskMcpStartup, } from './tasks'; @@ -159,6 +161,7 @@ export { isTrustQuestionAutoHandled, isAutoTrustSettling, isAgentAskingQuestion, + isAgentIdle, refreshTaskStatus, startTaskStatusPolling, stopTaskStatusPolling, diff --git a/src/store/tasks.test.ts b/src/store/tasks.test.ts index 4c3b1b20..c11d0610 100644 --- a/src/store/tasks.test.ts +++ b/src/store/tasks.test.ts @@ -125,6 +125,7 @@ vi.stubGlobal('window', { }); import { + createTask, initMCPListeners, setTaskControl, collapseTask, @@ -135,9 +136,12 @@ import { markTaskMcpReady, markTaskMcpError, retryTaskMcpStartup, + clearTaskLandingReview, } from './tasks'; import { getCoordinatorChildren } from './sidebar-order'; import { markAgentSpawned, rescheduleTaskStatusPolling } from './taskStatus'; +import { saveState } from './persistence'; +import { getProjectBranchPrefix, getProjectPath, isProjectMissing } from './projects'; // ─── Coordinator listener setup ─────────────────────────────────────────────── @@ -168,6 +172,7 @@ const baseEvent = { worktreePath: '/repo/.worktrees/sub-task-1', agentId: 'agent-sub-1', coordinatorTaskId: 'coordinator-1', + mcpLaunchArgs: ['--config', 'mcp_servers.parallel-code={ command = "node" }'], }; describe('coordinator controlledBy state machine (item 9: UI disabled-state regression tests)', () => { @@ -243,6 +248,11 @@ describe('MCP_TaskCreated IPC handler', () => { expect(mockTasks['sub-task-1'].coordinatedBy).toBe('coordinator-1'); }); + it('stores MCP launch args so coordinated subtasks respawn with MCP configured', () => { + taskCreatedHandler(baseEvent); + expect(mockTasks['sub-task-1'].mcpLaunchArgs).toEqual(baseEvent.mcpLaunchArgs); + }); + it('regression: sub-tasks must not be created without controlledBy defined', () => { taskCreatedHandler(baseEvent); expect(mockTasks['sub-task-1'].controlledBy).toBeDefined(); @@ -384,12 +394,60 @@ describe('MCP startup status transitions', () => { worktreePath: '/repo/.worktrees/coord', }; mockAgents['agent-coord'] = { def: { command: 'claude', args: [] } }; - mockInvoke.mockResolvedValueOnce(undefined); + mockInvoke.mockResolvedValueOnce({ mcpLaunchArgs: ['--mcp-config', '/tmp/coord.json'] }); + + markTaskMcpPending('coord-1'); + await retryTaskMcpStartup('coord-1'); + + expect(mockTasks['coord-1'].mcpStartupStatus).toBe('ready'); + expect(mockTasks['coord-1'].mcpLaunchArgs).toEqual(['--mcp-config', '/tmp/coord.json']); + }); + + it('allows Claude Docker coordinator MCP startup with no launch args', async () => { + mockTasks['coord-1'] = { + agentIds: ['agent-coord'], + shellAgentIds: [], + coordinatorMode: true, + projectId: 'proj-1', + gitIsolation: 'worktree', + worktreePath: '/repo/.worktrees/coord', + dockerMode: true, + }; + mockAgents['agent-coord'] = { def: { command: 'claude', args: [] } }; + mockInvoke.mockResolvedValueOnce({ mcpLaunchArgs: [] }); markTaskMcpPending('coord-1'); await retryTaskMcpStartup('coord-1'); expect(mockTasks['coord-1'].mcpStartupStatus).toBe('ready'); + expect(mockTasks['coord-1'].mcpStartupError).toBeUndefined(); + }); + + it('missing MCP launch args leaves a Codex coordinated task in error', async () => { + mockTasks['coord-1'] = { + agentIds: [], + shellAgentIds: [], + coordinatorMode: true, + projectId: 'proj-1', + mcpStartupStatus: 'ready', + }; + mockTasks['child-1'] = { + agentIds: ['agent-child'], + shellAgentIds: [], + coordinatedBy: 'coord-1', + projectId: 'proj-1', + gitIsolation: 'worktree', + worktreePath: '/repo/.worktrees/child-1', + branchName: 'task/child-1', + }; + mockAgents['agent-child'] = { def: { command: 'codex', args: [] } }; + mockInvoke.mockResolvedValueOnce({ mcpLaunchArgs: [] }); + + markTaskMcpPending('child-1'); + await retryTaskMcpStartup('child-1'); + + expect(mockTasks['child-1'].mcpStartupStatus).toBe('error'); + expect(String(mockTasks['child-1'].mcpStartupError)).toContain('no launch args'); }); it('child hydration failure marks only that child as error, leaving sibling spawnable', async () => { @@ -420,7 +478,9 @@ describe('MCP startup status transitions', () => { }; // child-a fails, child-b succeeds - mockInvoke.mockRejectedValueOnce(new Error('hydrate failed')).mockResolvedValueOnce(undefined); + mockInvoke + .mockRejectedValueOnce(new Error('hydrate failed')) + .mockResolvedValueOnce({ mcpLaunchArgs: ['--mcp-config', '/tmp/child-b.json'] }); markTaskMcpPending('child-a'); await retryTaskMcpStartup('child-a'); @@ -429,6 +489,7 @@ describe('MCP startup status transitions', () => { expect(mockTasks['child-a'].mcpStartupStatus).toBe('error'); expect(mockTasks['child-b'].mcpStartupStatus).toBe('ready'); + expect(mockTasks['child-b'].mcpLaunchArgs).toEqual(['--mcp-config', '/tmp/child-b.json']); }); it('retry of child when coordinator is in error surfaces dependency message', async () => { @@ -458,6 +519,67 @@ describe('MCP startup status transitions', () => { }); }); +describe('createTask coordinator base branch prompt', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockSetStore.mockImplementation((...args: unknown[]) => applySetStore(...args)); + mockTasks = {}; + mockAgents = {}; + mockTaskOrder = []; + vi.mocked(getProjectPath).mockReturnValue('/repo'); + vi.mocked(getProjectBranchPrefix).mockReturnValue('task'); + vi.mocked(isProjectMissing).mockReturnValue(false); + mockInvoke.mockImplementation((channel: string) => { + if (channel === IPC.CreateTask) { + return Promise.resolve({ + id: 'coord-1', + branch_name: 'task/coordinator-work', + worktree_path: '/repo/.worktrees/coordinator-work', + }); + } + if (channel === IPC.StartMCPServer) { + return Promise.resolve({ mcpLaunchArgs: ['--mcp-config', '/tmp/coord.json'] }); + } + return Promise.resolve(undefined); + }); + }); + + it('tells coordinators to base sub-tasks on the coordinator branch, not its base branch', async () => { + await createTask({ + name: 'Coordinator', + agentDef: { + id: 'agent-def', + name: 'Claude', + command: 'claude', + args: [], + resume_args: [], + skip_permissions_args: [], + description: 'Claude', + }, + projectId: 'proj-1', + gitIsolation: 'worktree', + baseBranch: 'main', + initialPrompt: 'Pick a task', + coordinatorMode: true, + }); + + expect(mockTasks['coord-1'].initialPrompt).toContain( + 'Use `task/coordinator-work` as the baseBranch for all sub-tasks.', + ); + expect(mockTasks['coord-1'].initialPrompt).not.toContain( + 'Use `main` as the baseBranch for all sub-tasks.', + ); + expect(mockInvoke).toHaveBeenCalledWith( + IPC.StartMCPServer, + expect.objectContaining({ coordinatorBranch: 'task/coordinator-work' }), + ); + expect(mockInvoke).toHaveBeenCalledWith( + IPC.MCP_CoordinatorRegistered, + expect.objectContaining({ coordinatorBranch: 'task/coordinator-work' }), + ); + }); +}); + // ─── sendPrompt tests ───────────────────────────────────────────────────────── function writePayloads(): string[] { @@ -499,6 +621,20 @@ describe('sendPrompt', () => { expect(writePayloads()).toEqual(['\x1b[I', '\x1b[200~line 1\nline 2\x1b[201~', '\r']); }); + + it.each(['landed_pending_review', 'landed_cleanup_failed', 'reviewed'] as const)( + 'does not write to the agent when the task is %s', + async (landingState) => { + mockTasks['task-1'].landingState = landingState; + + await expect(sendPrompt('task-1', 'agent-1', 'hello Codex')).rejects.toThrow( + 'no longer writable', + ); + + expect(writePayloads()).toEqual([]); + expect(mockSetStore).not.toHaveBeenCalledWith('tasks', 'task-1', 'lastPrompt', 'hello Codex'); + }, + ); }); // ─── MCP_TaskCleanupFailed IPC handler ─────────────────────────────────────── @@ -714,6 +850,49 @@ describe('MCP_TaskStateSync listener', () => { expect(mockTasks['task-1'].mcpStartupStatus).toBeUndefined(); expect(mockTasks['task-1'].mcpStartupError).toBeUndefined(); }); + + it('stores landed pending-review and verification sync fields', () => { + taskStateSyncHandler({ + taskId: 'task-1', + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + landingState: 'landed_pending_review', + landingSummary: 'implemented', + landedMetadata: { + taskId: 'task-1', + taskName: 'Task 1', + coordinatorTaskId: 'coord-1', + targetBranch: 'main', + landedCommit: 'abc123', + landedAt: '2026-05-24T00:00:00Z', + landedOrder: 1, + verification: { checks: [{ name: 'test', command: 'npm test', result: 'passed' }] }, + }, + needsReview: true, + }); + + expect(mockTasks['task-1'].landingState).toBe('landed_pending_review'); + expect( + (mockTasks['task-1'].verification as { checks: Array<{ result: string }> }).checks[0].result, + ).toBe('passed'); + expect((mockTasks['task-1'].landedMetadata as { landedCommit: string }).landedCommit).toBe( + 'abc123', + ); + expect(mockTasks['task-1'].needsReview).toBe(true); + expect(vi.mocked(saveState)).toHaveBeenCalled(); + }); + + it('clears landed pending-review state locally', () => { + mockTasks['task-1'].landingState = 'landed_pending_review'; + mockTasks['task-1'].needsReview = true; + + clearTaskLandingReview('task-1'); + + expect(mockTasks['task-1'].landingState).toBe('reviewed'); + expect(mockTasks['task-1'].needsReview).toBe(false); + expect(mockInvoke).toHaveBeenCalledWith(IPC.MCP_TaskLandingReviewCleared, { + taskId: 'task-1', + }); + }); }); describe('pasteDelayMs', () => { diff --git a/src/store/tasks.ts b/src/store/tasks.ts index 66ddf733..882e73e9 100644 --- a/src/store/tasks.ts +++ b/src/store/tasks.ts @@ -34,6 +34,7 @@ import { DEFAULT_COORDINATOR_CONCURRENT_TASKS, } from '../lib/coordinator-limits'; import { getCoordinatorChildren, isCoordinatedChild } from './sidebar-order'; +import { isLandedTaskState } from './landing'; function initTaskInStore( taskId: string, @@ -81,11 +82,18 @@ function isAgentNotFoundError(err: unknown): boolean { return String(err).toLowerCase().includes('agent not found'); } -async function writeToAgentWhenReady(agentId: string, data: string): Promise { +function assertTaskCanReceiveInput(taskId: string, agentId: string): void { + if (isLandedTaskState(store.tasks[taskId]?.landingState)) { + throw new Error(`Task ${taskId} has already landed; agent ${agentId} is no longer writable.`); + } +} + +async function writeToAgentWhenReady(taskId: string, agentId: string, data: string): Promise { const deadline = Date.now() + AGENT_WRITE_READY_TIMEOUT_MS; let lastErr: unknown; while (Date.now() <= deadline) { + assertTaskCanReceiveInput(taskId, agentId); try { await invoke(IPC.WriteToAgent, { agentId, data }); return; @@ -206,6 +214,7 @@ export async function createTask(opts: CreateTaskOptions): Promise { coordinatorTaskId: taskId, projectId, projectRoot, + coordinatorBranch: branchName || undefined, worktreePath: gitIsolation === 'worktree' ? worktreePath : undefined, skipPermissions: skipPermissions ?? false, propagateSkipPermissions: opts.propagateSkipPermissions ?? false, @@ -220,6 +229,7 @@ export async function createTask(opts: CreateTaskOptions): Promise { await invoke(IPC.MCP_CoordinatorRegistered, { coordinatorTaskId: taskId, projectId, + coordinatorBranch: branchName || undefined, worktreePath, }); } catch (err) { @@ -242,6 +252,10 @@ export async function createTask(opts: CreateTaskOptions): Promise { // Only possible here when an initialPrompt was provided; if not, sendPrompt handles injection. const effectivePrompt = stepsEnabled && initialPrompt ? `${initialPrompt}\n\n---\n${STEPS_INSTRUCTION}` : initialPrompt; + const coordinatorBaseBranchInstruction = + opts.coordinatorMode && branchName + ? `Use \`${branchName}\` as the baseBranch for all sub-tasks.\n\n` + : ''; const task: Task = { id: taskId, @@ -267,7 +281,7 @@ export async function createTask(opts: CreateTaskOptions): Promise { ), ), ) + - `Use \`${opts.baseBranch}\` as the baseBranch for all sub-tasks.\n\n` + + coordinatorBaseBranchInstruction + effectivePrompt : (effectivePrompt ?? undefined), savedInitialPrompt: initialPrompt ?? undefined, @@ -624,6 +638,7 @@ export function updateTaskNotes(taskId: string, notes: string): void { export async function sendPrompt(taskId: string, agentId: string, text: string): Promise { const task = store.tasks[taskId]; + assertTaskCanReceiveInput(taskId, agentId); const promptedAgentIds = task?.promptedAgentIds ?? []; const hasPromptedAgent = promptedAgentIds.includes(agentId); const isQueuedInitialPrompt = @@ -639,7 +654,7 @@ export async function sendPrompt(taskId: string, agentId: string, text: string): // the PromptInput textarea, the xterm.js terminal loses DOM focus. For agents // that enable focus tracking (\x1b[?1004h), xterm.js sends \x1b[O (Focus Out) // to the PTY, which may suspend readline input processing; \x1b[I re-activates it. - await writeToAgentWhenReady(agentId, '\x1b[I'); + await writeToAgentWhenReady(taskId, agentId, '\x1b[I'); // Send text and Enter separately so TUI apps (Claude Code, Codex) // don't treat the \r as part of a pasted block. When the agent has enabled // bracketed paste, wrap only the prompt text; this avoids Codex's paste-burst @@ -647,11 +662,12 @@ export async function sendPrompt(taskId: string, agentId: string, text: string): setTaskLastInputAt(taskId); const useBracketed = isAgentBracketedPasteEnabled(agentId); await writeToAgentWhenReady( + taskId, agentId, useBracketed ? `${BRACKETED_PASTE_START}${effectiveText}${BRACKETED_PASTE_END}` : effectiveText, ); await new Promise((r) => setTimeout(r, pasteDelayMs(effectiveText))); - await writeToAgentWhenReady(agentId, '\r'); + await writeToAgentWhenReady(taskId, agentId, '\r'); setStore('tasks', taskId, 'lastPrompt', text); if (task && !hasPromptedAgent) { setStore('tasks', taskId, 'promptedAgentIds', [...promptedAgentIds, agentId]); @@ -680,6 +696,19 @@ export function clearStagedNotification(taskId: string): void { setStore('tasks', taskId, 'stagedNotification', undefined); } +export function clearTaskLandingReview(taskId: string): void { + const task = store.tasks[taskId]; + if (!task) return; + setStore('tasks', taskId, 'needsReview', false); + if (task.landingState === 'landed_pending_review') { + setStore('tasks', taskId, 'landingState', 'reviewed'); + void invoke(IPC.MCP_TaskLandingReviewCleared, { taskId }).catch((err: unknown) => + logWarn('tasks', 'Failed to clear backend landing review state', { err: String(err) }), + ); + } + void saveState(); +} + export function setStagedNotificationUserEdited(taskId: string): void { setStore( produce((s) => { @@ -1000,6 +1029,7 @@ interface MCPTaskCreatedEvent { preambleFileExistedBefore?: boolean; agentCommand?: string; agentArgs?: string[]; + mcpLaunchArgs?: string[]; skipPermissions?: boolean; } @@ -1027,6 +1057,7 @@ export function initMCPListeners(): () => void { // PromptInput auto-delivers it with stability checks + quiescence. initialPrompt: evt.prompt, mcpConfigPath: evt.mcpConfigPath, + mcpLaunchArgs: evt.mcpLaunchArgs, preambleFileExistedBefore: evt.preambleFileExistedBefore, skipPermissions: evt.skipPermissions ?? false, // Backend-spawned children are already attached to a live MCP server; @@ -1208,6 +1239,11 @@ export function initMCPListeners(): () => void { signalDoneAt?: string; signalDoneConsumed?: boolean; needsReview?: boolean; + verification?: Task['verification']; + landingState?: Task['landingState'] | null; + landingReason?: string | null; + landingSummary?: string | null; + landedMetadata?: Task['landedMetadata'] | null; coordinatedBy?: string | null; controlledBy?: 'coordinator' | 'human' | null; mcpConfigPath?: string | null; @@ -1215,6 +1251,12 @@ export function initMCPListeners(): () => void { mcpStartupError?: string | null; }; if (store.tasks[evt.taskId]) { + const hasLandingStateUpdate = + evt.verification !== undefined || + evt.landingState !== undefined || + evt.landingReason !== undefined || + evt.landingSummary !== undefined || + evt.landedMetadata !== undefined; if (evt.signalDoneReceived !== undefined) setStore('tasks', evt.taskId, 'signalDoneReceived', evt.signalDoneReceived); if (evt.signalDoneAt !== undefined) @@ -1223,6 +1265,16 @@ export function initMCPListeners(): () => void { setStore('tasks', evt.taskId, 'signalDoneConsumed', evt.signalDoneConsumed); if (evt.needsReview !== undefined) setStore('tasks', evt.taskId, 'needsReview', evt.needsReview); + if (evt.verification !== undefined) + setStore('tasks', evt.taskId, 'verification', evt.verification); + if (evt.landingState !== undefined) + setStore('tasks', evt.taskId, 'landingState', evt.landingState ?? undefined); + if (evt.landingReason !== undefined) + setStore('tasks', evt.taskId, 'landingReason', evt.landingReason ?? undefined); + if (evt.landingSummary !== undefined) + setStore('tasks', evt.taskId, 'landingSummary', evt.landingSummary ?? undefined); + if (evt.landedMetadata !== undefined) + setStore('tasks', evt.taskId, 'landedMetadata', evt.landedMetadata ?? undefined); if (evt.coordinatedBy !== undefined) setStore('tasks', evt.taskId, 'coordinatedBy', evt.coordinatedBy ?? undefined); if (evt.controlledBy !== undefined) @@ -1233,6 +1285,7 @@ export function initMCPListeners(): () => void { setStore('tasks', evt.taskId, 'mcpStartupStatus', evt.mcpStartupStatus ?? undefined); if (evt.mcpStartupError !== undefined) setStore('tasks', evt.taskId, 'mcpStartupError', evt.mcpStartupError ?? undefined); + if (hasLandingStateUpdate) void saveState(); } }), window.electron.ipcRenderer.on(IPC.MCP_TaskHydrated, (data: unknown) => { @@ -1260,6 +1313,31 @@ export function setTaskMcpLaunchArgs(taskId: string, args: string[] | undefined) if (store.tasks[taskId]) setStore('tasks', taskId, 'mcpLaunchArgs', args); } +function isCodexCommand(command: string | undefined): boolean { + return command?.split('/').pop()?.includes('codex') === true; +} + +function taskRequiresMcpLaunchArgs(taskId: string): boolean { + const task = store.tasks[taskId]; + if (!task) return true; + const agentDef = task.agentIds[0] ? store.agents[task.agentIds[0]]?.def : undefined; + return isCodexCommand(agentDef?.command) || Boolean(task.mcpConfigPath); +} + +export function applyTaskMcpLaunchResult( + taskId: string, + result: { mcpLaunchArgs?: string[] } | undefined, +): boolean { + const args = result?.mcpLaunchArgs; + if ((!Array.isArray(args) || args.length === 0) && taskRequiresMcpLaunchArgs(taskId)) { + markTaskMcpError(taskId, 'MCP startup returned no launch args'); + return false; + } + if (Array.isArray(args)) setTaskMcpLaunchArgs(taskId, args); + markTaskMcpReady(taskId); + return true; +} + export function markTaskMcpError(taskId: string, errorMsg: string): void { if (!store.tasks[taskId]) return; // eslint-disable-next-line no-control-regex -- strip escape chars to prevent injection @@ -1288,6 +1366,7 @@ export function retryTaskMcpStartup(taskId: string): Promise { coordinatorTaskId: task.id, projectId: task.projectId, projectRoot, + coordinatorBranch: task.branchName || undefined, worktreePath: task.gitIsolation === 'worktree' ? task.worktreePath : undefined, skipPermissions: task.skipPermissions ?? false, propagateSkipPermissions: task.propagateSkipPermissions ?? false, @@ -1297,19 +1376,26 @@ export function retryTaskMcpStartup(taskId: string): Promise { dockerImage: task.dockerImage, }) .then((result) => { - setTaskMcpLaunchArgs(taskId, result?.mcpLaunchArgs); - markTaskMcpReady(taskId); + applyTaskMcpLaunchResult(taskId, result); }) .catch((err: unknown) => markTaskMcpError(taskId, String(err))); } if (task.coordinatedBy) { + if ( + task.landingState === 'landed_pending_review' || + task.landingState === 'landed_cleanup_failed' || + task.landingState === 'reviewed' + ) { + return Promise.resolve(); + } const coordinator = store.tasks[task.coordinatedBy]; if (coordinator?.mcpStartupStatus === 'error') { markTaskMcpError(taskId, 'Coordinator MCP failed — retry the coordinator task first'); return Promise.resolve(); } - return invoke(IPC.MCP_HydrateCoordinatedTask, { + const agentDef = task.agentIds[0] ? store.agents[task.agentIds[0]]?.def : undefined; + return invoke<{ mcpLaunchArgs?: string[] }>(IPC.MCP_HydrateCoordinatedTask, { id: task.id, name: task.name, projectId: task.projectId, @@ -1322,10 +1408,18 @@ export function retryTaskMcpStartup(taskId: string): Promise { agentId: task.agentIds[0], signalDoneAt: task.signalDoneAt, signalDoneConsumed: task.signalDoneConsumed, + verification: task.verification, + landingState: task.landingState, + landingReason: task.landingReason, + landingSummary: task.landingSummary, + landedMetadata: task.landedMetadata, mcpConfigPath: task.mcpConfigPath, + agentCommand: agentDef?.command ?? 'claude', preambleFileExistedBefore: task.preambleFileExistedBefore, }) - .then(() => markTaskMcpReady(taskId)) + .then((result) => { + applyTaskMcpLaunchResult(taskId, result); + }) .catch((err: unknown) => markTaskMcpError(taskId, String(err))); } return Promise.resolve(); diff --git a/src/store/types.ts b/src/store/types.ts index 9f5de4b2..c98cad8a 100644 --- a/src/store/types.ts +++ b/src/store/types.ts @@ -18,6 +18,36 @@ export interface StagedNotification { hiddenCompletionCount?: number; } +export interface SubtaskVerificationCheck { + name: string; + command: string; + result: 'passed' | 'blocked' | 'failed'; + reason?: string; +} + +export interface SubtaskVerification { + checks: SubtaskVerificationCheck[]; +} + +export type LandingState = + | 'landing_escalated' + | 'landing_failed' + | 'landed_pending_review' + | 'landed_cleanup_failed' + | 'reviewed'; + +export interface LandedMetadata { + taskId: string; + taskName: string; + coordinatorTaskId: string; + targetBranch: string; + landedCommit: string; + landedAt: string; + landedOrder: number; + summary?: string; + verification: SubtaskVerification; +} + export interface TaskGitStatusSnapshot extends WorktreeStatus { refreshedAt: number; error?: string; @@ -110,6 +140,11 @@ export interface Task { signalDoneAt?: string; signalDoneConsumed?: boolean; needsReview?: boolean; + verification?: SubtaskVerification; + landingState?: LandingState; + landingReason?: string; + landingSummary?: string; + landedMetadata?: LandedMetadata; mcpStartupStatus?: 'pending' | 'ready' | 'error'; mcpStartupError?: string; } @@ -162,6 +197,11 @@ export interface PersistedTask { signalDoneAt?: string; signalDoneConsumed?: boolean; needsReview?: boolean; + verification?: SubtaskVerification; + landingState?: LandingState; + landingReason?: string; + landingSummary?: string; + landedMetadata?: LandedMetadata; } export interface PersistedTerminal {