diff --git a/.agents/editor/editor.ts b/.agents/editor/editor.ts index 5c63770373..c6d501bf23 100644 --- a/.agents/editor/editor.ts +++ b/.agents/editor/editor.ts @@ -1,9 +1,6 @@ import { Message } from 'types/util-types' import { publisher } from '../constants' -import { - PLACEHOLDER, - type SecretAgentDefinition, -} from '../types/secret-agent-definition' +import { type SecretAgentDefinition } from '../types/secret-agent-definition' const editor: SecretAgentDefinition = { id: 'editor', @@ -28,7 +25,6 @@ const editor: SecretAgentDefinition = { }, }, outputMode: 'structured_output', - includeMessageHistory: true, toolNames: [ 'read_files', 'write_file', @@ -42,7 +38,10 @@ const editor: SecretAgentDefinition = { ], spawnableAgents: ['file-explorer', 'web-researcher', 'docs-researcher'], - systemPrompt: `You are an expert code editor with deep understanding of software engineering principles. + includeMessageHistory: true, + inheritParentSystemPrompt: true, + + instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. # Core Mandates @@ -66,62 +65,18 @@ const editor: SecretAgentDefinition = { - Remove unused variables, functions, and files as a result of your changes. - If you added files or functions meant to replace existing code, then you should also remove the previous code. - **Edit multiple files at once:** When you edit files, you must make as many tool calls as possible in a single message. This is faster and much more efficient than making all the tool calls in separate messages. It saves users thousands of dollars in credits if you do this! - -Assistant: I will now implement feature X. - - -{ - "toolName": "str_replace", - "input": { - "filePath": "src/components/Button.tsx", - "oldContent": "...", - "newContent": "...", - } -} - - - -{ - "toolName": "str_replace", - "input": { - "filePath": "src/components/Button.tsx", - "oldContent": "...", - "newContent": "...", - } -} - - -// ... 8 more str_replace tool calls ... - -Let's see what the code looks like now. - -User: -str_replace -... - - - -str_replace -... - - -// ... 8 more tool_result blocks ... - - **Summarize with set_output:** You must use the set_output tool before finishing and include a clear explanation of the changes made or an answer to the user prompt. Do not write a separate summary outside of the set_output tool. - -${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`, - - instructionsPrompt: `Implement the requested changes, using your judgment as needed, but referring to the original as the most important source of information. +Implement the requested changes, using your judgment as needed, but referring to the original as the most important source of information. # Instructions -- It's helpful to spawn a file explorer to discover all the relevant files for implementing the plan. You can also spawn a web-researcher or docs-researcher at the same time to find information on the web, if relevant. -- You must read all relevant files to understand the current state. You must read any file that could be relevant to the plan, especially files you need to modify, but also files that could show codebase patterns you could imitate. Try to read a lot of files in a single tool call. E.g. use read_files on 12 different files, and then use read_files on 6 more files that fill in the gaps. +- Read any relevant files that have not already been read. Or, spawn a file-explorer to find any other relevant parts of the codebase. - Implement changes using str_replace or write_file. +- Verify your changes by running tests, typechecking, etc. Keep going until you are sure the changes are correct. - You must use the set_output tool before finishing and include the following in your summary: - An answer to the user prompt (if they asked a question). - An explanation of the changes made. - - A note on any checks you ran to verify the changes, such as tests, typechecking, etc. + - A note on any checks you ran to verify the changes, such as tests, typechecking, etc., and the results of those checks. - Do not include a section on the benefits of the changes, as we're most interested in the changes themselves and what still needs to be done. - Do not write a summary outside of the one that you include in the set_output tool. - As soon as you use set_output, you must end your turn using the end_turn tool. diff --git a/.agents/orchestrator/orchestrator.ts b/.agents/orchestrator/orchestrator.ts index cc3df69acb..0654987117 100644 --- a/.agents/orchestrator/orchestrator.ts +++ b/.agents/orchestrator/orchestrator.ts @@ -28,7 +28,7 @@ const definition: SecretAgentDefinition = { }, outputMode: 'last_message', includeMessageHistory: true, - toolNames: ['spawn_agents', 'read_files', 'str_replace', 'write_file'], + toolNames: ['spawn_agents', 'read_files'], spawnableAgents: [ 'read-only-commander', 'researcher-file-explorer', @@ -36,7 +36,7 @@ const definition: SecretAgentDefinition = { 'researcher-docs', 'decomposing-planner', 'editor', - 'reviewer-lite', + 'reviewer-max', 'context-pruner', ], @@ -55,6 +55,9 @@ const definition: SecretAgentDefinition = { - **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it. +${PLACEHOLDER.FILE_TREE_PROMPT_SMALL} +${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS} + # Starting Git Changes The following is the state of the git repository at the start of the conversation. Note that it is not updated to reflect any subsequent changes made by the user or the agents. @@ -62,26 +65,28 @@ The following is the state of the git repository at the start of the conversatio ${PLACEHOLDER.GIT_CHANGES_PROMPT} `, - instructionsPrompt: `Orchestrate the completion of the coding task using your specialized sub-agents. + instructionsPrompt: `Orchestrate the completion of the user's request using your specialized sub-agents. -## Simple workflow +## Example workflow Use this workflow to solve a medium or complex coding task: 1. Spawn a researcher -2. Spawn a decomposing planner to come up with a plan. -3. Spawn an editor to implement the plan. -4. Spawn a reviewer to review the code. If changes are needed, go back to step 3, but only once. -5. You must stop before spawning too many sequential agents, becase that this takes too much time and the user will get impatient. +2. Read all the relevant files using the read_files tool. +3. Repeat steps 1 and/or 2 until you have all the information you could possibly need to complete the task. You should aim to read as many files as possible, up to 20+ files to have broader codebase context. +4. Spawn a decomposing planner to come up with a plan. +5. Spawn an editor to implement the plan. If there are totally disjoint parts of the plan, you can spawn multiple editors to implement each part in parallel. +6. Spawn a reviewer to review the code. If changes are needed, go back to step 5, but no more than once. +7. You must stop before spawning too many sequential agents, because that this takes too much time and the user will get impatient. Feel free to modify this workflow as needed. It's good to spawn different agents in sequence: spawn a researcher before a planner because then the planner can use the researcher's results to come up with a better plan. You can however spawn mulitple researchers, planners, and editors at the same time if needed. ## Guidelines -- You can spawn agents to help you complete the task. Iterate by spawning more agents as needed. +- Spawn agents to help you complete the task. Iterate by spawning more agents as needed. - Don't mastermind the task. Rely on your agents' judgement to research, plan, edit, and review the code. -- Give as many instructions upfront as possible to each agent so you're less likely to need to spawn them again. - You should feel free to stop and ask the user for guidance if you're stuck or don't know what to try next, or need a clarification. -- When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include much context. +- Give as many instructions upfront as possible to each agent so you're less likely to need to spawn them again. +- When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context. - Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, running scripts that could alter production environments, installing packages globally, etc). Don't do any of these unless the user explicitly asks you to. `, diff --git a/.agents/planners/decomposing-planner-lite.ts b/.agents/planners/decomposing-planner-lite.ts index c153c43641..84e8632824 100644 --- a/.agents/planners/decomposing-planner-lite.ts +++ b/.agents/planners/decomposing-planner-lite.ts @@ -9,7 +9,6 @@ const definition: SecretAgentDefinition = { spawnerPrompt: 'Creates a better implementation plan by decomposing the task into smaller plans in parallel and synthesizing them into a final plan. Includes full code changes.', spawnableAgents: ['researcher-file-explorer', 'implementation-planner-lite'], - includeMessageHistory: false, } export default definition diff --git a/.agents/planners/decomposing-planner.ts b/.agents/planners/decomposing-planner.ts index ba06e66cd1..ad0084bd68 100644 --- a/.agents/planners/decomposing-planner.ts +++ b/.agents/planners/decomposing-planner.ts @@ -1,14 +1,11 @@ import { publisher } from '../constants' -import { - PLACEHOLDER, - type SecretAgentDefinition, -} from '../types/secret-agent-definition' +import { type SecretAgentDefinition } from '../types/secret-agent-definition' const definition: SecretAgentDefinition = { id: 'decomposing-planner', publisher, model: 'anthropic/claude-sonnet-4.5', - displayName: 'Decomposing Planner', + displayName: 'Peter Plan', spawnerPrompt: 'Creates the best possible implementation plan by decomposing the task into smaller plans in parallel and synthesizing them into a final plan. Includes full code changes.', inputSchema: { @@ -19,31 +16,27 @@ const definition: SecretAgentDefinition = { }, }, outputMode: 'last_message', - includeMessageHistory: true, - toolNames: ['spawn_agents', 'read_files'], - spawnableAgents: ['file-explorer', 'implementation-planner'], + toolNames: ['spawn_agents'], + spawnableAgents: ['implementation-planner'], - systemPrompt: `You are an expert programmer, architect, and problem solver who excels at breaking down complex tasks. + includeMessageHistory: true, + inheritParentSystemPrompt: true, -${PLACEHOLDER.FILE_TREE_PROMPT} -${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`, + instructionsPrompt: `You are an expert programmer, architect, and problem solver who excels at breaking down complex tasks. - instructionsPrompt: `Instructions: +Instructions: -Step 1: Task Decomposition -- Spawn a file-explorer agent to explore the codebase and read all the relevant files +Step 1: Task Decomposition & Parallel Planning - Carefully analyze the user's request -- Break it down into 3-5 focused subtasks that: +- Break it down into 2-10 focused subtasks that: - Cover different aspects of the implementation (e.g., data layer, business logic, UI, testing) - Are specific and actionable - Together address the complete requirements - -Step 2: Parallel Planning -- Spawn 3-5 implementation-planner agents in parallel (one spawn_agents call with multiple agents) +- Spawn 2-10 implementation-planner agents in parallel (one spawn_agents call with multiple agents) - Give each agent a focused subtask from your decomposition - Each subtask prompt should be specific about what that agent should focus on -Step 3: Synthesis +Step 2: Synthesis - Review all the plans from the spawned agents - Create a unified implementation plan that: - Combines insights from all subtask plans diff --git a/.agents/planners/implementation-planner.ts b/.agents/planners/implementation-planner.ts index bf7bd7045e..073e5852ca 100644 --- a/.agents/planners/implementation-planner.ts +++ b/.agents/planners/implementation-planner.ts @@ -1,8 +1,5 @@ import { publisher } from '../constants' -import { - PLACEHOLDER, - type SecretAgentDefinition, -} from '../types/secret-agent-definition' +import { type SecretAgentDefinition } from '../types/secret-agent-definition' const definition: SecretAgentDefinition = { id: 'implementation-planner', @@ -20,25 +17,18 @@ const definition: SecretAgentDefinition = { }, outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: true, toolNames: ['spawn_agents', 'read_files'], - spawnableAgents: [ - 'file-explorer', - 'web-researcher', - 'docs-researcher', - ], + spawnableAgents: ['file-explorer', 'web-researcher', 'docs-researcher'], - systemPrompt: `You are an expert programmer, architect, researcher, and general problem solver. + instructionsPrompt: `You are an expert programmer, architect, researcher, and general problem solver. You spawn agents to help you gather information, and then describe a full change to the codebase that will accomplish the task. You do not have access to tools to modify files (e.g. the write_file or str_replace tools). You are describing all the code changes that should be made as a full implementation. -${PLACEHOLDER.FILE_TREE_PROMPT} -${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`, - - instructionsPrompt: `Instructions: +Instructions: - Spawn file-explorer twice to find all the relevant parts of the codebase. Use different prompts for each file-explorer to ensure you get all the relevant parts of the codebase. In parallel as part of the same spawn_agents tool call, you may also spawn a web-researcher or docs-researcher to search the web or technical documentation for relevant information. -- Read all the file paths that are relevant using the read_files tool. -- Read more and more files to get any information that could possibly help you make the best plan. It's good to read 20+ files. +- Read any relevant files that have not already been read. - Think about the best way to accomplish the task. - Finally, describe the full change to the codebase that will accomplish the task (or other steps, e.g. terminal commands to run). Use markdown code blocks to describe the changes for each file. diff --git a/.agents/reviewer/reviewer-factory.ts b/.agents/reviewer/reviewer-factory.ts index 3fb97c8a79..ce6a6d0a0d 100644 --- a/.agents/reviewer/reviewer-factory.ts +++ b/.agents/reviewer/reviewer-factory.ts @@ -1,6 +1,4 @@ import { AGENT_PERSONAS } from '@codebuff/common/constants/agents' -import { closeXml } from '@codebuff/common/util/xml' - import type { SecretAgentDefinition } from '../types/secret-agent-definition' import type { Model } from '@codebuff/common/old-constants' @@ -15,11 +13,11 @@ export const reviewer = (model: Model): Omit => ({ }, }, outputMode: 'last_message', - includeMessageHistory: true, - toolNames: ['end_turn', 'run_file_change_hooks'], + toolNames: ['run_file_change_hooks'], spawnableAgents: [], - systemPrompt: `You are an expert programmer who can articulate very clear feedback on code changes.`, + inheritParentSystemPrompt: true, + includeMessageHistory: true, instructionsPrompt: `Your task is to provide helpful feedback on the last file changes made by the assistant. @@ -42,7 +40,5 @@ Next, you should critique the code changes made recently in the above conversati - Make sure no sections were deleted that weren't supposed to be deleted. - Make sure the new code matches the style of the existing code. -Be concise and to the point. After providing all your feedback, use the end_turn tool to end your response.`, - - stepPrompt: `IMPORTANT: Don't forget to end your response with the end_turn tool: ${closeXml('end_turn')}`, +Be concise and to the point.`, }) diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts index 472e06f130..807096bcea 100644 --- a/.agents/types/agent-definition.ts +++ b/.agents/types/agent-definition.ts @@ -95,7 +95,7 @@ export interface AgentDefinition { /** Whether to include conversation history from the parent agent in context. * * Defaults to false. - * Use this if the agent needs to know all the previous messages in the conversation. + * Use this when the agent needs to know all the previous messages in the conversation. */ includeMessageHistory?: boolean @@ -121,6 +121,14 @@ export interface AgentDefinition { * This field is key if the agent is intended to be spawned by other agents. */ spawnerPrompt?: string + /** Whether to inherit the parent agent's system prompt instead of using this agent's own systemPrompt. + * + * Defaults to false. + * Use this when you want to enable prompt caching by preserving the same system prompt prefix. + * Cannot be used together with the systemPrompt field. + */ + inheritParentSystemPrompt?: boolean + /** Background information for the agent. Fairly optional. Prefer using instructionsPrompt for agent instructions. */ systemPrompt?: string @@ -289,7 +297,7 @@ export type ModelName = | 'openai/gpt-5-nano' // Anthropic - | 'anthropic/claude-sonnet-4' + | 'anthropic/claude-sonnet-4.5' | 'anthropic/claude-opus-4.1' // Gemini @@ -326,8 +334,8 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 'moonshotai/kimi-k2:nitro' - | 'z-ai/glm-4.5' - | 'z-ai/glm-4.5:nitro' + | 'z-ai/glm-4.6' + | 'z-ai/glm-4.6:nitro' | (string & {}) export type { Tools } diff --git a/backend/src/__tests__/agent-id-resolution.test.ts b/backend/src/__tests__/agent-id-resolution.test.ts index e59b1ff78d..9b9bdaeae4 100644 --- a/backend/src/__tests__/agent-id-resolution.test.ts +++ b/backend/src/__tests__/agent-id-resolution.test.ts @@ -23,6 +23,7 @@ describe('Agent ID Resolution', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, @@ -38,6 +39,7 @@ describe('Agent ID Resolution', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, @@ -54,6 +56,7 @@ describe('Agent ID Resolution', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'google/gemini-2.5-pro', spawnerPrompt: 'Test', inputSchema: {}, @@ -69,6 +72,7 @@ describe('Agent ID Resolution', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, @@ -85,6 +89,7 @@ describe('Agent ID Resolution', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, @@ -155,6 +160,7 @@ describe('Agent ID Resolution', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, @@ -182,6 +188,7 @@ describe('Agent ID Resolution', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, diff --git a/backend/src/__tests__/agent-registry.test.ts b/backend/src/__tests__/agent-registry.test.ts index 70523606a0..9b6867c3f0 100644 --- a/backend/src/__tests__/agent-registry.test.ts +++ b/backend/src/__tests__/agent-registry.test.ts @@ -38,6 +38,7 @@ const mockStaticTemplates: Record = { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, @@ -53,6 +54,7 @@ const mockStaticTemplates: Record = { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'google/gemini-2.5-flash', spawnerPrompt: 'Test', inputSchema: {}, @@ -235,6 +237,7 @@ describe('Agent Registry', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', inputSchema: {}, @@ -289,6 +292,7 @@ describe('Agent Registry', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Test', }, @@ -327,6 +331,7 @@ describe('Agent Registry', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Local test', inputSchema: {}, @@ -356,6 +361,7 @@ describe('Agent Registry', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Cached test', }, @@ -479,6 +485,7 @@ describe('Agent Registry', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, model: 'anthropic/claude-4-sonnet-20250522', spawnerPrompt: 'Cache test', }, diff --git a/backend/src/__tests__/cost-aggregation.integration.test.ts b/backend/src/__tests__/cost-aggregation.integration.test.ts index db3499a832..d01e0bbd3e 100644 --- a/backend/src/__tests__/cost-aggregation.integration.test.ts +++ b/backend/src/__tests__/cost-aggregation.integration.test.ts @@ -48,7 +48,8 @@ const mockFileContext: ProjectFileContext = { inputSchema: {}, spawnerPrompt: '', model: 'gpt-4o-mini', - includeMessageHistory: true, + includeMessageHistory: false, + inheritParentSystemPrompt: false, toolNames: ['spawn_agents'], spawnableAgents: ['editor'], systemPrompt: 'Base agent system prompt', @@ -63,9 +64,10 @@ const mockFileContext: ProjectFileContext = { spawnerPrompt: '', model: 'gpt-4o-mini', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['write_file'], spawnableAgents: [], - systemPrompt: 'Editor agent system prompt', + systemPrompt: '', instructionsPrompt: 'Editor agent instructions', stepPrompt: 'Editor agent step prompt', }, @@ -139,7 +141,8 @@ describe('Cost Aggregation Integration Tests', () => { inputSchema: {}, spawnerPrompt: '', model: 'gpt-4o-mini', - includeMessageHistory: true, + includeMessageHistory: false, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['spawn_agents'], spawnableAgents: ['editor'], @@ -155,10 +158,11 @@ describe('Cost Aggregation Integration Tests', () => { spawnerPrompt: '', model: 'gpt-4o-mini', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['write_file'], spawnableAgents: [], - systemPrompt: 'Editor agent system prompt', + systemPrompt: '', instructionsPrompt: 'Editor agent instructions', stepPrompt: 'Editor agent step prompt', } satisfies AgentTemplate, diff --git a/backend/src/__tests__/cost-aggregation.test.ts b/backend/src/__tests__/cost-aggregation.test.ts index 73c9001f33..cadaade2ba 100644 --- a/backend/src/__tests__/cost-aggregation.test.ts +++ b/backend/src/__tests__/cost-aggregation.test.ts @@ -69,6 +69,7 @@ describe('Cost Aggregation System', () => { instructionsPrompt: 'Test instructions', stepPrompt: 'Test step prompt', includeMessageHistory: true, + inheritParentSystemPrompt: false, outputMode: 'last_message', inputSchema: {}, } @@ -140,6 +141,7 @@ describe('Cost Aggregation System', () => { messages: [], agentState: parentAgentState, sendSubagentChunk: () => {}, + system: 'Test system prompt', } // Mock executeAgent to return results with different credit costs @@ -212,6 +214,7 @@ describe('Cost Aggregation System', () => { messages: [], agentState: parentAgentState, sendSubagentChunk: () => {}, + system: 'Test system prompt', } // Mock executeAgent to return success and failure with partial costs @@ -368,6 +371,7 @@ describe('Cost Aggregation System', () => { messages: [], agentState: mainAgentState, sendSubagentChunk: () => {}, + system: 'Test system prompt', } const mockExecuteAgent = spyOn(spawnAgentUtils, 'executeSubagent') diff --git a/backend/src/__tests__/loop-agent-steps.test.ts b/backend/src/__tests__/loop-agent-steps.test.ts index f20671206a..461f0235c6 100644 --- a/backend/src/__tests__/loop-agent-steps.test.ts +++ b/backend/src/__tests__/loop-agent-steps.test.ts @@ -19,6 +19,7 @@ import { spyOn, } from 'bun:test' +import { withAppContext } from '../context/app-context' import { loopAgentSteps } from '../run-agent-step' import { clearAgentGeneratorCache } from '../run-programmatic-step' import { mockFileContext, MockWebSocket } from './test-utils' @@ -35,6 +36,23 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => let mockAgentState: AgentState let llmCallCount: number + const runLoopAgentStepsWithContext = async ( + ws: WebSocket, + options: Parameters[1], + ) => { + return await withAppContext( + { + userId: options.userId, + clientSessionId: options.clientSessionId, + }, + { + currentUserId: options.userId, + processedRepoId: 'test-repo', + }, + async () => loopAgentSteps(ws, options), + ) + } + beforeAll(() => { // Mock logger mockModule('@codebuff/backend/util/logger', () => ({ @@ -64,9 +82,15 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => getAgentPrompt: async () => 'Mock prompt', })) - // Mock live user inputs - will be overridden in individual tests + // Mock live user inputs - default to true to allow tests to run mockModule('@codebuff/backend/live-user-inputs', () => ({ - checkLiveUserInput: () => false, // Default to false, override in tests + checkLiveUserInput: () => true, + resetLiveUserInputsState: () => {}, + startUserInput: () => {}, + endUserInput: () => {}, + cancelUserInput: () => {}, + setSessionConnected: () => {}, + getLiveUserInputIds: () => undefined, })) // Mock file reading updates @@ -87,6 +111,8 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }) beforeEach(() => { + clearAgentGeneratorCache() + llmCallCount = 0 // Setup spies for database operations @@ -131,6 +157,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => inputSchema: {}, outputMode: 'structured_output', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['read_files', 'write_file', 'end_turn'], spawnableAgents: [], @@ -155,8 +182,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => }) afterEach(() => { - mock.restore() clearAgentGeneratorCache() + + mock.restore() }) afterAll(() => { @@ -187,13 +215,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - // Mock checkLiveUserInput to allow the loop to continue - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => true, // Always return true to allow loop to continue - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -240,7 +262,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -289,13 +311,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - // Mock checkLiveUserInput to allow multiple iterations - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => true, // Always return true to allow loop to continue - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -343,16 +359,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - let checkCallCount = 0 - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => { - checkCallCount++ - return checkCallCount <= 5 - }, - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -393,7 +400,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -426,16 +433,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': llmOnlyTemplate, } - let checkCallCount = 0 - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => { - checkCallCount++ - return checkCallCount <= 2 // Allow 2 iterations - }, - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -470,16 +468,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - let checkCallCount = 0 - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => { - checkCallCount++ - return checkCallCount <= 2 - }, - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -531,16 +520,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - let checkCallCount = 0 - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => { - checkCallCount++ - return checkCallCount <= 10 // Allow many iterations - }, - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -588,16 +568,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => return getMessagesCallCount === 2 ? ['async message'] : [] }) - let checkCallCount = 0 - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => { - checkCallCount++ - return checkCallCount <= 5 - }, - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -648,13 +619,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => 'test-agent': mockTemplate, } - // Mock checkLiveUserInput to allow the loop to run - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => true, - ) - - await loopAgentSteps(new MockWebSocket() as unknown as WebSocket, { + await runLoopAgentStepsWithContext(new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', agentType: 'test-agent', agentState: mockAgentState, @@ -736,20 +701,10 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => return 'mock-message-id' }) - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - let checkCount = 0 - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => { - checkCount++ - return checkCount < 10 // Limit to prevent infinite loop - }, - ) - - // Capture the agent state during execution mockAgentState.output = undefined capturedAgentState = mockAgentState - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -820,15 +775,10 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => return 'mock-message-id' }) - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => true, - ) - mockAgentState.output = undefined capturedAgentState = mockAgentState - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -875,12 +825,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => return 'mock-message-id' }) - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => true, - ) - - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', @@ -946,15 +891,10 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => return 'mock-message-id' }) - const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs') - spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation( - () => true, - ) - mockAgentState.output = undefined capturedAgentState = mockAgentState - const result = await loopAgentSteps( + const result = await runLoopAgentStepsWithContext( new MockWebSocket() as unknown as WebSocket, { userInputId: 'test-user-input', diff --git a/backend/src/__tests__/main-prompt.integration.test.ts b/backend/src/__tests__/main-prompt.integration.test.ts index e5af8f6a78..889e135eb5 100644 --- a/backend/src/__tests__/main-prompt.integration.test.ts +++ b/backend/src/__tests__/main-prompt.integration.test.ts @@ -74,6 +74,7 @@ describe.skip('mainPrompt (Integration)', () => { spawnerPrompt: '', model: 'gpt-4o-mini', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['write_file', 'run_terminal_command'], spawnableAgents: [], systemPrompt: '', @@ -485,6 +486,7 @@ export function getMessagesSubset(messages: Message[], otherTokens: number) { spawnerPrompt: '', model: 'gpt-4o-mini', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['write_file', 'run_terminal_command'], spawnableAgents: [], diff --git a/backend/src/__tests__/main-prompt.test.ts b/backend/src/__tests__/main-prompt.test.ts index fa2e4c1d3d..2c561c8587 100644 --- a/backend/src/__tests__/main-prompt.test.ts +++ b/backend/src/__tests__/main-prompt.test.ts @@ -57,6 +57,7 @@ describe('mainPrompt', () => { spawnerPrompt: '', model: 'gpt-4o-mini', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['write_file', 'run_terminal_command'], spawnableAgents: [], @@ -72,6 +73,7 @@ describe('mainPrompt', () => { spawnerPrompt: '', model: 'gpt-4o', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['write_file', 'run_terminal_command'], spawnableAgents: [], @@ -319,6 +321,7 @@ describe('mainPrompt', () => { spawnerPrompt: '', model: 'gpt-4o-mini', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['write_file', 'run_terminal_command'], spawnableAgents: [], @@ -334,6 +337,7 @@ describe('mainPrompt', () => { spawnerPrompt: '', model: 'gpt-4o', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['write_file', 'run_terminal_command'], spawnableAgents: [], diff --git a/backend/src/__tests__/malformed-tool-call.test.ts b/backend/src/__tests__/malformed-tool-call.test.ts index ca190cbff4..3d46e38689 100644 --- a/backend/src/__tests__/malformed-tool-call.test.ts +++ b/backend/src/__tests__/malformed-tool-call.test.ts @@ -60,6 +60,7 @@ describe('malformed tool call error handling', () => { inputSchema: {}, outputMode: 'all_messages' as const, includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['read_files', 'end_turn'], spawnableAgents: [], @@ -142,6 +143,7 @@ describe('malformed tool call error handling', () => { fileContext: mockFileContext, messages: [], agentState, + system: 'Test system prompt', agentContext: {}, onResponseChunk, fullResponse: '', @@ -197,6 +199,7 @@ describe('malformed tool call error handling', () => { fileContext: mockFileContext, messages: [], agentState, + system: 'Test system prompt', agentContext: {}, onResponseChunk, fullResponse: '', @@ -242,6 +245,7 @@ describe('malformed tool call error handling', () => { fileContext: mockFileContext, messages: [], agentState, + system: 'Test system prompt', agentContext: {}, onResponseChunk, fullResponse: '', @@ -291,6 +295,7 @@ describe('malformed tool call error handling', () => { fileContext: mockFileContext, messages: [], agentState, + system: 'Test system prompt', agentContext: {}, onResponseChunk, fullResponse: '', @@ -342,6 +347,7 @@ describe('malformed tool call error handling', () => { fileContext: mockFileContext, messages: [], agentState, + system: 'Test system prompt', agentContext: {}, onResponseChunk, fullResponse: '', @@ -395,6 +401,7 @@ describe('malformed tool call error handling', () => { fileContext: mockFileContext, messages: [], agentState, + system: 'Test system prompt', agentContext: {}, onResponseChunk, fullResponse: '', diff --git a/backend/src/__tests__/prompt-caching-subagents.test.ts b/backend/src/__tests__/prompt-caching-subagents.test.ts new file mode 100644 index 0000000000..87410a0b5b --- /dev/null +++ b/backend/src/__tests__/prompt-caching-subagents.test.ts @@ -0,0 +1,576 @@ +import { TEST_USER_ID } from '@codebuff/common/old-constants' +import { + clearMockedModules, + mockModule, +} from '@codebuff/common/testing/mock-modules' +import { getInitialSessionState } from '@codebuff/common/types/session-state' +import { + spyOn, + beforeEach, + afterEach, + beforeAll, + afterAll, + describe, + expect, + it, + mock, +} from 'bun:test' + +import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk' +import { loopAgentSteps } from '../run-agent-step' +import * as websocketAction from '../websockets/websocket-action' + +import type { AgentTemplate } from '../templates/types' +import type { Message } from '@codebuff/common/types/messages/codebuff-message' +import type { ProjectFileContext } from '@codebuff/common/util/file' +import type { WebSocket } from 'ws' + +const mockFileContext: ProjectFileContext = { + projectRoot: '/test', + cwd: '/test', + fileTree: [], + fileTokenScores: {}, + knowledgeFiles: {}, + gitChanges: { + status: '', + diff: '', + diffCached: '', + lastCommitMessages: '', + }, + changesSinceLastChat: {}, + shellConfigFiles: {}, + agentTemplates: {}, + customToolDefinitions: {}, + systemInfo: { + platform: 'test', + shell: 'test', + nodeVersion: 'test', + arch: 'test', + homedir: '/home/test', + cpus: 1, + }, +} + +class MockWebSocket { + send(msg: string) {} + close() {} + on(event: string, listener: (...args: any[]) => void) {} + removeListener(event: string, listener: (...args: any[]) => void) {} +} + +describe('Prompt Caching for Subagents with inheritParentSystemPrompt', () => { + let mockLocalAgentTemplates: Record + let capturedMessages: Message[] = [] + + beforeAll(() => { + // Mock logger + mockModule('@codebuff/backend/util/logger', () => ({ + logger: { + debug: () => {}, + error: () => {}, + info: () => {}, + warn: () => {}, + }, + withLoggerContext: async (context: any, fn: () => Promise) => fn(), + })) + }) + + beforeEach(() => { + capturedMessages = [] + + // Setup mock agent templates + mockLocalAgentTemplates = { + parent: { + id: 'parent', + displayName: 'Parent Agent', + outputMode: 'last_message', + inputSchema: {}, + spawnerPrompt: '', + model: 'anthropic/claude-sonnet-4', + includeMessageHistory: false, + inheritParentSystemPrompt: false, + mcpServers: {}, + toolNames: [], + spawnableAgents: ['child'], + systemPrompt: 'Parent agent system prompt for testing', + instructionsPrompt: '', + stepPrompt: '', + } satisfies AgentTemplate, + child: { + id: 'child', + displayName: 'Child Agent', + outputMode: 'last_message', + inputSchema: {}, + spawnerPrompt: '', + model: 'anthropic/claude-sonnet-4', // Same model as parent + includeMessageHistory: false, + inheritParentSystemPrompt: true, // Should inherit parent's system prompt + mcpServers: {}, + toolNames: [], + spawnableAgents: [], + systemPrompt: '', // Must be empty when inheritParentSystemPrompt is true + instructionsPrompt: '', + stepPrompt: '', + } satisfies AgentTemplate, + } + + // Mock LLM API to capture messages and end turn immediately + spyOn(aisdk, 'promptAiSdkStream').mockImplementation( + async function* (options) { + // Capture the messages sent to the LLM + capturedMessages = options.messages + + // Simulate immediate end turn + yield { + type: 'text' as const, + text: 'Test response', + } + + if (options.onCostCalculated) { + await options.onCostCalculated(1) + } + + return 'mock-message-id' + }, + ) + + // Mock file operations + spyOn(websocketAction, 'requestFiles').mockImplementation( + async (ws, paths) => { + const results: Record = {} + paths.forEach((path) => { + results[path] = null + }) + return results + }, + ) + + spyOn(websocketAction, 'requestToolCall').mockImplementation( + async (ws, userInputId, toolName, input) => { + return { + output: [ + { + type: 'json', + value: { message: 'Success' }, + }, + ], + } + }, + ) + + // Mock live user input + const liveUserInputs = require('../live-user-inputs') + spyOn(liveUserInputs, 'checkLiveUserInput').mockImplementation(() => true) + }) + + afterEach(() => { + mock.restore() + }) + + afterAll(() => { + clearMockedModules() + }) + + it('should inherit parent system prompt when inheritParentSystemPrompt is true', async () => { + const sessionState = getInitialSessionState(mockFileContext) + const ws = new MockWebSocket() as unknown as WebSocket + + // Run parent agent first to establish system prompt + const parentResult = await loopAgentSteps(ws, { + userInputId: 'test-parent', + prompt: 'Parent task', + params: undefined, + agentType: 'parent', + agentState: sessionState.mainAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + }) + + // Capture parent's messages which include the system prompt + const parentMessages = capturedMessages + expect(parentMessages.length).toBeGreaterThan(0) + expect(parentMessages[0].role).toBe('system') + const parentSystemPrompt = parentMessages[0].content as string + expect(parentSystemPrompt).toContain( + 'Parent agent system prompt for testing', + ) + + // Now run child agent with inheritParentSystemPrompt and parentSystemPrompt + capturedMessages = [] + const childAgentState = { + ...sessionState.mainAgentState, + agentId: 'child-agent', + agentType: 'child' as const, + messageHistory: [], + } + + await loopAgentSteps(ws, { + userInputId: 'test-child', + prompt: 'Child task', + params: undefined, + agentType: 'child', + agentState: childAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + parentSystemPrompt: parentSystemPrompt, + }) + + // Verify child uses parent's system prompt + const childMessages = capturedMessages + expect(childMessages.length).toBeGreaterThan(0) + expect(childMessages[0].role).toBe('system') + expect(childMessages[0].content).toBe(parentSystemPrompt) + }) + + it('should generate own system prompt when inheritParentSystemPrompt is false', async () => { + const sessionState = getInitialSessionState(mockFileContext) + const ws = new MockWebSocket() as unknown as WebSocket + + // Create a child agent that does NOT inherit parent system prompt + const standaloneChild: AgentTemplate = { + id: 'standalone-child', + displayName: 'Standalone Child', + outputMode: 'last_message', + inputSchema: {}, + spawnerPrompt: '', + model: 'anthropic/claude-sonnet-4', + includeMessageHistory: false, + inheritParentSystemPrompt: false, + mcpServers: {}, + toolNames: [], + spawnableAgents: [], + systemPrompt: 'Standalone child system prompt', + instructionsPrompt: '', + stepPrompt: '', + } + + mockLocalAgentTemplates['standalone-child'] = standaloneChild + + // Run parent agent first + const parentResult = await loopAgentSteps(ws, { + userInputId: 'test-parent', + prompt: 'Parent task', + params: undefined, + agentType: 'parent', + agentState: sessionState.mainAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + }) + + const parentMessages = capturedMessages + const parentSystemPrompt = parentMessages[0].content as string + + // Run child agent with inheritParentSystemPrompt=false + capturedMessages = [] + const childAgentState = { + ...sessionState.mainAgentState, + agentId: 'child-agent', + agentType: 'standalone-child' as const, + messageHistory: [], + } + + await loopAgentSteps(ws, { + userInputId: 'test-child', + prompt: 'Child task', + params: undefined, + agentType: 'standalone-child', + agentState: childAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + parentSystemPrompt: parentSystemPrompt, + }) + + const childMessages = capturedMessages + + // Verify child uses its own system prompt (not parent's) + expect(childMessages[0].role).toBe('system') + expect(childMessages[0].content).not.toBe(parentSystemPrompt) + expect(childMessages[0].content).toContain('Standalone child system prompt') + }) + + it('should work independently: includeMessageHistory without inheritParentSystemPrompt', async () => { + const sessionState = getInitialSessionState(mockFileContext) + const ws = new MockWebSocket() as unknown as WebSocket + + // Create a child that includes message history but uses its own system prompt + const messageHistoryChild: AgentTemplate = { + id: 'message-history-child', + displayName: 'Message History Child', + outputMode: 'last_message', + inputSchema: {}, + spawnerPrompt: '', + model: 'anthropic/claude-sonnet-4', + includeMessageHistory: true, // Includes message history + inheritParentSystemPrompt: false, // But uses own system prompt + mcpServers: {}, + toolNames: [], + spawnableAgents: [], + systemPrompt: 'Child with message history system prompt', + instructionsPrompt: '', + stepPrompt: '', + } + + mockLocalAgentTemplates['message-history-child'] = messageHistoryChild + + // Run parent agent first + await loopAgentSteps(ws, { + userInputId: 'test-parent', + prompt: 'Parent task', + params: undefined, + agentType: 'parent', + agentState: sessionState.mainAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + }) + + const parentMessages = capturedMessages + const parentSystemPrompt = parentMessages[0].content as string + + // Run child agent + capturedMessages = [] + const childAgentState = { + ...sessionState.mainAgentState, + agentId: 'child-agent', + agentType: 'message-history-child' as const, + messageHistory: [ + { role: 'user' as const, content: 'Previous message' }, + { role: 'assistant' as const, content: 'Previous response' }, + ], + } + + await loopAgentSteps(ws, { + userInputId: 'test-child', + prompt: 'Child task', + params: undefined, + agentType: 'message-history-child', + agentState: childAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + parentSystemPrompt: parentSystemPrompt, + }) + + const childMessages = capturedMessages + + // Verify child uses its own system prompt (not parent's) + expect(childMessages[0].role).toBe('system') + expect(childMessages[0].content).not.toBe(parentSystemPrompt) + expect(childMessages[0].content).toContain( + 'Child with message history system prompt', + ) + + // Verify message history was included + expect(childMessages.length).toBeGreaterThan(2) + const hasMessageHistory = childMessages.some( + (msg) => msg.role === 'user' && msg.content === 'Previous message', + ) + expect(hasMessageHistory).toBe(true) + }) + + it('should validate that agents with inheritParentSystemPrompt cannot have custom systemPrompt', () => { + const { + DynamicAgentTemplateSchema, + } = require('@codebuff/common/types/dynamic-agent-template') + + // Valid: inheritParentSystemPrompt with empty systemPrompt + const validAgent = { + id: 'valid-agent', + displayName: 'Valid', + model: 'anthropic/claude-sonnet-4', + inheritParentSystemPrompt: true, + systemPrompt: '', + instructionsPrompt: '', + stepPrompt: '', + } + const validResult = DynamicAgentTemplateSchema.safeParse(validAgent) + expect(validResult.success).toBe(true) + + // Invalid: inheritParentSystemPrompt with custom systemPrompt + const invalidAgent = { + id: 'invalid-agent', + displayName: 'Invalid', + model: 'anthropic/claude-sonnet-4', + inheritParentSystemPrompt: true, + systemPrompt: 'Custom system prompt', + instructionsPrompt: '', + stepPrompt: '', + } + const invalidResult = DynamicAgentTemplateSchema.safeParse(invalidAgent) + expect(invalidResult.success).toBe(false) + if (!invalidResult.success) { + expect(invalidResult.error.message).toContain( + 'Cannot specify both systemPrompt and inheritParentSystemPrompt', + ) + } + }) + + it('should enable prompt caching with matching system prompt prefix', async () => { + const sessionState = getInitialSessionState(mockFileContext) + const ws = new MockWebSocket() as unknown as WebSocket + + // Run parent agent + const parentResult = await loopAgentSteps(ws, { + userInputId: 'test-parent', + prompt: 'Parent task', + params: undefined, + agentType: 'parent', + agentState: sessionState.mainAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + }) + + const parentMessages = capturedMessages + const parentSystemPrompt = parentMessages[0].content as string + + // Run child agent with inheritParentSystemPrompt=true + capturedMessages = [] + const childAgentState = { + ...sessionState.mainAgentState, + agentId: 'child-agent', + agentType: 'child' as const, + messageHistory: [], + } + + await loopAgentSteps(ws, { + userInputId: 'test-child', + prompt: 'Child task', + params: undefined, + agentType: 'child', + agentState: childAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + parentSystemPrompt: parentSystemPrompt, + }) + + const childMessages = capturedMessages + + // Verify both agents use the same system prompt + expect(parentMessages[0].role).toBe('system') + expect(childMessages[0].role).toBe('system') + expect(childMessages[0].content).toBe(parentMessages[0].content) + + // This matching system prompt enables prompt caching: + // Both agents will have the same system message at the start, + // allowing the LLM provider to cache and reuse the system prompt + }) + + it('should support both inheritParentSystemPrompt and includeMessageHistory together', async () => { + const sessionState = getInitialSessionState(mockFileContext) + const ws = new MockWebSocket() as unknown as WebSocket + + // Create a child that inherits system prompt AND includes message history + const fullInheritChild: AgentTemplate = { + id: 'full-inherit-child', + displayName: 'Full Inherit Child', + outputMode: 'last_message', + inputSchema: {}, + spawnerPrompt: '', + model: 'anthropic/claude-sonnet-4', + includeMessageHistory: true, // Includes message history + inheritParentSystemPrompt: true, // AND inherits system prompt + mcpServers: {}, + toolNames: [], + spawnableAgents: [], + systemPrompt: '', // Must be empty + instructionsPrompt: '', + stepPrompt: '', + } + + mockLocalAgentTemplates['full-inherit-child'] = fullInheritChild + + // Run parent agent first with some message history + const parentResult = await loopAgentSteps(ws, { + userInputId: 'test-parent', + prompt: 'Parent task', + params: undefined, + agentType: 'parent', + agentState: { + ...sessionState.mainAgentState, + messageHistory: [ + { role: 'user' as const, content: 'Initial question' }, + { role: 'assistant' as const, content: 'Initial answer' }, + ], + }, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + }) + + const parentMessages = capturedMessages + const parentSystemPrompt = parentMessages[0].content as string + + // Run child agent + capturedMessages = [] + const childAgentState = { + ...sessionState.mainAgentState, + agentId: 'child-agent', + agentType: 'full-inherit-child' as const, + messageHistory: [ + { role: 'user' as const, content: 'Initial question' }, + { role: 'assistant' as const, content: 'Initial answer' }, + ], + } + + await loopAgentSteps(ws, { + userInputId: 'test-child', + prompt: 'Child task', + params: undefined, + agentType: 'full-inherit-child', + agentState: childAgentState, + fingerprintId: 'test-fingerprint', + fileContext: mockFileContext, + localAgentTemplates: mockLocalAgentTemplates, + userId: TEST_USER_ID, + clientSessionId: 'test-session', + onResponseChunk: () => {}, + parentSystemPrompt: parentSystemPrompt, + }) + + const childMessages = capturedMessages + + // Verify child inherits parent's system prompt + expect(childMessages[0].role).toBe('system') + expect(childMessages[0].content).toBe(parentSystemPrompt) + + // Verify message history was included + expect(childMessages.length).toBeGreaterThan(2) + const hasMessageHistory = childMessages.some( + (msg) => msg.role === 'user' && msg.content === 'Initial question', + ) + expect(hasMessageHistory).toBe(true) + }) +}) diff --git a/backend/src/__tests__/read-docs-tool.test.ts b/backend/src/__tests__/read-docs-tool.test.ts index 49ea6b551b..2fbeef0752 100644 --- a/backend/src/__tests__/read-docs-tool.test.ts +++ b/backend/src/__tests__/read-docs-tool.test.ts @@ -323,6 +323,7 @@ describe('read_docs tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -394,6 +395,7 @@ describe('read_docs tool with researcher agent', () => { ) await runAgentStep(new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -443,6 +445,7 @@ describe('read_docs tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -509,6 +512,7 @@ describe('read_docs tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -574,6 +578,7 @@ describe('read_docs tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -640,6 +645,7 @@ describe('read_docs tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', diff --git a/backend/src/__tests__/run-agent-step-tools.test.ts b/backend/src/__tests__/run-agent-step-tools.test.ts index e28387e736..c705b48cc0 100644 --- a/backend/src/__tests__/run-agent-step-tools.test.ts +++ b/backend/src/__tests__/run-agent-step-tools.test.ts @@ -58,6 +58,7 @@ describe('runAgentStep - set_output tool', () => { inputSchema: {}, outputMode: 'structured_output' as const, includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['set_output', 'end_turn'], spawnableAgents: [], @@ -202,6 +203,7 @@ describe('runAgentStep - set_output tool', () => { agentState, prompt: 'Analyze the codebase', params: undefined, + system: 'Test system prompt', }, ) @@ -244,6 +246,7 @@ describe('runAgentStep - set_output tool', () => { agentState, prompt: 'Analyze the codebase', params: undefined, + system: 'Test system prompt', }, ) @@ -292,6 +295,7 @@ describe('runAgentStep - set_output tool', () => { agentState, prompt: 'Update the output', params: undefined, + system: 'Test system prompt', }, ) @@ -331,6 +335,7 @@ describe('runAgentStep - set_output tool', () => { agentState, prompt: 'Update with empty object', params: undefined, + system: 'Test system prompt', }, ) @@ -348,6 +353,7 @@ describe('runAgentStep - set_output tool', () => { inputSchema: {}, outputMode: 'structured_output' as const, includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['read_files', 'end_turn'], spawnableAgents: [], @@ -426,6 +432,7 @@ describe('runAgentStep - set_output tool', () => { agentState, prompt: 'Test the handleSteps functionality', params: undefined, + system: 'Test system prompt', }, ) @@ -468,6 +475,7 @@ describe('runAgentStep - set_output tool', () => { inputSchema: {}, outputMode: 'structured_output' as const, includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['set_messages', 'end_turn'], spawnableAgents: [], @@ -519,6 +527,7 @@ describe('runAgentStep - set_output tool', () => { inputSchema: {}, outputMode: 'structured_output' as const, includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['spawn_agent_inline', 'end_turn'], spawnableAgents: ['message-deleter-agent'], @@ -584,6 +593,7 @@ describe('runAgentStep - set_output tool', () => { agentState, prompt: 'Spawn an inline agent to clean up messages', params: undefined, + system: 'Parent system prompt', }, ) diff --git a/backend/src/__tests__/run-programmatic-step.test.ts b/backend/src/__tests__/run-programmatic-step.test.ts index 67b54bd86f..5b58f9e232 100644 --- a/backend/src/__tests__/run-programmatic-step.test.ts +++ b/backend/src/__tests__/run-programmatic-step.test.ts @@ -103,6 +103,7 @@ describe('runProgrammaticStep', () => { inputSchema: {}, outputMode: 'structured_output', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['read_files', 'write_file', 'end_turn'], spawnableAgents: [], diff --git a/backend/src/__tests__/sandbox-generator.test.ts b/backend/src/__tests__/sandbox-generator.test.ts index dda4d46e12..cbe179d004 100644 --- a/backend/src/__tests__/sandbox-generator.test.ts +++ b/backend/src/__tests__/sandbox-generator.test.ts @@ -71,6 +71,7 @@ describe('QuickJS Sandbox Generator', () => { model: 'anthropic/claude-4-sonnet-20250522', outputMode: 'structured_output', includeMessageHistory: false, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: ['set_output'], spawnableAgents: [], diff --git a/backend/src/__tests__/spawn-agents-message-history.test.ts b/backend/src/__tests__/spawn-agents-message-history.test.ts index 2988f7e8da..102116e053 100644 --- a/backend/src/__tests__/spawn-agents-message-history.test.ts +++ b/backend/src/__tests__/spawn-agents-message-history.test.ts @@ -77,6 +77,7 @@ describe('Spawn Agents Message History', () => { spawnerPrompt: '', model: '', includeMessageHistory, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: [], spawnableAgents: ['child-agent'], @@ -96,7 +97,7 @@ describe('Spawn Agents Message History', () => { }, }) - it('should exclude system messages from conversation history when includeMessageHistory is true', async () => { + it('should include all messages from conversation history when includeMessageHistory is true', async () => { const parentAgent = createMockAgent('parent', true) const childAgent = createMockAgent('child-agent', true) const ws = new MockWebSocket() as unknown as WebSocket @@ -131,6 +132,7 @@ describe('Spawn Agents Message History', () => { sendSubagentChunk: mockSendSubagentChunk, messages: mockMessages, agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -139,42 +141,35 @@ describe('Spawn Agents Message History', () => { // Verify that the spawned agent was called expect(mockLoopAgentSteps).toHaveBeenCalledTimes(1) - // Verify that the subagent's message history contains the conversation history message - expect(capturedSubAgentState.messageHistory).toHaveLength(1) - const conversationHistoryMessage = capturedSubAgentState.messageHistory[0] - expect(conversationHistoryMessage.role).toBe('user') - expect(conversationHistoryMessage.content).toContain( - 'conversation history between the user and an assistant', - ) - - // Parse the JSON content to verify system message is excluded - const contentMatch = - conversationHistoryMessage.content.match(/\[([\s\S]*)\]/) - expect(contentMatch).toBeTruthy() - const parsedMessages = JSON.parse(contentMatch![0]) + // Verify that the subagent's message history contains the filtered messages + // expireMessages filters based on timeToLive property, not role + // Since the system message doesn't have timeToLive, it will be included + expect(capturedSubAgentState.messageHistory).toHaveLength(4) // System + user + assistant messages - // Verify system message is excluded - expect(parsedMessages).toHaveLength(3) // Only user and assistant messages - expect( - parsedMessages.find((msg: any) => msg.role === 'system'), - ).toBeUndefined() - expect( - parsedMessages.find( - (msg: any) => - msg.content === - 'This is the parent system prompt that should be excluded', - ), - ).toBeUndefined() + // Verify system message is included (because it has no timeToLive property) + const systemMessages = capturedSubAgentState.messageHistory.filter( + (msg: any) => msg.role === 'system', + ) + expect(systemMessages).toHaveLength(1) + expect(systemMessages[0].content).toBe( + 'This is the parent system prompt that should be excluded', + ) // Verify user and assistant messages are included expect( - parsedMessages.find((msg: any) => msg.content === 'Hello'), + capturedSubAgentState.messageHistory.find( + (msg: any) => msg.content === 'Hello', + ), ).toBeTruthy() expect( - parsedMessages.find((msg: any) => msg.content === 'Hi there!'), + capturedSubAgentState.messageHistory.find( + (msg: any) => msg.content === 'Hi there!', + ), ).toBeTruthy() expect( - parsedMessages.find((msg: any) => msg.content === 'How are you?'), + capturedSubAgentState.messageHistory.find( + (msg: any) => msg.content === 'How are you?', + ), ).toBeTruthy() }) @@ -208,6 +203,7 @@ describe('Spawn Agents Message History', () => { sendSubagentChunk: mockSendSubagentChunk, messages: mockMessages, agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -243,15 +239,14 @@ describe('Spawn Agents Message History', () => { sendSubagentChunk: mockSendSubagentChunk, messages: mockMessages, agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) await result - // Verify that the subagent still gets a conversation history message, even if empty - expect(capturedSubAgentState.messageHistory).toHaveLength(1) - const conversationHistoryMessage = capturedSubAgentState.messageHistory[0] - expect(conversationHistoryMessage.content).toContain('[]') // Empty array in JSON + // Verify that the subagent's message history is empty when there are no messages to pass + expect(capturedSubAgentState.messageHistory).toHaveLength(0) }) it('should handle message history with only system messages', async () => { @@ -283,14 +278,18 @@ describe('Spawn Agents Message History', () => { sendSubagentChunk: mockSendSubagentChunk, messages: mockMessages, agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) await result - // Verify that all system messages are filtered out - expect(capturedSubAgentState.messageHistory).toHaveLength(1) - const conversationHistoryMessage = capturedSubAgentState.messageHistory[0] - expect(conversationHistoryMessage.content).toContain('[]') // Empty array in JSON since all system messages filtered out + // Verify that system messages without timeToLive are included + // expireMessages only filters messages with timeToLive='userPrompt' + expect(capturedSubAgentState.messageHistory).toHaveLength(2) + const systemMessages = capturedSubAgentState.messageHistory.filter( + (msg: any) => msg.role === 'system', + ) + expect(systemMessages).toHaveLength(2) }) }) diff --git a/backend/src/__tests__/spawn-agents-permissions.test.ts b/backend/src/__tests__/spawn-agents-permissions.test.ts index 65c6a55d3e..807595f7d7 100644 --- a/backend/src/__tests__/spawn-agents-permissions.test.ts +++ b/backend/src/__tests__/spawn-agents-permissions.test.ts @@ -40,6 +40,7 @@ describe('Spawn Agents Permissions', () => { spawnerPrompt: '', model: '', includeMessageHistory: true, + inheritParentSystemPrompt: false, mcpServers: {}, toolNames: [], spawnableAgents, @@ -259,6 +260,7 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -291,6 +293,7 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -325,6 +328,7 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -361,6 +365,7 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -396,6 +401,7 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -428,6 +434,7 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -477,6 +484,7 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -526,6 +534,7 @@ describe('Spawn Agents Permissions', () => { localAgentTemplates: { thinker: childAgent }, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -556,6 +565,7 @@ describe('Spawn Agents Permissions', () => { localAgentTemplates: { reviewer: childAgent }, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -587,6 +597,7 @@ describe('Spawn Agents Permissions', () => { localAgentTemplates: {}, // Empty - agent not found messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -617,6 +628,7 @@ describe('Spawn Agents Permissions', () => { localAgentTemplates: { 'codebuff/thinker@1.0.0': childAgent }, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -650,6 +662,7 @@ describe('Spawn Agents Permissions', () => { }, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) @@ -680,6 +693,7 @@ describe('Spawn Agents Permissions', () => { localAgentTemplates: { 'codebuff/thinker@2.0.0': childAgent }, messages: [], agentState: sessionState.mainAgentState, + system: 'Test system prompt', }, }) diff --git a/backend/src/__tests__/subagent-streaming.test.ts b/backend/src/__tests__/subagent-streaming.test.ts index 554c43e456..cfd24cda1d 100644 --- a/backend/src/__tests__/subagent-streaming.test.ts +++ b/backend/src/__tests__/subagent-streaming.test.ts @@ -45,6 +45,7 @@ describe('Subagent Streaming', () => { spawnerPrompt: '', model: '', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: [], spawnableAgents: [], systemPrompt: '', @@ -162,6 +163,7 @@ describe('Subagent Streaming', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState, + system: 'Test system prompt', }, }) @@ -238,6 +240,7 @@ describe('Subagent Streaming', () => { sendSubagentChunk: mockSendSubagentChunk, messages: [], agentState, + system: 'Test system prompt', }, }) await result diff --git a/backend/src/__tests__/web-search-tool.test.ts b/backend/src/__tests__/web-search-tool.test.ts index 0a884323bd..9fc6f56bb1 100644 --- a/backend/src/__tests__/web-search-tool.test.ts +++ b/backend/src/__tests__/web-search-tool.test.ts @@ -140,6 +140,7 @@ describe('web_search tool with researcher agent', () => { ) await runAgentStep(new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -186,6 +187,7 @@ describe('web_search tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -243,6 +245,7 @@ describe('web_search tool with researcher agent', () => { ) await runAgentStep(new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -286,6 +289,7 @@ describe('web_search tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -344,6 +348,7 @@ describe('web_search tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -398,6 +403,7 @@ describe('web_search tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -442,6 +448,7 @@ describe('web_search tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', @@ -498,6 +505,7 @@ describe('web_search tool with researcher agent', () => { const { agentState: newAgentState } = await runAgentStep( new MockWebSocket() as unknown as WebSocket, { + system: 'Test system prompt', userId: TEST_USER_ID, userInputId: 'test-input', clientSessionId: 'test-session', diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts index beee1764ae..98ba474901 100644 --- a/backend/src/run-agent-step.ts +++ b/backend/src/run-agent-step.ts @@ -89,6 +89,7 @@ export interface AgentOptions { prompt: string | undefined params: Record | undefined + system: string } export const runAgentStep = async ( @@ -111,6 +112,7 @@ export const runAgentStep = async ( localAgentTemplates, prompt, params, + system, } = options let agentState = options.agentState @@ -271,30 +273,6 @@ export const runAgentStep = async ( }) const iterationNum = agentState.messageHistory.length - - const system = - (await getAgentPrompt({ - agentTemplate, - promptType: { type: 'systemPrompt' }, - fileContext, - agentState, - agentTemplates: localAgentTemplates, - additionalToolDefinitions: () => { - const additionalToolDefinitions = cloneDeep( - Object.fromEntries( - Object.entries(fileContext.customToolDefinitions).filter( - ([toolName]) => agentTemplate.toolNames.includes(toolName), - ), - ), - ) - return getMCPToolData({ - ws, - toolNames: agentTemplate.toolNames, - mcpServers: agentTemplate.mcpServers, - writeTo: additionalToolDefinitions, - }) - }, - })) ?? '' const systemTokens = countTokensJson(system) const agentMessages = agentState.messageHistory @@ -339,6 +317,7 @@ export const runAgentStep = async ( agentState, repoId, messages: agentMessages, + system, agentTemplate, localAgentTemplates, fileContext, @@ -455,6 +434,7 @@ export const loopAgentSteps = async ( clientSessionId, onResponseChunk, clearUserPromptMessagesAfterResponse = true, + parentSystemPrompt, }: { userInputId: string agentType: AgentTemplateType @@ -466,6 +446,7 @@ export const loopAgentSteps = async ( fileContext: ProjectFileContext localAgentTemplates: Record clearUserPromptMessagesAfterResponse?: boolean + parentSystemPrompt?: string userId: string | undefined clientSessionId: string @@ -521,6 +502,33 @@ export const loopAgentSteps = async ( : undefined // Build the initial message history with user prompt and instructions + // Generate system prompt once, using parent's if inheritParentSystemPrompt is true + const system = + agentTemplate.inheritParentSystemPrompt && parentSystemPrompt + ? parentSystemPrompt + : (await getAgentPrompt({ + agentTemplate, + promptType: { type: 'systemPrompt' }, + fileContext, + agentState, + agentTemplates: localAgentTemplates, + additionalToolDefinitions: () => { + const additionalToolDefinitions = cloneDeep( + Object.fromEntries( + Object.entries(fileContext.customToolDefinitions).filter( + ([toolName]) => agentTemplate.toolNames.includes(toolName), + ), + ), + ) + return getMCPToolData({ + ws, + toolNames: agentTemplate.toolNames, + mcpServers: agentTemplate.mcpServers, + writeTo: additionalToolDefinitions, + }) + }, + })) ?? '' + const initialMessages = buildArray( ...agentState.messageHistory, @@ -598,6 +606,7 @@ export const loopAgentSteps = async ( localAgentTemplates, prompt: currentPrompt, params: currentParams, + system, stepsComplete: shouldEndTurn, stepNumber: totalSteps, }) @@ -675,6 +684,7 @@ export const loopAgentSteps = async ( agentState: currentAgentState, prompt: currentPrompt, params: currentParams, + system, }) if (newAgentState.runId) { diff --git a/backend/src/run-programmatic-step.ts b/backend/src/run-programmatic-step.ts index 27ce42516d..236c43b9bf 100644 --- a/backend/src/run-programmatic-step.ts +++ b/backend/src/run-programmatic-step.ts @@ -48,6 +48,7 @@ export async function runProgrammaticStep( template, prompt, params, + system, userId, userInputId, clientSessionId, @@ -62,6 +63,7 @@ export async function runProgrammaticStep( template: AgentTemplate prompt: string | undefined params: Record | undefined + system: string | undefined userId: string | undefined userInputId: string clientSessionId: string @@ -160,6 +162,7 @@ export async function runProgrammaticStep( repoId, agentTemplate: template, localAgentTemplates, + system, sendSubagentChunk: (data: { userInputId: string agentId: string diff --git a/backend/src/templates/prompts.ts b/backend/src/templates/prompts.ts index 6960a79bc4..c1b5b886aa 100644 --- a/backend/src/templates/prompts.ts +++ b/backend/src/templates/prompts.ts @@ -43,6 +43,8 @@ params: None` `- ${agentType}: ${agentTemplate.spawnerPrompt}`, agentTemplate.includeMessageHistory && 'This agent can see the current message history.', + agentTemplate.inheritParentSystemPrompt && + "This agent inherits the parent's system prompt for prompt caching.", inputSchemaStr, ).join('\n') }) diff --git a/backend/src/tools/handlers/tool/spawn-agent-inline.ts b/backend/src/tools/handlers/tool/spawn-agent-inline.ts index e940e09aa4..d35c1c6744 100644 --- a/backend/src/tools/handlers/tool/spawn-agent-inline.ts +++ b/backend/src/tools/handlers/tool/spawn-agent-inline.ts @@ -37,6 +37,7 @@ export const handleSpawnAgentInline = ((params: { localAgentTemplates?: Record messages?: Message[] agentState?: AgentState + system?: string } }): { result: Promise>; state: {} } => { const { @@ -60,7 +61,8 @@ export const handleSpawnAgentInline = ((params: { userId, agentTemplate: parentAgentTemplate, localAgentTemplates, - agentState, + agentState: parentAgentState, + system, } = validateSpawnState(state, 'spawn_agent_inline') const triggerSpawnAgentInline = async () => { @@ -75,9 +77,10 @@ export const handleSpawnAgentInline = ((params: { // Create child agent state that shares message history with parent const childAgentState: AgentState = createAgentState( agentType, - agentState, + agentTemplate, + parentAgentState, getLatestState().messages, - agentState.agentContext, + parentAgentState.agentContext, ) logAgentSpawn( @@ -96,13 +99,14 @@ export const handleSpawnAgentInline = ((params: { prompt: prompt || '', params: agentParams, agentTemplate, - parentAgentState: agentState, + parentAgentState, agentState: childAgentState, fingerprintId, fileContext, localAgentTemplates, userId, clientSessionId, + parentSystemPrompt: system, onResponseChunk: (chunk) => { // Disabled. // Inherits parent's onResponseChunk @@ -118,8 +122,8 @@ export const handleSpawnAgentInline = ((params: { state.messages = finalMessages // Update parent agent state to reflect shared message history - if (agentState && result.agentState) { - agentState.messageHistory = finalMessages + if (parentAgentState && result.agentState) { + parentAgentState.messageHistory = finalMessages } return undefined diff --git a/backend/src/tools/handlers/tool/spawn-agent-utils.ts b/backend/src/tools/handlers/tool/spawn-agent-utils.ts index 951658ad2e..c1938a3e21 100644 --- a/backend/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/backend/src/tools/handlers/tool/spawn-agent-utils.ts @@ -15,7 +15,6 @@ import type { } from '@codebuff/common/types/session-state' import type { ProjectFileContext } from '@codebuff/common/util/file' import type { WebSocket } from 'ws' - export interface SpawnAgentParams { agent_type: string prompt?: string @@ -30,6 +29,7 @@ export interface BaseSpawnState { localAgentTemplates?: Record messages?: Message[] agentState?: AgentState + system?: string } export interface SpawnContext { @@ -54,6 +54,7 @@ export function validateSpawnState( messages, agentState, userId, + system, } = state if (!ws) { @@ -84,6 +85,9 @@ export function validateSpawnState( `Internal error for ${toolName}: Missing localAgentTemplates in state`, ) } + if (!system) { + throw new Error(`Internal error for ${toolName}: Missing system in state`) + } return { ws, @@ -93,6 +97,7 @@ export function validateSpawnState( localAgentTemplates, messages, agentState, + system, } } @@ -224,36 +229,22 @@ export function validateAgentInput( } } -/** - * Creates conversation history message for spawned agents - */ -export function createConversationHistoryMessage(messages: Message[]): Message { - // Filter out system messages from conversation history to avoid including parent's system prompt - const messagesWithoutSystem = messages.filter( - (message) => message.role !== 'system', - ) - return { - role: 'user', - content: `For context, the following is the conversation history between the user and an assistant:\n\n${JSON.stringify( - messagesWithoutSystem, - null, - 2, - )}`, - keepDuringTruncation: true, - } -} - /** * Creates a new agent state for spawned agents */ export function createAgentState( agentType: string, + agentTemplate: AgentTemplate, parentAgentState: AgentState, - messageHistory: Message[], + parentMessageHistory: Message[], agentContext: Record, ): AgentState { const agentId = generateCompactId() + const messageHistory = agentTemplate.includeMessageHistory + ? parentMessageHistory + : [] + return { agentId, agentType, @@ -316,6 +307,7 @@ export async function executeSubagent({ onResponseChunk, isOnlyChild = false, clearUserPromptMessagesAfterResponse = true, + parentSystemPrompt, }: { ws: WebSocket userInputId: string @@ -332,6 +324,7 @@ export async function executeSubagent({ onResponseChunk: (chunk: string | PrintModeEvent) => void isOnlyChild?: boolean clearUserPromptMessagesAfterResponse?: boolean + parentSystemPrompt?: string }) { onResponseChunk({ type: 'subagent_start', @@ -356,6 +349,7 @@ export async function executeSubagent({ clientSessionId, onResponseChunk, clearUserPromptMessagesAfterResponse, + parentSystemPrompt, }) onResponseChunk({ diff --git a/backend/src/tools/handlers/tool/spawn-agents-async.ts b/backend/src/tools/handlers/tool/spawn-agents-async.ts index 2dc49e4d93..68906e9f71 100644 --- a/backend/src/tools/handlers/tool/spawn-agents-async.ts +++ b/backend/src/tools/handlers/tool/spawn-agents-async.ts @@ -4,7 +4,6 @@ import { validateSpawnState, validateAndGetAgentTemplate, validateAgentInput, - createConversationHistoryMessage, createAgentState, logAgentSpawn, executeSubagent, @@ -46,6 +45,7 @@ export const handleSpawnAgentsAsync = ((params: { sendSubagentChunk?: SendSubagentChunk messages?: Message[] agentState?: AgentState + system?: string } }): { result: Promise>; state: {} } => { if (!ASYNC_AGENTS_ENABLED) { @@ -77,7 +77,7 @@ export const handleSpawnAgentsAsync = ((params: { localAgentTemplates, agentState, } = validateSpawnState(state, 'spawn_agents_async') - const { sendSubagentChunk } = state + const { sendSubagentChunk, system: parentSystemPrompt } = state if (!sendSubagentChunk) { throw new Error( @@ -90,10 +90,6 @@ export const handleSpawnAgentsAsync = ((params: { > = async () => { const results: CodebuffToolOutput[0]['value'] = [] - const conversationHistoryMessage = createConversationHistoryMessage( - getLatestState().messages, - ) - // Validate and spawn agents asynchronously for (const { agent_type: agentTypeStr, prompt, params } of agents) { try { @@ -107,11 +103,12 @@ export const handleSpawnAgentsAsync = ((params: { const subAgentMessages: Message[] = [] if (agentTemplate.includeMessageHistory) { - subAgentMessages.push(conversationHistoryMessage) + subAgentMessages.push(...getLatestState().messages) } const asyncAgentState = createAgentState( agentType, + agentTemplate, agentState, subAgentMessages, {}, @@ -142,6 +139,7 @@ export const handleSpawnAgentsAsync = ((params: { localAgentTemplates, userId, clientSessionId, + parentSystemPrompt, onResponseChunk: (chunk: string | PrintModeEvent) => { if (typeof chunk !== 'string') { return diff --git a/backend/src/tools/handlers/tool/spawn-agents.ts b/backend/src/tools/handlers/tool/spawn-agents.ts index 8acfcce7ae..1c9bfc9e73 100644 --- a/backend/src/tools/handlers/tool/spawn-agents.ts +++ b/backend/src/tools/handlers/tool/spawn-agents.ts @@ -2,7 +2,6 @@ import { validateSpawnState, validateAndGetAgentTemplate, validateAgentInput, - createConversationHistoryMessage, createAgentState, logAgentSpawn, executeSubagent, @@ -49,6 +48,7 @@ export const handleSpawnAgents = ((params: { sendSubagentChunk?: SendSubagentChunk messages?: Message[] agentState?: AgentState + system?: string } }): { result: Promise>; state: {} } => { const { @@ -64,7 +64,7 @@ export const handleSpawnAgents = ((params: { } = params const { agents } = toolCall.input const validatedState = validateSpawnState(state, 'spawn_agents') - const { sendSubagentChunk } = state + const { sendSubagentChunk, system: parentSystemPrompt } = state if (!sendSubagentChunk) { throw new Error( @@ -78,15 +78,10 @@ export const handleSpawnAgents = ((params: { userId, agentTemplate: parentAgentTemplate, localAgentTemplates, - messages, - agentState, + agentState: parentAgentState, } = validatedState const triggerSpawnAgents = async () => { - const conversationHistoryMessage = createConversationHistoryMessage( - getLatestState().messages, - ) - const results = await Promise.allSettled( agents.map(async ({ agent_type: agentTypeStr, prompt, params }) => { const { agentTemplate, agentType } = await validateAndGetAgentTemplate( @@ -97,15 +92,11 @@ export const handleSpawnAgents = ((params: { validateAgentInput(agentTemplate, agentType, prompt, params) - const subAgentMessages: Message[] = [] - if (agentTemplate.includeMessageHistory) { - subAgentMessages.push(conversationHistoryMessage) - } - const subAgentState = createAgentState( agentType, - agentState, - subAgentMessages, + agentTemplate, + parentAgentState, + getLatestState().messages, {}, ) @@ -124,7 +115,7 @@ export const handleSpawnAgents = ((params: { prompt: prompt || '', params, agentTemplate, - parentAgentState: agentState, + parentAgentState, agentState: subAgentState, fingerprintId, fileContext, @@ -132,6 +123,7 @@ export const handleSpawnAgents = ((params: { userId, clientSessionId, isOnlyChild: agents.length === 1, + parentSystemPrompt, onResponseChunk: (chunk: string | PrintModeEvent) => { if (agents.length === 1) { writeToClient(chunk) diff --git a/backend/src/tools/stream-parser.ts b/backend/src/tools/stream-parser.ts index 69d1bb15e0..975d100ee1 100644 --- a/backend/src/tools/stream-parser.ts +++ b/backend/src/tools/stream-parser.ts @@ -48,6 +48,7 @@ export async function processStreamWithTools(options: { localAgentTemplates: Record fileContext: ProjectFileContext messages: Message[] + system: string agentState: AgentState agentContext: Record onResponseChunk: (chunk: string | PrintModeEvent) => void @@ -66,6 +67,7 @@ export async function processStreamWithTools(options: { localAgentTemplates, fileContext, agentContext, + system, agentState, onResponseChunk, } = options @@ -102,6 +104,7 @@ export async function processStreamWithTools(options: { agentState, agentContext, messages, + system, } function toolCallback(toolName: T) { diff --git a/common/src/__tests__/agent-validation.test.ts b/common/src/__tests__/agent-validation.test.ts index fcd6cda843..2331ba89db 100644 --- a/common/src/__tests__/agent-validation.test.ts +++ b/common/src/__tests__/agent-validation.test.ts @@ -46,6 +46,7 @@ describe('Agent Validation', () => { toolNames: ['set_output'], spawnableAgents: [], includeMessageHistory: true, + inheritParentSystemPrompt: false, systemPrompt: 'Test system prompt', instructionsPrompt: 'Test user prompt', stepPrompt: 'Test agent step prompt', @@ -74,6 +75,7 @@ describe('Agent Validation', () => { spawnableAgents: ['thinker', 'researcher'], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, }, }, } @@ -102,6 +104,7 @@ describe('Agent Validation', () => { spawnableAgents: ['nonexistent_agent'], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], }, }, @@ -130,6 +133,7 @@ describe('Agent Validation', () => { stepPrompt: 'Custom step prompt', outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, @@ -169,6 +173,7 @@ describe('Agent Validation', () => { }, outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, @@ -201,6 +206,7 @@ describe('Agent Validation', () => { }, outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, @@ -232,6 +238,7 @@ describe('Agent Validation', () => { stepPrompt: 'Test step prompt', outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, @@ -260,6 +267,7 @@ describe('Agent Validation', () => { spawnableAgents: [], // No spawnable agents outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], }, 'spawner.ts': { @@ -274,6 +282,7 @@ describe('Agent Validation', () => { spawnableAgents: ['codebuffai-git-committer'], // Should be valid after first pass outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn', 'spawn_agents'], }, }, @@ -307,6 +316,7 @@ describe('Agent Validation', () => { stepPrompt: 'Test step prompt', outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], // No inputSchema @@ -338,6 +348,7 @@ describe('Agent Validation', () => { stepPrompt: 'Test step prompt', outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], // No paramsSchema @@ -395,6 +406,7 @@ describe('Agent Validation', () => { spawnableAgents: [], outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], }, }, @@ -465,6 +477,7 @@ describe('Agent Validation', () => { }, outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, @@ -523,6 +536,7 @@ describe('Agent Validation', () => { }, outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, @@ -571,6 +585,7 @@ describe('Agent Validation', () => { }, outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, @@ -615,6 +630,7 @@ describe('Agent Validation', () => { inputSchema: {}, outputMode: 'last_message', includeMessageHistory: true, + inheritParentSystemPrompt: false, toolNames: ['end_turn'], spawnableAgents: [], }, diff --git a/common/src/__tests__/handlesteps-parsing.test.ts b/common/src/__tests__/handlesteps-parsing.test.ts index 546b583bb0..f115334118 100644 --- a/common/src/__tests__/handlesteps-parsing.test.ts +++ b/common/src/__tests__/handlesteps-parsing.test.ts @@ -51,6 +51,7 @@ describe('handleSteps Parsing Tests', () => { toolNames: ['set_output'], spawnableAgents: [], includeMessageHistory: true, + inheritParentSystemPrompt: false, systemPrompt: 'Test system prompt', instructionsPrompt: 'Test user prompt', stepPrompt: 'Test agent step prompt', diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index d662d4f5d9..9df04bade5 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -92,13 +92,6 @@ export interface AgentDefinition { params?: JsonObjectSchema } - /** Whether to include conversation history from the parent agent in context. - * - * Defaults to false. - * Use this if the agent needs to know all the previous messages in the conversation. - */ - includeMessageHistory?: boolean - /** How the agent should output a response to its parent (defaults to 'last_message') * * last_message: The last message from the agent, typically after using tools. @@ -121,6 +114,21 @@ export interface AgentDefinition { * This field is key if the agent is intended to be spawned by other agents. */ spawnerPrompt?: string + /** Whether to include conversation history from the parent agent in context. + * + * Defaults to false. + * Use this when the agent needs to know all the previous messages in the conversation. + */ + includeMessageHistory?: boolean + + /** Whether to inherit the parent agent's system prompt instead of using this agent's own systemPrompt. + * + * Defaults to false. + * Use this when you want to enable prompt caching by preserving the same system prompt prefix. + * Cannot be used together with the systemPrompt field. + */ + inheritParentSystemPrompt?: boolean + /** Background information for the agent. Fairly optional. Prefer using instructionsPrompt for agent instructions. */ systemPrompt?: string diff --git a/common/src/types/agent-template.ts b/common/src/types/agent-template.ts index 7fc18538f1..fa7ef13e75 100644 --- a/common/src/types/agent-template.ts +++ b/common/src/types/agent-template.ts @@ -49,6 +49,7 @@ export type AgentTemplate< params?: z.ZodSchema } includeMessageHistory: boolean + inheritParentSystemPrompt: boolean outputMode: 'last_message' | 'all_messages' | 'structured_output' outputSchema?: z.ZodSchema diff --git a/common/src/types/dynamic-agent-template.ts b/common/src/types/dynamic-agent-template.ts index f6b8b69e3b..6485353e19 100644 --- a/common/src/types/dynamic-agent-template.ts +++ b/common/src/types/dynamic-agent-template.ts @@ -150,6 +150,7 @@ export const DynamicAgentDefinitionSchema = z.object({ // Input and output inputSchema: InputSchemaObjectSchema, includeMessageHistory: z.boolean().default(false), + inheritParentSystemPrompt: z.boolean().default(false), outputMode: z .enum(['last_message', 'all_messages', 'structured_output']) .default('last_message'), @@ -242,4 +243,22 @@ export const DynamicAgentTemplateSchema = DynamicAgentDefinitionSchema.extend({ path: ['toolNames'], }, ) + .refine( + (data) => { + // If inheritParentSystemPrompt is true, systemPrompt must be empty or undefined + if ( + data.inheritParentSystemPrompt && + data.systemPrompt && + data.systemPrompt.trim() !== '' + ) { + return false + } + return true + }, + { + message: + 'Cannot specify both systemPrompt and inheritParentSystemPrompt. When inheritParentSystemPrompt is true, systemPrompt must be empty.', + path: ['systemPrompt'], + }, + ) export type DynamicAgentTemplate = z.infer diff --git a/evals/scaffolding.ts b/evals/scaffolding.ts index 6f7682aa18..c5590ca809 100644 --- a/evals/scaffolding.ts +++ b/evals/scaffolding.ts @@ -198,6 +198,7 @@ export async function runAgentStepScaffolding( agentState, prompt, params: undefined, + system: 'Test system prompt', }) return {