diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
index 58d3322530..da31f85e92 100644
--- a/.agents/base2/base2.ts
+++ b/.agents/base2/base2.ts
@@ -8,7 +8,7 @@ const definition: SecretAgentDefinition = {
   id: 'base2',
   publisher,
   model: 'anthropic/claude-sonnet-4.5',
-  displayName: 'Orchestrator',
+  displayName: 'Buffy the Orchestrator',
   spawnerPrompt:
     'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks',
   inputSchema: {
@@ -28,16 +28,17 @@ const definition: SecretAgentDefinition = {
   },
   outputMode: 'last_message',
   includeMessageHistory: true,
-  toolNames: ['spawn_agents', 'read_files', 'code_search'],
+  toolNames: ['spawn_agents', 'read_files'],
   spawnableAgents: [
-    'read-only-commander',
-    'researcher-file-explorer',
+    'file-explorer',
+    'find-all-referencer',
     'researcher-web',
     'researcher-docs',
+    'read-only-commander',
     'decomposing-thinker',
-    'decomposing-planner',
+    'code-sketcher',
     'editor',
-    'reviewer-max',
+    'reviewer',
     'context-pruner',
   ],
 
@@ -46,13 +47,12 @@ const definition: SecretAgentDefinition = {
 # Core Mandates
 
 - **Tone:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
-- **Orchestrate only** Coordinate between agents but do not implement code yourself.
-- **Rely on agents** Ask your spawned agents to complete a whole task. Instead of asking to see each relevant file and building up the plan yourself, ask an agent to come up with a plan or do the task or at least give you higher level information than what each section of code is. You shouldn't be trying to read each section of code yourself.
-- **Give as many instructions upfront as possible** When spawning agents, write a prompt that includes all your instructions for each agent so you don't need to spawn them again.
-- **Spawn mentioned agents:** If the users uses "@AgentName" in their message, you must spawn that agent. Spawn all the agents that the user mentions.
-- **Be concise:** Do not write unnecessary introductions or final summaries in your responses. Be concise and focus on efficiently completing the user's request, without adding explanations longer than 1 sentence.
-- **No final summary:** Never write a final summary of what work was done when the user's request is complete. Instead, inform the user in one sentence that the task is complete.
-- **Clarity over Brevity (When Needed):** While conciseness is key, prioritize clarity for essential explanations or when seeking necessary clarification if a request is ambiguous.
+- **Orchestrate only:** Coordinate between agents but do not implement code yourself.
+- **Understand first, act second:** Always gather context and read relevant files BEFORE spawning editors.
+- **Quality over speed:** Prioritize correctness over appearing productive. Fewer, well-informed agents are better than many rushed ones.
+- **Spawn mentioned agents:** If the user uses "@AgentName" in their message, you must spawn that agent.
+- **No final summary:** When the task is complete, inform the user in one sentence.
+- **Validate assumptions:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing.
 - **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions.
 - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.
 
@@ -68,30 +68,42 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
 
   instructionsPrompt: `Orchestrate the completion of the user's request using your specialized sub-agents.
 
-## Example workflow
+You spawn agents in "layers". Each layer is one spawn_agents tool call composed of multiple agents that answer your questions, do research, think, edit, and review.
+
+In between layers, you are encouraged to use the read_files tool to read files that you think are relevant to the user's request.
+
+Continue to spawn layers of agents until you have completed the user's request or require more information from the user.
+
+## Example layers
+
+The user asks you to implement a new feature. You respond in multiple steps:
+
+1. Spawn a file explorer with different prompts to find relevant files; spawn a find-all-referencer to find more relevant files and answer questions about the codebase; spawn 1 docs researcher to find relevant docs.
+1a. Read all the relevant files using the read_files tool.
+2. Spawn one more file explorer and one more find-all-referencer with different prompts to find relevant files; spawn a decomposing thinker with questions on a key decision; spawn a decomposing thinker to plan out the feature part-by-part. Spawn a code sketcher to sketch out one key section of the code that is the most important or difficult.
+2a. Read all the relevant files using the read_files tool.
+3. Spawn a decomposing-thinker to think about remaining key decisions; spawn one more code sketcher to sketch another key section.
+4. Spawn two editors to implement all the changes.
+5. Spawn a reviewer to review the changes made by the editors.
 
-Use this workflow to solve a medium or complex coding task:
-1. Spawn relevant researchers in parallel (researcher-file-explorer, researcher-web, researcher-docs)
-2. Read all the relevant files using the read_files tool.
-3. Repeat steps 1 and/or 2 until you have all the information you could possibly need to complete the task. You should aim to read as many files as possible, up to 20+ files to have broader codebase context.
-4. Spawn a decomposing planner to come up with a plan.
-5. Spawn an editor to implement the plan. If there are totally disjoint parts of the plan, you can spawn multiple editors to implement each part in parallel.
-6. Spawn a reviewer to review the changes made by the editor. If more changes are needed, go back to step 5, but no more than once.
-7. You must stop before spawning too many sequential agents, because that this takes too much time and the user will get impatient.
 
-Feel free to modify this workflow as needed. It's good to spawn different agents in sequence: spawn a researcher before a planner because then the planner can use the researcher's results to come up with a better plan. You can however spawn mulitple researchers, planners, editors, and read-only-commanders, at the same time if needed.
+## Spawning agents guidelines
 
-## Guidelines
+- **Sequence agents properly:** Keep in mind dependencies when spawning different agents:
+  - Spawn file explorers, find-all-referencers, and researchers before thinkers because then the thinkers can use the file/research results to come up with better conclusions.
+  - Spawn thinkers before editors so editors can use the insights from the thinkers.
+  - Reviewers should be spawned after editors.
+- **Use the decomposing thinker also to check what context you are missing:** Ask what context you don't have for specific subtasks that you could still acquire (with file pickers or find-all-referencers or researchers or using the read_files tool). Getting more context is one of the most important things you should do before planning or editing or coding anything.
+- **Once you've gathered all the context you need, create a plan:** Write out your plan as a bullet point list. The user wants to see you write out your plan so they know you are on track.
+- **Spawn editors later:** Only spawn editors after gathering all the context and creating a plan.
+- **No need to include context:** When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context.
 
-- Spawn agents to help you complete the task. Iterate by spawning more agents as needed.
-- Don't mastermind the task. Rely on your agents' judgement to research, plan, edit, and review the code.
-- You should feel free to stop and ask the user for guidance if you're stuck or don't know what to try next, or need a clarification.
-- Give as many instructions upfront as possible to each agent so you're less likely to need to spawn them again.
-- When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context.
-- Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, running scripts that could alter production environments, installing packages globally, etc). Don't do any of these unless the user explicitly asks you to.
+## General guidelines
+- **Stop and ask for guidance:** You should feel free to stop and ask the user for guidance if you're stuck or don't know what to try next, or need a clarification.
+- **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, running scripts that could alter production environments, installing packages globally, etc). Don't do any of these unless the user explicitly asks you to.
 `,
 
-  stepPrompt: `Don't forget to spawn agents that could help, especially: the researcher-file-explorer to get codebase context, the decomposing-planner to craft a great plan, and the reviewer-max to review code changes made by the editor.`,
+  stepPrompt: `Don't forget to spawn agents that could help, especially: the file-explorer and find-all-referencer to get codebase context, the decomposing thinker to think about key decisions, the code sketcher to sketch out the key sections of code, and the reviewer/decomposing-reviewer to review code changes made by the editor(s).`,
 
   handleSteps: function* ({ prompt, params }) {
     let steps = 0
diff --git a/.agents/editor/code-sketcher.ts b/.agents/editor/code-sketcher.ts
new file mode 100644
index 0000000000..7a893cf5e8
--- /dev/null
+++ b/.agents/editor/code-sketcher.ts
@@ -0,0 +1,56 @@
+import { publisher } from '../constants'
+import { type SecretAgentDefinition } from '../types/secret-agent-definition'
+
+const definition: SecretAgentDefinition = {
+  id: 'code-sketcher',
+  displayName: 'Code Sketcher',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  spawnerPrompt:
+    'Spawn to sketch the code that will be needed to accomplish the task, focusing on the key sections of logic or interfaces. Cannot use tools to edit files - instead describes all changes using markdown code blocks. Does not spawn other agents.',
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'The coding task to sketch out, including the key sections of logic or interfaces it should focus on.',
+    },
+  },
+  outputMode: 'last_message',
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+  toolNames: [],
+  spawnableAgents: [],
+
+  instructionsPrompt: `You are an expert programmer who sketches out the code that will be needed to accomplish the task.
+
+You do not have access to tools to modify files. Instead, you describe all code changes using markdown code blocks.
+
+Instructions:
+- Think about the best way to accomplish the task
+- Write out the sketch for each file that needs to be changed
+- Use markdown code blocks with the file path as the language identifier
+- For each file, show only the code changes needed, don't include the entire file
+
+Important: Focus on the key sections of logic or interfaces that are needed to accomplish the task! You don't need to sketch out the more obvious parts of the code.
+You can skip over parts of the code using pseudocode or placeholder comments.
+
+Guidelines:
+- Pay close attention to the user's request and address all requirements
+- Focus on the simplest solution that accomplishes the task
+- Reuse existing code patterns and conventions from the codebase
+- Keep naming consistent with the existing codebase
+- Try not to modify more files than necessary
+- Avoid comments unless absolutely necessary to understand the code
+- Do not add try/catch blocks unless needed
+- Do not write duplicate code that could use existing helpers
+
+Format your response with file blocks, like this:
+path/to/file.ts
+\`\`\`typescript
+// ... existing code ...
+[this is the key section of code]
+// ... existing code ...
+\`\`\`
+`,
+}
+
+export default definition
diff --git a/.agents/file-explorer/code-searcher.ts b/.agents/file-explorer/code-searcher.ts
new file mode 100644
index 0000000000..cc2ec6b2d2
--- /dev/null
+++ b/.agents/file-explorer/code-searcher.ts
@@ -0,0 +1,79 @@
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+interface SearchQuery {
+  pattern: string
+  flags?: string
+  cwd?: string
+  maxResults?: number
+}
+
+const paramsSchema = {
+  type: 'object' as const,
+  properties: {
+    searchQueries: {
+      type: 'array' as const,
+      items: {
+        type: 'object' as const,
+        properties: {
+          pattern: {
+            type: 'string' as const,
+            description: 'The pattern to search for',
+          },
+          flags: {
+            type: 'string' as const,
+            description:
+              'Optional ripgrep flags to customize the search (e.g., "-i" for case-insensitive, "-t ts" for TypeScript files only, "-A 3" for 3 lines after match, "-B 2" for 2 lines before match, "--type-not test" to exclude test files)',
+          },
+          cwd: {
+            type: 'string' as const,
+            description:
+              'Optional working directory to search within, relative to the project root. Defaults to searching the entire project',
+          },
+          maxResults: {
+            type: 'number' as const,
+            description:
+              'Maximum number of results to return per file. Defaults to 15. There is also a global limit of 250 results across all files',
+          },
+        },
+        required: ['pattern'],
+      },
+      description: 'Array of code search queries to execute',
+    },
+  },
+  required: ['searchQueries'],
+}
+
+const codeSearcher: SecretAgentDefinition = {
+  id: 'code-searcher',
+  displayName: 'Code Searcher',
+  spawnerPrompt:
+    'Mechanically runs multiple code search queries (using ripgrep line-oriented search) and returns all results',
+  model: 'anthropic/claude-sonnet-4.5',
+  publisher,
+  outputMode: 'all_messages',
+  includeMessageHistory: false,
+  toolNames: ['code_search'],
+  spawnableAgents: [],
+  inputSchema: {
+    params: paramsSchema,
+  },
+  handleSteps: function* ({ params }) {
+    const searchQueries: SearchQuery[] = params?.searchQueries ?? []
+
+    for (const query of searchQueries) {
+      yield {
+        toolName: 'code_search',
+        input: {
+          pattern: query.pattern,
+          flags: query.flags,
+          cwd: query.cwd,
+          maxResults: query.maxResults,
+        },
+      }
+    }
+  },
+}
+
+export default codeSearcher
diff --git a/.agents/file-explorer/codebase-explorer.ts b/.agents/file-explorer/codebase-explorer.ts
new file mode 100644
index 0000000000..668b6e1864
--- /dev/null
+++ b/.agents/file-explorer/codebase-explorer.ts
@@ -0,0 +1,40 @@
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+const codebaseExplorer: SecretAgentDefinition = {
+  id: 'codebase-explorer',
+  displayName: 'Codebase Explorer',
+  spawnerPrompt:
+    'Orchestrates multiple exploration agents to comprehensively analyze the codebase and answer questions.',
+  model: 'anthropic/claude-sonnet-4.5',
+  publisher,
+  outputMode: 'last_message',
+  includeMessageHistory: false,
+  toolNames: ['spawn_agents'],
+  spawnableAgents: [
+    'file-picker',
+    'code-searcher',
+    'directory-lister',
+    'glob-matcher',
+    'file-q-and-a',
+  ],
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'A question or exploration goal for the codebase.',
+    },
+  },
+  systemPrompt: `You are a codebase exploration orchestrator. Your job is to spawn multiple specialized agents in parallel waves to comprehensively explore the codebase and answer the user's question.
+
+Strategy:
+1. Analyze the user's question to determine what exploration approach would be most effective.
+2. You may spawn agents to help you answer the user's question. Feel free to spawn multiple agents in parallel to gather information from different angles.
+3. Synthesize all findings into a comprehensive answer.`,
+
+  instructionsPrompt: `Analyze the user's prompt and spawn appropriate exploration agents.
+
+Finally, synthesize all findings into a comprehensive answer.`,
+}
+
+export default codebaseExplorer
diff --git a/.agents/file-explorer/directory-lister.ts b/.agents/file-explorer/directory-lister.ts
new file mode 100644
index 0000000000..bc9aba8b3c
--- /dev/null
+++ b/.agents/file-explorer/directory-lister.ts
@@ -0,0 +1,55 @@
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+interface ListDirectoryQuery {
+  path: string
+}
+
+const paramsSchema = {
+  type: 'object' as const,
+  properties: {
+    directories: {
+      type: 'array' as const,
+      items: {
+        type: 'object' as const,
+        properties: {
+          path: { type: 'string' as const },
+        },
+        required: ['path'],
+      },
+      description: 'Array of directory paths to list',
+    },
+  },
+  required: ['directories'],
+}
+
+const directoryLister: SecretAgentDefinition = {
+  id: 'directory-lister',
+  displayName: 'Directory Lister',
+  spawnerPrompt:
+    'Mechanically lists multiple directories and returns their contents',
+  model: 'anthropic/claude-sonnet-4.5',
+  publisher,
+  outputMode: 'all_messages',
+  includeMessageHistory: false,
+  toolNames: ['list_directory'],
+  spawnableAgents: [],
+  inputSchema: {
+    params: paramsSchema,
+  },
+  handleSteps: function* ({ params }) {
+    const directories: ListDirectoryQuery[] = params?.directories ?? []
+
+    for (const directory of directories) {
+      yield {
+        toolName: 'list_directory',
+        input: {
+          path: directory.path,
+        },
+      }
+    }
+  },
+}
+
+export default directoryLister
diff --git a/.agents/file-explorer.ts b/.agents/file-explorer/file-explorer.ts
similarity index 91%
rename from .agents/file-explorer.ts
rename to .agents/file-explorer/file-explorer.ts
index 94f329be3e..2403cc7810 100644
--- a/.agents/file-explorer.ts
+++ b/.agents/file-explorer/file-explorer.ts
@@ -1,8 +1,8 @@
 import { AgentTemplateTypes } from '@codebuff/common/types/session-state'
 
-import { publisher } from './constants'
+import { publisher } from '../constants'
 
-import type { SecretAgentDefinition } from './types/secret-agent-definition'
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
 
 const paramsSchema = {
   type: 'object' as const,
@@ -22,7 +22,7 @@ const fileExplorer: SecretAgentDefinition = {
   displayName: 'Dora the File Explorer',
   spawnerPrompt:
     'Comprehensively explores the codebase and reports back on the results',
-  model: 'anthropic/claude-4-sonnet-20250522',
+  model: 'x-ai/grok-4-fast',
   publisher,
   outputMode: 'structured_output',
   includeMessageHistory: false,
diff --git a/.agents/researcher/researcher-file-picker.ts b/.agents/file-explorer/file-picker.ts
similarity index 52%
rename from .agents/researcher/researcher-file-picker.ts
rename to .agents/file-explorer/file-picker.ts
index 62995393c6..90a3c47102 100644
--- a/.agents/researcher/researcher-file-picker.ts
+++ b/.agents/file-explorer/file-picker.ts
@@ -1,11 +1,12 @@
 import { publisher } from '../constants'
-import { filePicker } from 'factory/file-picker'
-import { SecretAgentDefinition } from 'types/secret-agent-definition'
+import { filePicker } from '../factory/file-picker'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
 
 const definition: SecretAgentDefinition = {
-  ...filePicker('x-ai/grok-4-fast'),
-  id: 'researcher-file-picker',
+  id: 'file-picker',
   publisher,
+  ...filePicker('x-ai/grok-4-fast'),
 }
 
 export default definition
diff --git a/.agents/file-explorer/file-q-and-a.ts b/.agents/file-explorer/file-q-and-a.ts
new file mode 100644
index 0000000000..628807bbb4
--- /dev/null
+++ b/.agents/file-explorer/file-q-and-a.ts
@@ -0,0 +1,61 @@
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+import type { ToolCall } from 'types/agent-definition'
+
+const paramsSchema = {
+  type: 'object' as const,
+  properties: {
+    filePath: {
+      type: 'string' as const,
+      description: 'Path to the file to ask questions about',
+    },
+  },
+  required: ['filePath'],
+}
+
+const fileQAndA: SecretAgentDefinition = {
+  id: 'file-q-and-a',
+  displayName: 'Quinn the File Q&A',
+  spawnerPrompt:
+    'Reads a single file and answers questions about it - can summarize, explain specific parts, or excerpt portions of the file',
+  model: 'x-ai/grok-4-fast',
+  publisher,
+  outputMode: 'last_message',
+  includeMessageHistory: false,
+  toolNames: ['read_files'],
+  spawnableAgents: [],
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description:
+        'A question about the file - can ask for a summary, explanation of specific functionality, or an excerpt of a particular section',
+    },
+    params: paramsSchema,
+  },
+  systemPrompt:
+    'You are an expert at reading and analyzing code files. Answer questions about files clearly and accurately. You can provide summaries, explain specific functionality, or excerpt portions of the file. When excerpting, reproduce the code exactly as it appears in the file.',
+  instructionsPrompt: `
+Read the file and answer the user's question about it. Depending on what they're asking:
+- For summaries: explain the main purpose, key functions/classes/exports, and important patterns
+- For specific questions: focus on the relevant parts and provide clear explanations
+- For excerpts: reproduce the requested code exactly as it appears in the file
+  `.trim(),
+  stepPrompt: 'Do not use any tools again. Just answer the question about the file.',
+
+  handleSteps: function* ({ prompt, params }) {
+    const filePath = params?.filePath
+    if (!filePath) {
+      throw new Error('filePath parameter is required')
+    }
+
+    yield {
+      toolName: 'read_files',
+      input: { paths: [filePath] },
+    } satisfies ToolCall
+
+    yield 'STEP_ALL'
+  },
+}
+
+export default fileQAndA
diff --git a/.agents/file-explorer/find-all-referencer.ts b/.agents/file-explorer/find-all-referencer.ts
new file mode 100644
index 0000000000..8e1f7b22dd
--- /dev/null
+++ b/.agents/file-explorer/find-all-referencer.ts
@@ -0,0 +1,53 @@
+import { ToolCall } from 'types/agent-definition'
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+const definition: SecretAgentDefinition = {
+  id: 'find-all-referencer',
+  displayName: 'Find All Referencer',
+  spawnerPrompt:
+    'Ask this agent to find all references to something in the codebase or where something is defined or answer any other codebase-wide questions.',
+  model: 'x-ai/grok-4-fast',
+  publisher,
+  outputMode: 'last_message',
+  includeMessageHistory: false,
+  toolNames: ['spawn_agents', 'find_files', 'read_files'],
+  spawnableAgents: [
+    'file-picker',
+    'code-searcher',
+    'directory-lister',
+    'glob-matcher',
+    'file-q-and-a',
+  ],
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description:
+        'The function or class or import etc. to find all references to in the codebase. Can accommodate vague requests as well!',
+    },
+  },
+  systemPrompt: `You are a codebase exploration agent that is good at finding all references to something in the codebase or where something is defined.
+
+Strategy:
+1. Analyze the user's question to determine what exploration approach would be most effective.
+2. Spawn agents to help you answer the user's question. You should spawn multiple agents in parallel to gather information faster.
+3. Synthesize all findings into a concise, but comprehensive answer.
+`,
+
+  instructionsPrompt: `Analyze the user's prompt and spawn appropriate exploration agents.
+
+Use lots of different agents in parallel to gather more information faster.
+
+Finally, synthesize all findings into a comprehensive and concise answer.`,
+
+  handleSteps: function* ({ prompt, params }) {
+    yield {
+      toolName: 'find_files',
+      input: { prompt: prompt ?? '' },
+    } satisfies ToolCall
+    yield 'STEP_ALL'
+  },
+}
+
+export default definition
diff --git a/.agents/file-explorer/glob-matcher.ts b/.agents/file-explorer/glob-matcher.ts
new file mode 100644
index 0000000000..5598b3258b
--- /dev/null
+++ b/.agents/file-explorer/glob-matcher.ts
@@ -0,0 +1,58 @@
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+interface GlobQuery {
+  pattern: string
+  cwd?: string
+}
+
+const paramsSchema = {
+  type: 'object' as const,
+  properties: {
+    patterns: {
+      type: 'array' as const,
+      items: {
+        type: 'object' as const,
+        properties: {
+          pattern: { type: 'string' as const },
+          cwd: { type: 'string' as const },
+        },
+        required: ['pattern'],
+      },
+      description: 'Array of glob patterns to match',
+    },
+  },
+  required: ['patterns'],
+}
+
+const globMatcher: SecretAgentDefinition = {
+  id: 'glob-matcher',
+  displayName: 'Glob Matcher',
+  spawnerPrompt:
+    'Mechanically runs multiple glob pattern matches and returns all matching files',
+  model: 'anthropic/claude-sonnet-4.5',
+  publisher,
+  outputMode: 'all_messages',
+  includeMessageHistory: false,
+  toolNames: ['glob'],
+  spawnableAgents: [],
+  inputSchema: {
+    params: paramsSchema,
+  },
+  handleSteps: function* ({ params }) {
+    const patterns: GlobQuery[] = params?.patterns ?? []
+
+    for (const query of patterns) {
+      yield {
+        toolName: 'glob',
+        input: {
+          pattern: query.pattern,
+          cwd: query.cwd,
+        },
+      }
+    }
+  },
+}
+
+export default globMatcher
diff --git a/.agents/file-picker.ts b/.agents/file-picker.ts
deleted file mode 100644
index 673ed51447..0000000000
--- a/.agents/file-picker.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import { publisher } from './constants'
-import { filePicker } from './factory/file-picker'
-
-import type { SecretAgentDefinition } from './types/secret-agent-definition'
-
-const definition: SecretAgentDefinition = {
-  id: 'file-picker',
-  publisher,
-  ...filePicker('x-ai/grok-4-fast'),
-}
-
-export default definition
diff --git a/.agents/planners/decomposing-planner-lite.ts b/.agents/planners/decomposing-planner-lite.ts
index 84e8632824..5b1ae3120c 100644
--- a/.agents/planners/decomposing-planner-lite.ts
+++ b/.agents/planners/decomposing-planner-lite.ts
@@ -8,7 +8,7 @@ const definition: SecretAgentDefinition = {
   model: 'anthropic/claude-sonnet-4.5',
   spawnerPrompt:
     'Creates a better implementation plan by decomposing the task into smaller plans in parallel and synthesizing them into a final plan. Includes full code changes.',
-  spawnableAgents: ['researcher-file-explorer', 'implementation-planner-lite'],
+  spawnableAgents: ['file-explorer', 'implementation-planner-lite'],
 }
 
 export default definition
diff --git a/.agents/planners/implementation-planner.ts b/.agents/planners/implementation-planner.ts
index 073e5852ca..bcfb53ed55 100644
--- a/.agents/planners/implementation-planner.ts
+++ b/.agents/planners/implementation-planner.ts
@@ -18,19 +18,15 @@ const definition: SecretAgentDefinition = {
   outputMode: 'last_message',
   includeMessageHistory: true,
   inheritParentSystemPrompt: true,
-  toolNames: ['spawn_agents', 'read_files'],
-  spawnableAgents: ['file-explorer', 'web-researcher', 'docs-researcher'],
 
-  instructionsPrompt: `You are an expert programmer, architect, researcher, and general problem solver.
-You spawn agents to help you gather information, and then describe a full change to the codebase that will accomplish the task.
+  instructionsPrompt: `You are an expert programmer, architect, and general problem solver.
+You describe a full change to the codebase that will accomplish the task.
 
 You do not have access to tools to modify files (e.g. the write_file or str_replace tools). You are describing all the code changes that should be made as a full implementation.
 
 Instructions:
-- Spawn file-explorer twice to find all the relevant parts of the codebase. Use different prompts for each file-explorer to ensure you get all the relevant parts of the codebase. In parallel as part of the same spawn_agents tool call, you may also spawn a web-researcher or docs-researcher to search the web or technical documentation for relevant information.
-- Read any relevant files that have not already been read.
 - Think about the best way to accomplish the task.
-- Finally, describe the full change to the codebase that will accomplish the task (or other steps, e.g. terminal commands to run). Use markdown code blocks to describe the changes for each file.
+- Describe the full change to the codebase that will accomplish the task (or other steps, e.g. terminal commands to run). Use markdown code blocks to describe the changes for each file.
 
 Note that you are not allowed to use tools to modify files. You are instead describing a full implementation of the changes that should be made with all the code changes using markdown code blocks.
 
diff --git a/.agents/planners/iterative-planner.ts b/.agents/planners/iterative-planner.ts
new file mode 100644
index 0000000000..dc5e9d0148
--- /dev/null
+++ b/.agents/planners/iterative-planner.ts
@@ -0,0 +1,66 @@
+import { publisher } from '../constants'
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+const definition: SecretAgentDefinition = {
+  id: 'iterative-planner',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Iterative Planner',
+  spawnerPrompt:
+    'Spawn this agent when you need to create a detailed implementation plan through iterative refinement with critique and validation steps. Spawn it with a rough step-by-step initial plan.',
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'The initial step-by-step plan to refine and validate',
+    },
+  },
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+  outputMode: 'last_message',
+  toolNames: ['spawn_agents'],
+  spawnableAgents: ['plan-critiquer'],
+
+  instructionsPrompt: `You are an expert implementation planner. Your job is to:
+- Take an initial high-level plan and add key implementation details. Include important decisions and alternatives. Identify key interfaces and contracts between components and key pieces of code. Add validation steps to ensure correctness. Identify which steps can be done in parallel.
+- Spawn a plan-critiquer agent with the entire revised, fleshed out plan.
+- Incorporate feedback from the critiques to output a final plan.
+  
+Instructions:
+
+1. Immediately spawn the plan-critiquer agent with an updated plan:
+
+Transform the initial plan into a detailed implementation guide that includes:
+
+**All User Requirements:**
+- Make sure the plan addresses all the requirements in the user's request, and does not do other stuff that the user did not ask for.
+
+**Key Decisions & Trade-offs:**
+- Architecture decisions and rationale
+- Cruxes of the plan
+- Alternatives considered
+
+**Interfaces & Contracts:**
+- Clear API signatures between components
+- Key tricky bits of code (keep this short though)
+
+**Validation Steps:**
+- How to verify each step works correctly
+- Include explicit verification steps when it makes sense in the plan.
+
+**Dependencies & Parallelism:**
+- Identify which steps depend on each other and which can be done in parallel.
+
+Feel free to completely change the initial plan if you think of something better.
+
+2. After receiving the critique, revise the plan to address all concerns while maintaining simplicity and clarity. Output the final plan.
+
+## Guidelines for the plan
+
+- IMPORTANT: Don't overengineer the plan -- prefer minimalism and simplicity in almost every case. Streamline the final plan to be as minimal as possible.
+- IMPORTANT: You must pay attention to the user's request! Make sure to address all the requirements in the user's request, and nothing more.
+- Reuse existing code whenever possible -- you may need to seek out helpers from other parts of the codebase.
+- Use existing patterns and conventions from the codebase. Keep naming consistent. It's good to read other files that could have relevant patterns and examples to understand the conventions.
+- Try not to modify more files than necessary.`,
+}
+
+export default definition
diff --git a/.agents/planners/plan-critiquer.ts b/.agents/planners/plan-critiquer.ts
new file mode 100644
index 0000000000..597920fa0b
--- /dev/null
+++ b/.agents/planners/plan-critiquer.ts
@@ -0,0 +1,106 @@
+import { publisher } from '../constants'
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+import type { ToolMessage } from '../types/util-types'
+
+/**
+ * Agent definition for the plan critiquer.
+ *
+ * The model critiques an implementation plan and is instructed to spawn a
+ * decomposing-thinker for its concerns; `handleSteps` then packs the critique
+ * text and the spawn_agents result into the output declared by `outputSchema`.
+ */
+const definition: SecretAgentDefinition = {
+  id: 'plan-critiquer',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Plan Critiquer',
+  spawnerPrompt:
+    'Analyzes implementation plans to identify areas of concern and proposes solutions through parallel thinking.',
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description:
+        "The implementation plan to critique. Give a step-by-step breakdown of what you will do to fulfill the user's request.",
+    },
+  },
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+  outputMode: 'structured_output',
+  outputSchema: {
+    type: 'object',
+    properties: {
+      critique: {
+        type: 'string',
+        description: 'Analysis of the plan with identified areas of concern',
+      },
+      suggestions: {
+        type: 'array',
+        items: {
+          type: 'object',
+        },
+        description: 'Suggestions for each area of concern',
+      },
+    },
+    required: ['critique', 'suggestions'],
+  },
+  toolNames: ['spawn_agents', 'set_output'],
+  spawnableAgents: ['decomposing-thinker'],
+
+  instructionsPrompt: `You are an expert plan reviewer. Your job is to:
+1. Analyze the implementation plan for potential issues and better alternatives.
+2. Identify 2-5 specific areas of concern that need deeper analysis
+3. Spawn a decomposing-thinker agent with the concerns as prompts. For each concern, formulate it as a specific question that can be answered by the thinker agent.
+
+## Guidelines for the critique
+
+IMPORTANT: You must pay attention to the user's request! Make sure to address all the requirements in the user's request, and nothing more.
+
+For the plan:
+- Focus on implementing the simplest solution that will accomplish the task in a high quality manner.
+- Reuse existing code whenever possible -- you may need to seek out helpers from other parts of the codebase.
+- Use existing patterns and conventions from the codebase. Keep naming consistent. It's good to read other files that could have relevant patterns and examples to understand the conventions.
+- Try not to modify more files than necessary.
+`,
+
+  /**
+   * Yields 'STEP' once, then assembles the structured output from the message
+   * history: `critique` is the last assistant message's content (when it is a
+   * plain string) and `suggestions` is the first output of the last
+   * spawn_agents tool result.
+   */
+  handleSteps: function* () {
+    const { agentState } = yield 'STEP'
+
+    const lastAssistantMessage = agentState.messageHistory
+      .filter((m) => m.role === 'assistant')
+      .pop()
+
+    // Non-string content (or no assistant message) yields an empty critique.
+    const critique =
+      typeof lastAssistantMessage?.content === 'string'
+        ? lastAssistantMessage.content
+        : ''
+
+    // pop() returns undefined when the history holds no spawn_agents result, so
+    // the assertion keeps `undefined` in the type instead of hiding it.
+    const toolResult = agentState.messageHistory
+      .filter((m) => m.role === 'tool' && m.content.toolName === 'spawn_agents')
+      .pop() as ToolMessage | undefined
+
+    // outputSchema declares `suggestions` as an array, so fall back to [] when
+    // there is no tool result, it has no outputs, or the first output is not
+    // JSON.
+    const firstOutput = toolResult?.content.output[0]
+    const suggestions = firstOutput?.type === 'json' ? firstOutput.value : []
+
+    yield {
+      toolName: 'set_output',
+      input: {
+        critique,
+        suggestions,
+      },
+    }
+  },
+}
+
+export default definition
diff --git a/.agents/planners/requirements-planner.ts b/.agents/planners/requirements-planner.ts
new file mode 100644
index 0000000000..6cb8d1d2e5
--- /dev/null
+++ b/.agents/planners/requirements-planner.ts
@@ -0,0 +1,78 @@
+import { publisher } from '../constants'
+import { type SecretAgentDefinition } from '../types/secret-agent-definition'
+
+/**
+ * Agent definition for the requirements planner.
+ *
+ * The model is instructed to gather context, derive explicit requirements, and
+ * hand them to a two-wave-planner; `handleSteps` then returns that planner's
+ * result as the structured output.
+ */
+const definition: SecretAgentDefinition = {
+  id: 'requirements-planner',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Requirements Planner',
+  spawnerPrompt:
+    'Come up with a list of requirements for a user request, and plan how to implement them.',
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'The user request to plan for',
+    },
+  },
+  outputMode: 'structured_output',
+  // read_files is needed for step 2 of the instructions ("Read any new files").
+  toolNames: ['spawn_agents', 'read_files', 'set_output', 'end_turn'],
+  spawnableAgents: [
+    'file-explorer',
+    'researcher-web',
+    'researcher-docs',
+    'two-wave-planner',
+  ],
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  instructionsPrompt: `You are an expert requirements planner with deep experience in software engineering, architecture, and project management.
+
+Instructions:
+1. Spawn a file-explorer agent to get more context about the codebase. Optionally, in parallel, spawn a researcher-web and/or researcher-docs agent to get context about the web and docs.
+2. Read any new files that have not already been read that could possibly be relevant to the user request or could help with planning.
+3. Analyze the user request in "<analysis>" tags. Explain the key steps and components that will be needed to accomplish the task.
+4. Come up with 2-8 explicit requirements. Try to keep the requirements disjoint, cover the whole task, and focus on the important and challenging parts of the task.
+5. Spawn a two-wave-planner agent with the requirements as input.
+6. End turn.
+`,
+
+  /**
+   * Yields 'STEP_ALL', then publishes the JSON value of the last spawn_agents
+   * tool result in the message history as `plans`. Per the instructions the
+   * last spawn should be the two-wave-planner, but this is not checked here.
+   */
+  handleSteps: function* () {
+    const { agentState } = yield 'STEP_ALL'
+    const toolResults = agentState.messageHistory.filter(
+      (message) =>
+        message.role === 'tool' && message.content.toolName === 'spawn_agents',
+    )
+    const lastToolResult = toolResults[toolResults.length - 1]
+    // Falls back to the string 'No results' when there was no spawn_agents
+    // call or its first output is not JSON.
+    const lastToolResultJson =
+      lastToolResult &&
+      lastToolResult.role === 'tool' &&
+      lastToolResult.content.output[0]?.type === 'json'
+        ? lastToolResult.content.output[0].value
+        : 'No results'
+
+    yield {
+      toolName: 'set_output',
+      input: {
+        plans: lastToolResultJson,
+      },
+    }
+  },
+}
+
+export default definition
diff --git a/.agents/planners/two-wave-planner.ts b/.agents/planners/two-wave-planner.ts
new file mode 100644
index 0000000000..137f9c42c1
--- /dev/null
+++ b/.agents/planners/two-wave-planner.ts
@@ -0,0 +1,94 @@
+import { publisher } from '../constants'
+import { type SecretAgentDefinition } from '../types/secret-agent-definition'
+
+/**
+ * Agent definition for the two-wave planner.
+ *
+ * Has no instructions prompt: `handleSteps` drives the whole run by spawning
+ * one implementation-planner per requirement, twice, and returning the second
+ * wave's results.
+ */
+const definition: SecretAgentDefinition = {
+  id: 'two-wave-planner',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Two Wave Planner',
+  spawnerPrompt:
+    'Plans how to implement a list of requirements for a user request across two waves for deep refinement.',
+  inputSchema: {
+    params: {
+      type: 'object',
+      properties: {
+        requirements: {
+          type: 'array',
+          items: { type: 'string' },
+          description:
+            'A list of explicit requirements to plan for, in the order they should be implemented',
+        },
+      },
+      required: ['requirements'],
+    },
+  },
+  outputMode: 'structured_output',
+  toolNames: ['spawn_agents', 'set_output'],
+  spawnableAgents: ['implementation-planner'],
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  /**
+   * Spawns two waves of implementation-planner agents, one agent per
+   * requirement in each wave, and outputs the second wave's results as `plans`.
+   */
+  handleSteps: function* ({ params }) {
+    const requirements: string[] = params?.requirements ?? []
+
+    // First wave: its result is deliberately not captured here. The second-wave
+    // prompt tells each planner to find these plans in the message history.
+    yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: requirements.map((requirement) => ({
+          agent_type: 'implementation-planner',
+          prompt: `Research and give insights and proposals for this requirement: ${requirement}`,
+        })),
+      },
+    }
+
+    // Second wave: re-plan each requirement with the first-wave plans in view.
+    const { toolResult: planResults } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: requirements.map((requirement) => ({
+          agent_type: 'implementation-planner',
+          prompt: `Create a new plan for the following requirement: <requirement>${requirement}</requirement>
+
+You can see the previous plans for the list of requirements in the message history above, including the previous plan for this requirement. Review them to:
+- Simplify your plan based on the broader context
+- Identify overlaps or conflicts with other plans
+- Find opportunities for code reuse across requirements
+- Ensure your plan integrates well with other requirements
+- Make your plan as concise as possible! A good plan is short and sweet.`,
+        })),
+      },
+    }
+
+    // Non-JSON results become empty strings.
+    // NOTE(review): decomposing-reviewer.ts keeps only element [0] of the same
+    // mapping; confirm which shape spawn_agents returns and align the two.
+    const plans = planResults
+      ? planResults.map((result) =>
+          result.type === 'json' ? result.value : '',
+        )
+      : []
+
+    yield {
+      toolName: 'set_output',
+      input: {
+        plans,
+      },
+    }
+  },
+}
+
+export default definition
diff --git a/.agents/read-only-commander.ts b/.agents/read-only-commander.ts
index d4d2734797..945dfbda9d 100644
--- a/.agents/read-only-commander.ts
+++ b/.agents/read-only-commander.ts
@@ -12,16 +12,14 @@ const readOnlyCommander: SecretAgentDefinition = {
     prompt: {
       type: 'string',
       description:
-        'The question to answer about the codebase or with use of the terminal.',
+        'The commands to run with use of the terminal. Has no other context about the current task or project, so you must specify everything you want to be done and what information you want back.',
     },
   },
   outputMode: 'last_message',
-  includeMessageHistory: true,
-  inheritParentSystemPrompt: true,
-  toolNames: ['run_terminal_command', 'code_search', 'read_files'],
-  instructionsPrompt: `You are an expert software engineer, however you only execute READ ONLY commands to answer the user's question. You also cannot spawn any agents.
-
-Use the tools to answer the user's question. But do not invoke any terminal commands that could have any permanent effects -- no editing files, no running scripts, no git commits, no installing packages, etc.`,
+  includeMessageHistory: false,
+  toolNames: ['run_terminal_command'],
+  systemPrompt: `You are an expert software engineer, however you only execute READ ONLY terminal commands to answer the user's question. You also cannot spawn any agents.`,
+  instructionsPrompt: `Use the run_terminal_command tool to answer the user's question. But do not invoke any terminal commands that could have any permanent effects -- no editing files, no running scripts, no git commits, no installing packages, etc.`,
 }
 
 export default readOnlyCommander
diff --git a/.agents/researcher/researcher-file-explorer.ts b/.agents/researcher/researcher-file-explorer.ts
deleted file mode 100644
index 502b911e83..0000000000
--- a/.agents/researcher/researcher-file-explorer.ts
+++ /dev/null
@@ -1,64 +0,0 @@
-import { publisher } from '../constants'
-import type { SecretAgentDefinition } from '../types/secret-agent-definition'
-
-const paramsSchema = {
-  type: 'object' as const,
-  properties: {
-    prompts: {
-      type: 'array' as const,
-      items: { type: 'string' },
-      description:
-        'List of 1-4 different parts of the codebase that could be useful to explore',
-    },
-  },
-  required: ['prompts'],
-}
-
-const fileExplorer: SecretAgentDefinition = {
-  id: 'researcher-file-explorer',
-  displayName: 'Dora the File Explorer',
-  spawnerPrompt:
-    'Comprehensively explores the codebase and reports back on the results',
-  model: 'x-ai/grok-4-fast',
-  publisher,
-  outputMode: 'structured_output',
-  includeMessageHistory: false,
-  toolNames: ['spawn_agents', 'set_output'],
-  spawnableAgents: ['researcher-file-picker'],
-  inputSchema: {
-    prompt: {
-      type: 'string',
-      description: 'What you need to accomplish by exploring the codebase',
-    },
-    params: paramsSchema,
-  },
-  systemPrompt:
-    'You are a file explorer agent that spawns multiple file picker agents in parallel to comprehensively explore the codebase.',
-  instructionsPrompt: '',
-  stepPrompt: '',
-
-  handleSteps: function* ({ prompt, params }) {
-    const prompts: string[] = params?.prompts ?? []
-    const filePickerPrompts = prompts.map(
-        (focusPrompt) =>
-          `Based on the overall goal "${prompt}", find files related to this specific area: ${focusPrompt}`,
-      ),
-      { toolResult: spawnResult } = yield {
-        toolName: 'spawn_agents',
-        input: {
-          agents: filePickerPrompts.map((promptText) => ({
-            agent_type: 'researcher-file-picker',
-            prompt: promptText,
-          })),
-        },
-      }
-    yield {
-      toolName: 'set_output',
-      input: {
-        results: spawnResult,
-      },
-    }
-  },
-}
-
-export default fileExplorer
diff --git a/.agents/researcher/researcher-grok-4-fast.ts b/.agents/researcher/researcher-grok-4-fast.ts
index 6efd1fe430..b6e4a0ac22 100644
--- a/.agents/researcher/researcher-grok-4-fast.ts
+++ b/.agents/researcher/researcher-grok-4-fast.ts
@@ -11,7 +11,7 @@ const definition: SecretAgentDefinition = {
   displayName: 'Grok 4 Fast Researcher',
   toolNames: ['spawn_agents'],
   spawnableAgents: [
-    'researcher-file-explorer',
+    'file-explorer',
     // 'researcher-codebase-explorer',
     'researcher-web',
     'researcher-docs',
@@ -38,7 +38,7 @@ ${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`,
 Take as many steps as you need to gather information first:
 - Use the spawn_agents tool to spawn agents to research the codebase and web. Spawn as many agents in parallel as possible. Feel free to call it multiple times to find more information.
 
-You should likely spawn the researcher-file-explorer agent to get a comprehensive understanding of the codebase. You should also spawn the researcher-web and researcher-docs agents to get up-to-date information from the web and docs, if relevant.
+You should likely spawn the file-explorer agent to get a comprehensive understanding of the codebase. You should also spawn the researcher-web and researcher-docs agents to get up-to-date information from the web and docs, if relevant.
 
 Finally, write up a research report that answers the user question to the best of your ability from the information gathered from the agents. Don't add any opinions or recommendations, just all the plain facts that are relevant. Mention which files are relevant to the user question. Be clear and concise.`,
 }
diff --git a/.agents/reviewer/decomposing-reviewer.ts b/.agents/reviewer/decomposing-reviewer.ts
new file mode 100644
index 0000000000..0ed5ca590e
--- /dev/null
+++ b/.agents/reviewer/decomposing-reviewer.ts
@@ -0,0 +1,72 @@
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+/**
+ * Agent definition for the decomposing reviewer.
+ *
+ * Has no instructions prompt: `handleSteps` spawns one reviewer agent per
+ * entry in `params.prompts` and returns what they produced as `reviews`.
+ */
+const definition: SecretAgentDefinition = {
+  id: 'decomposing-reviewer',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Decomposing Reviewer',
+  spawnerPrompt:
+    'Creates comprehensive code review by decomposing the review into multiple focused review aspects and synthesizing insights from parallel reviewer agents.',
+  inputSchema: {
+    params: {
+      type: 'object',
+      properties: {
+        prompts: {
+          type: 'array',
+          items: {
+            type: 'string',
+            description: 'A specific review aspect or concern to analyze',
+          },
+          description: 'A list of 2-8 specific review aspects to analyze',
+        },
+      },
+      required: ['prompts'],
+    },
+  },
+  inheritParentSystemPrompt: true,
+  includeMessageHistory: true,
+  outputMode: 'structured_output',
+  toolNames: ['spawn_agents', 'set_output'],
+  spawnableAgents: ['reviewer'],
+
+  /**
+   * Spawns one reviewer per prompt in a single spawn_agents call and outputs
+   * the first result of that call as `reviews`.
+   */
+  handleSteps: function* ({ params }) {
+    const prompts: string[] = params?.prompts ?? []
+    const { toolResult } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: prompts.map((promptText) => ({
+          agent_type: 'reviewer',
+          prompt: promptText,
+        })),
+      },
+    }
+
+    // Only the first tool result is used. An absent or empty result list gives
+    // [] rather than `undefined`; a non-JSON first result gives ''.
+    const firstResult = toolResult?.[0]
+    const reviews =
+      firstResult === undefined
+        ? []
+        : firstResult.type === 'json'
+          ? firstResult.value
+          : ''
+    yield {
+      toolName: 'set_output',
+      input: { reviews },
+    }
+  },
+}
+
+export default definition
diff --git a/.agents/thinker/decomposing-thinker.ts b/.agents/thinker/decomposing-thinker.ts
index 1b9a946e79..fc9d904a27 100644
--- a/.agents/thinker/decomposing-thinker.ts
+++ b/.agents/thinker/decomposing-thinker.ts
@@ -44,11 +44,13 @@ const definition: SecretAgentDefinition = {
     }
 
     const thoughts = toolResult
-      ? toolResult.map((result) => (result.type === 'json' ? result.value : ''))
+      ? toolResult.map((result) =>
+          result.type === 'json' ? result.value : '',
+        )[0]
       : []
     yield {
       toolName: 'set_output',
-      input: { results: thoughts },
+      input: { thoughts },
     }
   },
 }
diff --git a/.agents/thinker/thinker.ts b/.agents/thinker/thinker.ts
index 0db6078e41..767da06364 100644
--- a/.agents/thinker/thinker.ts
+++ b/.agents/thinker/thinker.ts
@@ -23,7 +23,7 @@ const definition: SecretAgentDefinition = {
   instructionsPrompt: `
 Think deeply, step by step, about the user request and how best to approach it.
 
-Consider edge cases, potential issues, and alternative approaches.
+Consider edge cases, potential issues, and alternative approaches. Also, propose reading files or spawning agents to get more context that would be helpful for solving the problem.
 
 Come up with a list of insights that would help someone arrive at the best solution.
 
diff --git a/evals/git-evals/run-git-evals.ts b/evals/git-evals/run-git-evals.ts
index 54c92ae23f..7c8e627e8c 100644
--- a/evals/git-evals/run-git-evals.ts
+++ b/evals/git-evals/run-git-evals.ts
@@ -42,6 +42,7 @@ export async function runSingleEval(
   fingerprintId: string,
   codingAgent: 'codebuff' | 'claude',
   agent?: string,
+  promptWithSpec: boolean = false,
 ): Promise<EvalRunJudged> {
   const startTime = new Date()
   const trace: CodebuffTrace[] = []
@@ -93,7 +94,7 @@ export async function runSingleEval(
 
     let currentDecision: AgentDecision = 'continue'
     let attempts = 0
-    const MAX_ATTEMPTS = 5
+    const MAX_ATTEMPTS = promptWithSpec ? 1 : 5
 
     while (currentDecision === 'continue' && attempts < MAX_ATTEMPTS) {
       // Check for process-level errors
@@ -119,11 +120,17 @@ export async function runSingleEval(
       // Get next prompt from prompting agent with timeout
       let agentResponse: z.infer<typeof AgentDecisionSchema>
       try {
-        agentResponse = await promptAiSdkStructured({
-          messages: [
-            {
-              role: 'user',
-              content: `You are an expert software engineer tasked with implementing a specification using CodeBuff, an AI coding assistant. Your goal is to prompt CodeBuff to implement the spec correctly. You are in a conversation with this coding agent.
+        agentResponse = promptWithSpec
+          ? {
+              decision: 'continue',
+              reasoning: 'Using spec as sole prompt',
+              next_prompt: evalCommit.spec,
+            }
+          : await promptAiSdkStructured({
+              messages: [
+                {
+                  role: 'user',
+                  content: `You are an expert software engineer tasked with implementing a specification using CodeBuff, an AI coding assistant. Your goal is to prompt CodeBuff to implement the spec correctly. You are in a conversation with this coding agent.
 
 Current spec to implement:
 <spec>${evalCommit.spec}</spec>
@@ -243,16 +250,6 @@ Explain your reasoning in detail. Do not ask Codebuff to git commit changes.`,
       },
     }
 
-    if (process.env.NEXT_PUBLIC_CB_ENVIRONMENT === 'dev') {
-      const { eval_commit, gitDiff, ...rest } = result
-      const { fileStates, ...rest2 } = eval_commit
-
-      writeJsonToFile(
-        { ...rest, ...rest2 },
-        path.join(__dirname, `trace-${evalCommit.sha}.json`),
-      )
-    }
-
     return result
   } catch (judgingError) {
     console.error('Error in judging:', judgingError)
diff --git a/evals/git-evals/run-single-eval-process.ts b/evals/git-evals/run-single-eval-process.ts
index 5c455cbe81..3fedc27a43 100644
--- a/evals/git-evals/run-single-eval-process.ts
+++ b/evals/git-evals/run-single-eval-process.ts
@@ -74,6 +74,7 @@ async function main() {
       fingerprintId,
       codingAgent as any,
       agent,
+      false,
     )
 
     // Check again after long-running operation
diff --git a/evals/git-evals/run-single-eval.ts b/evals/git-evals/run-single-eval.ts
index 83b3cb3531..092c1ca9fb 100644
--- a/evals/git-evals/run-single-eval.ts
+++ b/evals/git-evals/run-single-eval.ts
@@ -199,6 +199,7 @@ async function runSingleEvalTask(options: {
       fingerprintId,
       codingAgent,
       agentType,
+      false,
     )
 
     const duration = Date.now() - startTime