CodebuffAI
diff --git a/.agents/notion-agent.ts b/.agents/notion-agent.ts
Lines changed: 1 addition & 1 deletion
diff --git a/.agents/notion-researcher.ts b/.agents/notion-researcher.ts
Lines changed: 1 addition & 1 deletion
diff --git a/.agents/sessions/03-03-0909-add-console-log/LESSONS.md b/.agents/sessions/03-03-0909-add-console-log/LESSONS.md
Lines changed: 15 additions & 0 deletions
diff --git a/.agents/sessions/03-03-0909-add-console-log/PLAN.md b/.agents/sessions/03-03-0909-add-console-log/PLAN.md
Lines changed: 16 additions & 0 deletions
diff --git a/.agents/sessions/03-03-0909-add-console-log/SPEC.md b/.agents/sessions/03-03-0909-add-console-log/SPEC.md
Lines changed: 25 additions & 0 deletions
diff --git a/.agents/skills/meta/SKILL.md b/.agents/skills/meta/SKILL.md
Lines changed: 10 additions & 0 deletions
diff --git a/.agents/types/agent-definition.ts b/.agents/types/agent-definition.ts
Lines changed: 12 additions & 4 deletions
diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts
Lines changed: 17 additions & 0 deletions
diff --git a/agents/__tests__/file-picker.test.ts b/agents/__tests__/file-picker.test.ts
Lines changed: 21 additions & 56 deletions
diff --git a/agents/base2/base-deep-evals.ts b/agents/base2/base-deep-evals.ts
Lines changed: 8 additions & 0 deletions
@@ -3,7 +3,7 @@ import type { AgentDefinition } from './types/agent-definition'
 const definition: AgentDefinition = {
   id: 'notion-query-agent',
   displayName: 'Notion Query Agent',
-  model: 'x-ai/grok-4-fast',
+  model: 'google/gemini-3.1-flash-lite-preview',
 
   spawnerPrompt:
     'Expert at querying Notion databases and pages to find information and answer questions about content stored in Notion workspaces.',
 
@@ -6,7 +6,7 @@ const definition: AgentDefinition = {
   id: 'notion-researcher',
   publisher,
   displayName: 'Notion Researcher',
-  model: 'x-ai/grok-4-fast',
+  model: 'google/gemini-3.1-flash-lite-preview',
 
   spawnerPrompt:
     'Expert at conducting comprehensive research across Notion workspaces by spawning multiple notion agents in parallel waves to gather information from different angles and sources.',
 
@@ -0,0 +1,15 @@
+# LESSONS
+
+## What went well
+- `git diff -- cli/src/index.tsx` immediately after editing made it easy to enforce exact scope for a one-line change.
+- Validating with `bun run cli/src/index.tsx --help` gave a quick, side-effect-free end-to-end check that the startup output works.
+
+## What was tricky
+- Bun script invocation shape from repo root was easy to misremember: `bun --cwd cli run typecheck` failed, while `bun run --cwd cli typecheck` succeeded.
+
+## Useful patterns
+- Entrypoint logs placed at the top of `main()` apply to all command paths that enter `main()`; verify with a non-interactive path first.
+- For tiny requests, combine: (1) minimal code edit, (2) scoped diff check, (3) one runtime smoke check, (4) one typecheck.
+
+## Future efficiency notes
+- Put exact validation commands directly in `PLAN.md` to avoid command-syntax backtracking during validation.
@@ -0,0 +1,16 @@
+# PLAN
+
+## Implementation Steps
+1. Update `cli/src/index.tsx` by adding `console.log('Codebuff CLI starting')` as the first statement in `main()`.
+2. Inspect the diff to confirm scope: exactly one new `console.log` line in `cli/src/index.tsx` and no unintended edits.
+3. Run lightweight validation for CLI startup behavior:
+   - Run a non-interactive path (`--help`) and confirm the line appears once.
+   - Confirm the log sits before command branching in `main()` so it applies to all `main()` paths.
+
+## Dependencies / Ordering
+- Step 1 must happen before Step 2 and Step 3.
+- Step 2 should complete before Step 3 to ensure we validate the intended change only.
+
+## Risk Areas
+- Low risk overall.
+- Minor UX risk: the new stdout line appears for all command paths entering `main()` (including `--help`, `login`, and `publish`). This is intentional per spec.
@@ -0,0 +1,25 @@
+# SPEC
+
+## Overview
+Add a single startup `console.log` to the CLI entrypoint so there is explicit stdout output when the CLI boots.
+
+## Requirements
+1. Restrict functional code changes to `cli/src/index.tsx`.
+2. Add exactly one `console.log(...)` statement.
+3. Place the log at the start of `main()`.
+4. Use a static message string (no timestamp or dynamic args). Chosen message: `Codebuff CLI starting`.
+5. The log should print for any execution path that enters `main()` (including normal startup and command modes like `login`/`publish`).
+6. Keep all existing behavior unchanged aside from the added stdout line.
+
+## Technical Approach
+Insert one `console.log('Codebuff CLI starting')` call as the first statement inside `main()` so it prints once per process run before the rest of startup flow proceeds.
+
+## Files to Create/Modify
+- `cli/src/index.tsx` (modify)
+- `.agents/sessions/03-03-0909-add-console-log/SPEC.md` (this spec)
+
+## Out of Scope
+- Replacing existing logger usage with `console.log`
+- Adding additional logs
+- Refactoring startup flow or command handling
+- Any server/web/API changes
@@ -0,0 +1,10 @@
+---
+name: meta
+description: Broad project-level implementation and validation heuristics
+---
+
+# Meta
+
+- When validating CLI changes, run a side-effect-free command path first (for example `--help`) before any command that could trigger external side effects. (from .agents/sessions/03-03-0909-add-console-log)
+- For tightly scoped edits, pair runtime smoke-checks with `git diff -- <file>` to verify no unintended spillover. (from .agents/sessions/03-03-0909-add-console-log)
+- From monorepo root, run workspace scripts as `bun run --cwd <workspace> <script>`; if Bun prints global run help, re-check flag order/command shape. (from .agents/sessions/03-03-0909-add-console-log)
@@ -370,26 +370,32 @@ export type ModelName =
   // Recommended Models
 
   // OpenAI
+  | 'openai/gpt-5.3'
+  | 'openai/gpt-5.3-codex'
+  | 'openai/gpt-5.2'
   | 'openai/gpt-5.1'
   | 'openai/gpt-5.1-chat'
   | 'openai/gpt-5-mini'
   | 'openai/gpt-5-nano'
 
   // Anthropic
+  | 'anthropic/claude-sonnet-4.6'
+  | 'anthropic/claude-opus-4.6'
+  | 'anthropic/claude-haiku-4.5'
   | 'anthropic/claude-sonnet-4.5'
   | 'anthropic/claude-opus-4.1'
-  | 'anthropic/claude-opus-4.6'
 
   // Gemini
+  | 'google/gemini-3-pro-preview'
+  | 'google/gemini-3-flash-preview'
+  | 'google/gemini-3.1-flash-lite-preview'
   | 'google/gemini-2.5-pro'
   | 'google/gemini-2.5-flash'
   | 'google/gemini-2.5-flash-lite'
-  | 'google/gemini-2.5-flash-preview-09-2025'
-  | 'google/gemini-2.5-flash-lite-preview-09-2025'
 
   // X-AI
-  | 'x-ai/grok-4-07-09'
   | 'x-ai/grok-4-fast'
+  | 'x-ai/grok-4.1-fast'
   | 'x-ai/grok-code-fast-1'
 
   // Qwen
@@ -416,12 +422,14 @@ export type ModelName =
   | 'moonshotai/kimi-k2:nitro'
   | 'moonshotai/kimi-k2.5'
   | 'moonshotai/kimi-k2.5:nitro'
+  | 'z-ai/glm-5'
   | 'z-ai/glm-4.6'
   | 'z-ai/glm-4.6:nitro'
   | 'z-ai/glm-4.7'
   | 'z-ai/glm-4.7:nitro'
   | 'z-ai/glm-4.7-flash'
   | 'z-ai/glm-4.7-flash:nitro'
+  | 'minimax/minimax-m2.5'
   | (string & {})
 
 import type { ToolName, GetToolParams } from './tools'
 
@@ -3,6 +3,7 @@
  */
 export type ToolName =
   | 'add_message'
+  | 'apply_patch'
   | 'ask_user'
   | 'code_search'
   | 'end_turn'
@@ -33,6 +34,7 @@ export type ToolName =
  */
 export interface ToolParamsMap {
   add_message: AddMessageParams
+  apply_patch: ApplyPatchParams
   ask_user: AskUserParams
   code_search: CodeSearchParams
   end_turn: EndTurnParams
@@ -67,6 +69,21 @@ export interface AddMessageParams {
   content: string
 }
 
+/**
+ * Apply a file operation (create, update, or delete) using Codex-style apply_patch format.
+ */
+export interface ApplyPatchParams {
+  /** The file operation to perform. */
+  operation: {
+    /** Operation type: create_file, update_file, or delete_file */
+    type: 'create_file' | 'update_file' | 'delete_file'
+    /** File path relative to project root */
+    path: string
+    /** Diff content. Expected for create_file and update_file, but optional at the type level, so the compiler does not enforce it. For create_file, lines are prefixed with +; for update_file, a unified diff with @@ hunks. */
+    diff?: string
+  }
+}
+
 /**
  * Ask the user multiple choice questions and pause execution until they respond.
  */
 
@@ -80,12 +80,7 @@ describe('file-picker agent', () => {
   })
 
   describe('createFilePicker - max mode', () => {
-    test('uses grok model', () => {
-      const maxPicker = createFilePicker('max')
-      expect(maxPicker.model).toBe('x-ai/grok-4.1-fast')
-    })
-
-    test('spawns two file-listers in parallel', () => {
+    test('spawns single file-lister-max', () => {
       const maxPicker = createFilePicker('max')
       const mockAgentState = createMockAgentState()
       const mockLogger = {
@@ -105,9 +100,13 @@ describe('file-picker agent', () => {
 
       const toolCall = result.value as ToolCall<'spawn_agents'>
       expect(toolCall.toolName).toBe('spawn_agents')
-      expect(toolCall.input.agents).toHaveLength(2)
-      expect(toolCall.input.agents[0].agent_type).toBe('file-lister')
-      expect(toolCall.input.agents[1].agent_type).toBe('file-lister')
+      expect(toolCall.input.agents).toHaveLength(1)
+      expect(toolCall.input.agents[0].agent_type).toBe('file-lister-max')
+    })
+
+    test('includes file-lister-max in spawnableAgents', () => {
+      const maxPicker = createFilePicker('max')
+      expect(maxPicker.spawnableAgents).toContain('file-lister-max')
     })
   })
 
@@ -424,7 +423,7 @@ describe('file-picker agent', () => {
   })
 
   describe('handleStepsMax', () => {
-    test('spawns two file-listers in parallel', () => {
+    test('spawns single file-lister-max with prompt and params', () => {
       const maxPicker = createFilePicker('max')
       const mockAgentState = createMockAgentState()
       const mockLogger = {
@@ -445,16 +444,13 @@ describe('file-picker agent', () => {
 
       const toolCall = result.value as ToolCall<'spawn_agents'>
       expect(toolCall.toolName).toBe('spawn_agents')
-      expect(toolCall.input.agents).toHaveLength(2)
-
-      // Both should have same prompt and params
+      expect(toolCall.input.agents).toHaveLength(1)
+      expect(toolCall.input.agents[0].agent_type).toBe('file-lister-max')
       expect(toolCall.input.agents[0].prompt).toBe('Find auth files')
-      expect(toolCall.input.agents[1].prompt).toBe('Find auth files')
       expect(toolCall.input.agents[0].params).toEqual({ directories: ['src'] })
-      expect(toolCall.input.agents[1].params).toEqual({ directories: ['src'] })
     })
 
-    test('merges results from both file-listers', () => {
+    test('extracts results from file-lister-max', () => {
       const maxPicker = createFilePicker('max')
       const mockAgentState = createMockAgentState()
       const mockLogger = {
@@ -472,7 +468,6 @@ describe('file-picker agent', () => {
 
       generator.next()
 
-      // Mock result with two spawned agent results - wrapped in toolResult with production structure
       const mockToolResult = {
         agentState: createMockAgentState(),
         toolResult: [
@@ -481,29 +476,14 @@ describe('file-picker agent', () => {
             value: [
               {
                 agentName: 'File Lister',
-                agentType: 'file-lister',
+                agentType: 'file-lister-max',
                 value: {
                   type: 'lastMessage',
                   value: [
                     {
                       role: 'assistant',
                       content: [
-                        { type: 'text', text: 'src/auth.ts\nsrc/login.ts' },
-                      ],
-                    },
-                  ],
-                },
-              },
-              {
-                agentName: 'File Lister',
-                agentType: 'file-lister',
-                value: {
-                  type: 'lastMessage',
-                  value: [
-                    {
-                      role: 'assistant',
-                      content: [
-                        { type: 'text', text: 'src/user.ts\nsrc/auth.ts' }, // auth.ts is duplicate
+                        { type: 'text', text: 'src/auth.ts\nsrc/login.ts\nsrc/user.ts' },
                       ],
                     },
                   ],
@@ -517,7 +497,6 @@ describe('file-picker agent', () => {
 
       const result = generator.next(mockToolResult)
 
-      // Should merge and deduplicate
       const toolCall = result.value as ToolCall<'read_files'>
       const paths = toolCall.input.paths
       expect(paths).toHaveLength(3)
@@ -526,7 +505,7 @@ describe('file-picker agent', () => {
       expect(paths).toContain('src/user.ts')
     })
 
-    test('handles partial failures in max mode', () => {
+    test('handles error from file-lister-max', () => {
       const maxPicker = createFilePicker('max')
       const mockAgentState = createMockAgentState()
       const mockLogger = {
@@ -544,7 +523,6 @@ describe('file-picker agent', () => {
 
       generator.next()
 
-      // One success, one error - wrapped in toolResult with production structure
       const mockToolResult = {
         agentState: createMockAgentState(),
         toolResult: [
@@ -553,23 +531,10 @@ describe('file-picker agent', () => {
             value: [
               {
                 agentName: 'File Lister',
-                agentType: 'file-lister',
-                value: {
-                  type: 'lastMessage',
-                  value: [
-                    {
-                      role: 'assistant',
-                      content: [{ type: 'text', text: 'src/file.ts' }],
-                    },
-                  ],
-                },
-              },
-              {
-                agentName: 'File Lister',
-                agentType: 'file-lister',
+                agentType: 'file-lister-max',
                 value: {
                   type: 'error',
-                  message: 'Second file-lister failed',
+                  message: 'File lister max failed',
                 },
               },
             ],
@@ -580,10 +545,10 @@ describe('file-picker agent', () => {
 
       const result = generator.next(mockToolResult)
 
-      // Should still proceed with successful results
-      const toolCall = result.value as ToolCall<'read_files'>
-      expect(toolCall.toolName).toBe('read_files')
-      expect(toolCall.input.paths).toContain('src/file.ts')
+      const stepText = result.value as StepText
+      expect(stepText.type).toBe('STEP_TEXT')
+      expect(stepText.text).toContain('Error from file-lister')
+      expect(stepText.text).toContain('File lister max failed')
     })
   })
 
 
@@ -0,0 +1,8 @@
+import { createBaseDeep } from './base-deep'
+
+const definition = {
+  ...createBaseDeep({ noAskUser: true, noLearning: true }), // base-deep built with the noAskUser and noLearning options enabled
+  id: 'base-deep-evals', // listed after the spread, so id and displayName take precedence over same-named fields from createBaseDeep
+  displayName: 'Buffy the Codex Evals Orchestrator',
+}
+export default definition