diff --git a/package-lock.json b/package-lock.json index d1618ca..b2cf080 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6102,7 +6102,7 @@ }, "packages/cli": { "name": "@onkernel/cua-cli", - "version": "0.1.0", + "version": "0.1.1", "license": "MIT", "dependencies": { "@earendil-works/pi-coding-agent": "0.79.1", diff --git a/packages/agent/README.md b/packages/agent/README.md index 03b1f20..cb60d4f 100644 --- a/packages/agent/README.md +++ b/packages/agent/README.md @@ -98,6 +98,8 @@ Both classes mirror pi constructor shapes and behavior, with minimal additions: - CUA model refs (`"provider:model"`) accepted where pi expects a concrete model - `extraTools` to add your own pi tools alongside the built-in browser tools - `computerUseExtra: true` to let the model use a small navigation helper +- `playwright: true` to let the model run Playwright/TypeScript against the + live browser session If auth callbacks are omitted, both classes default to CUA env var conventions: - OpenAI: `OPENAI_API_KEY` @@ -124,6 +126,19 @@ URL or go back. `computerUseExtra: true` adds `computer_use_extra`, a provider-neutral escape hatch exposing `goto`, `back`, `forward`, and `url` so navigation works uniformly regardless of which model is driving. +Some steps are awkward as raw pointer/keyboard actions: precise DOM reads, +form fills, data extraction, or waiting on a specific selector. +`playwright: true` adds `playwright_execute`, which runs Playwright/TypeScript +directly against the live browser session. `page`, `context`, and `browser` +are in scope and the code may `return` a JSON-serializable value. Each call +runs in a fresh JS context (locals don't persist across calls) but the +browser session does carry over. No screenshot is returned automatically; +request one on a follow-up turn when the model needs to see the page. +Playwright-level failures come back as tool content (so the model can adapt) +rather than thrown errors. 
Verified e2e +against Anthropic, Tzafon, and Yutori CUA models; OpenAI and Google are +unit-tested. + ### Model Switching `CuaAgent` follows pi `Agent` semantics: assign `agent.state.model` to a diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index 49a4a6b..3e158f2 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -12,6 +12,7 @@ import { import { type Api, CUA_NAVIGATION_TOOL_NAME, + CUA_PLAYWRIGHT_TOOL_NAME, type CuaModelRef, type CuaRuntimeSpec, type CuaSimpleStreamOptions, @@ -66,6 +67,8 @@ export type CuaAgentOptions = Omit & { extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */ computerUseExtra?: boolean; + /** Expose a tool that runs Playwright code against the browser session. */ + playwright?: boolean; }; /** @@ -89,6 +92,8 @@ export type CuaAgentHarnessOptions< extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */ computerUseExtra?: boolean; + /** Expose a tool that runs Playwright code against the browser session. */ + playwright?: boolean; /** Optional payload hook composed after the provider-specific CUA payload hook. */ onPayload?: SimpleStreamOptions["onPayload"]; }; @@ -110,6 +115,7 @@ class CuaRuntimeController { model: CuaRuntimeInput; extraTools?: AgentTool[]; computerUseExtra?: boolean; + playwright?: boolean; onPayload?: SimpleStreamOptions["onPayload"]; }, ) { @@ -136,6 +142,7 @@ class CuaRuntimeController { { toolExecutors: this.runtimeSpec.toolExecutors, computerUseExtra: this.options.computerUseExtra, + playwright: this.options.playwright, }, this.translator, ), @@ -159,6 +166,7 @@ class CuaRuntimeController { return [ ...(this.options.extraTools ?? []).map((tool) => tool.name), ...(this.options.computerUseExtra ? [CUA_NAVIGATION_TOOL_NAME] : []), + ...(this.options.playwright ? 
[CUA_PLAYWRIGHT_TOOL_NAME] : []), ]; } @@ -203,6 +211,7 @@ export class CuaAgent extends Agent { prepareNextTurn, extraTools, computerUseExtra, + playwright, ...agentOptions } = options; const runtime = new CuaRuntimeController({ @@ -211,6 +220,7 @@ export class CuaAgent extends Agent { model: initialState.model, extraTools, computerUseExtra, + playwright, onPayload, }); const wrappedStreamFn: StreamFn = (model, context, streamOptions) => { @@ -326,6 +336,7 @@ export class CuaAgentHarness< model, extraTools, computerUseExtra, + playwright, systemPrompt, getApiKeyAndHeaders, onPayload, @@ -338,6 +349,7 @@ export class CuaAgentHarness< model, extraTools, computerUseExtra, + playwright, onPayload, }); const resolvedTools = runtime.tools(); diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 052b5d8..75bea96 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -8,6 +8,7 @@ export type { ComputerToolOptions, CuaExecutorTool, NavigationDetails, + PlaywrightDetails, } from "./tools"; export { CuaAgent, CuaAgentHarness } from "./agent"; export type { CuaAgentHarnessOptions, CuaAgentOptions, CuaAgentState } from "./agent"; diff --git a/packages/agent/src/tools.ts b/packages/agent/src/tools.ts index 74becaa..a326116 100644 --- a/packages/agent/src/tools.ts +++ b/packages/agent/src/tools.ts @@ -2,10 +2,13 @@ import type Kernel from "@onkernel/sdk"; import type { ImageContent, TextContent, Tool } from "@earendil-works/pi-ai"; import { CUA_NAVIGATION_TOOL_NAME, + CUA_PLAYWRIGHT_TOOL_NAME, createCuaNavigationToolDefinition, + createCuaPlaywrightToolDefinition, type ComputerToolCoordinateSystem, type CuaBatchInput, type CuaNavigationInput, + type CuaPlaywrightInput, type CuaScreenshotSpec, type CuaToolExecutorSpec, type TSchema, @@ -20,6 +23,7 @@ export interface ComputerToolOptions { coordinateSystem?: ComputerToolCoordinateSystem; screenshot?: CuaScreenshotSpec; computerUseExtra?: boolean; + playwright?: boolean; } type 
/**
 * Structured details attached to a `playwright_execute` tool result, so
 * library consumers can inspect the outcome directly instead of re-parsing
 * the model-facing tool content blocks.
 */
export interface PlaywrightDetails {
  /**
   * Whether the Playwright code completed without error. `false` means the
   * code threw or the SDK reported failure; the failure is then also surfaced
   * as tool content for the model.
   */
  success: boolean;
  /** Short human-readable status: a success line or a failure summary. */
  statusText: string;
  /** Value returned by the executed code; omitted when the execution produced no result. */
  result?: unknown;
  /** Raw daemon stdout, included whenever it is non-empty (may be whitespace-only). */
  stdout?: string;
  /** Raw daemon stderr, included whenever it is non-empty (may be whitespace-only). */
  stderr?: string;
  /** Error message from the daemon when one was provided; expected only when `success` is `false`. */
  error?: string;
}
/**
 * Combine the runtime's own tool executors with the optional helper tools.
 *
 * Runtime executors come first, in their original order. The navigation
 * helper is appended when `computerUseExtra` is set and the Playwright tool
 * when `playwright` is set; each is skipped if the runtime already provides a
 * tool with the same name.
 */
function withExtraTools(args: Pick<ComputerToolOptions, "toolExecutors" | "computerUseExtra" | "playwright">): ComputerExecutorSpec[] {
  const combined: ComputerExecutorSpec[] = Array.from(args.toolExecutors);

  // Names are collected from the runtime executors only, before any helper is appended.
  const providedNames = new Set<string>();
  for (const spec of combined) {
    providedNames.add(spec.definition.name);
  }

  const wantsNavigation = args.computerUseExtra && !providedNames.has(CUA_NAVIGATION_TOOL_NAME);
  const wantsPlaywright = args.playwright && !providedNames.has(CUA_PLAYWRIGHT_TOOL_NAME);

  if (wantsNavigation) {
    combined.push({ kind: "navigation", definition: createCuaNavigationToolDefinition() });
  }
  if (wantsPlaywright) {
    combined.push({ kind: "playwright", definition: createCuaPlaywrightToolDefinition() });
  }
  return combined;
}
/**
 * Run model-supplied Playwright code through the translator and convert the
 * outcome into a tool result.
 *
 * Content blocks are emitted in a fixed order: `result`, `stdout`, `stderr`,
 * then `error` (failure only). When nothing else was produced, the status
 * text is used as the single content block. No screenshot is attached.
 *
 * A failure reported by the execution itself (`success === false`) is returned
 * as content so the model can adapt. An empty or whitespace-only `error` is
 * treated as "no message", so the fallback text is used instead of a blank
 * `error: ` line.
 *
 * @param translator Translator bound to the browser session.
 * @param params Validated tool arguments (`code`, optional `timeout_sec`).
 * @returns Tool content for the model plus structured `PlaywrightDetails`.
 * @throws Error prefixed with `playwright_execute failed:` when the translator
 *   call (or result formatting) throws; the original error is kept as `cause`.
 */
async function executePlaywrightTool(translator: InternalComputerTranslator, params: CuaPlaywrightInput): Promise<AgentToolResult<PlaywrightDetails>> {
  try {
    const execution = await translator.executePlaywright(params.code, params.timeout_sec);

    // `??` alone lets "" through, which would render as a blank message.
    const reportedError = execution.error?.trim() ? execution.error : undefined;

    const content: ToolContent = [];
    if (execution.result !== undefined) {
      content.push({ type: "text", text: `result: ${formatPlaywrightResult(execution.result)}` });
    }
    // Streams are shown to the model only when they carry visible text.
    if (execution.stdout?.trim()) {
      content.push({ type: "text", text: `stdout:\n${execution.stdout.trimEnd()}` });
    }
    if (execution.stderr?.trim()) {
      content.push({ type: "text", text: `stderr:\n${execution.stderr.trimEnd()}` });
    }
    if (!execution.success) {
      content.push({ type: "text", text: `error: ${reportedError ?? "playwright execution reported failure"}` });
    }

    const statusText = execution.success
      ? "Playwright executed successfully."
      : `Playwright execution failed: ${reportedError ?? "unknown error"}`;
    // Side-effect-only runs produce no blocks above; give the model the status line instead.
    if (content.length === 0) content.push({ type: "text", text: statusText });

    // Details keep the raw stream values (untrimmed) for library consumers.
    const details: PlaywrightDetails = { success: execution.success, statusText };
    if (execution.result !== undefined) details.result = execution.result;
    if (execution.stdout) details.stdout = execution.stdout;
    if (execution.stderr) details.stderr = execution.stderr;
    if (reportedError !== undefined) details.error = reportedError;
    return { content, details };
  } catch (err) {
    throw new Error(`playwright_execute failed: ${errorMessage(err)}`, { cause: err });
  }
}
/**
 * Narrow untrusted tool-call arguments to the `playwright_execute` input.
 *
 * Only the presence of a string `code` property on a non-null object is
 * checked; the value is returned as-is (same reference), not copied.
 *
 * @throws Error("invalid playwright_execute parameters") when `value` is not
 *   an object carrying a string `code`.
 */
function asPlaywrightInput(value: unknown): CuaPlaywrightInput {
  const isObject = typeof value === "object" && value !== null;
  if (!isObject || typeof (value as { code?: unknown }).code !== "string") {
    throw new Error("invalid playwright_execute parameters");
  }
  return value as CuaPlaywrightInput;
}
{ timeout_sec: timeout } : {}), + }); + } + async executeBatch(actions: CuaAction[]): Promise { const result: BatchExecutionResult = { readResults: [] }; const pending: KernelBatchAction[] = []; @@ -228,6 +239,11 @@ export class InternalComputerTranslator { type KernelBatchAction = Parameters[1]["actions"][number]; +export type PlaywrightExecutionResult = + Awaited>; + +const PLAYWRIGHT_MAX_TIMEOUT_SEC = 300; + const CLICK_BUTTONS: ReadonlySet = new Set(["left", "right", "middle", "back", "forward"]); const DRAG_BUTTONS: ReadonlySet = new Set(["left", "right", "middle"]); diff --git a/packages/agent/test/agent.test.ts b/packages/agent/test/agent.test.ts index 73e83ee..dfc7525 100644 --- a/packages/agent/test/agent.test.ts +++ b/packages/agent/test/agent.test.ts @@ -144,6 +144,23 @@ describe("CuaAgent", () => { ]); }); + it("synthesizes a playwright_execute tool when requested", () => { + const runtime = resolveCuaRuntimeSpec("openai:gpt-5.5"); + const agent = new CuaAgent({ + browser, + client, + playwright: true, + initialState: { + model: "openai:gpt-5.5", + }, + }); + + expect(agent.state.tools.map((tool) => tool.name)).toEqual([ + ...runtime.toolExecutors.map((tool) => tool.definition.name), + "playwright_execute", + ]); + }); + it("refreshes CUA runtime state when state.model changes", () => { const runtime = resolveCuaRuntimeSpec("google:gemini-3-flash-preview"); const agent = new CuaAgent({ diff --git a/packages/agent/test/tool-exhaustiveness.test.ts b/packages/agent/test/tool-exhaustiveness.test.ts index 2a75273..af8c1ba 100644 --- a/packages/agent/test/tool-exhaustiveness.test.ts +++ b/packages/agent/test/tool-exhaustiveness.test.ts @@ -85,4 +85,80 @@ describe("Cua tool executor coverage", () => { ]); expect(result.content.at(-1)).toMatchObject({ type: "image", mimeType: "image/png" }); }); + + it("runs the playwright_execute tool and returns result + stdout as tool content", async () => { + const calls: Array<{ id: string; body: { code: string; 
timeout_sec?: number } }> = []; + const runtime = resolveCuaRuntimeSpec("openai:gpt-5.5"); + const tools = createCuaComputerTools({ + browser, + client: { + browsers: { + playwright: { + execute: async (id: string, body: { code: string; timeout_sec?: number }) => { + calls.push({ id, body }); + return { success: true, result: "Example Domain", stdout: "logged\n", stderr: "" }; + }, + }, + }, + } as unknown as Kernel, + toolExecutors: runtime.toolExecutors, + playwright: true, + }); + const playwright = tools.find((tool) => tool.name === "playwright_execute"); + expect(playwright).toBeDefined(); + + const result = await playwright!.execute("call_1", { code: "return await page.title();", timeout_sec: 30 }); + + expect(calls).toEqual([{ id: "browser_123", body: { code: "return await page.title();", timeout_sec: 30 } }]); + expect(result.content[0]).toMatchObject({ type: "text", text: "result: Example Domain" }); + expect(result.content.some((block) => block.type === "text" && block.text === "stdout:\nlogged")).toBe(true); + expect(result.content.every((block) => block.type !== "image")).toBe(true); + expect(result.details).toMatchObject({ success: true }); + }); + + it("falls back to statusText for side-effect-only playwright_execute calls", async () => { + const runtime = resolveCuaRuntimeSpec("openai:gpt-5.5"); + const tools = createCuaComputerTools({ + browser, + client: { + browsers: { + playwright: { execute: async () => ({ success: true }) }, + }, + } as unknown as Kernel, + toolExecutors: runtime.toolExecutors, + playwright: true, + }); + const playwright = tools.find((tool) => tool.name === "playwright_execute"); + expect(playwright).toBeDefined(); + + const result = await playwright!.execute("call_1", { code: "await page.click('#submit')" }); + + expect(result.content).toEqual([ + { type: "text", text: "Playwright executed successfully." }, + ]); + expect(result.details).toMatchObject({ success: true, statusText: "Playwright executed successfully." 
it("surfaces playwright_execute failures as tool content without throwing", async () => {
  // Stub client whose Playwright endpoint always reports a failed run.
  const failingExecute = async () => ({ success: false, error: "boom", stderr: "stack" });
  const failingClient = {
    browsers: { playwright: { execute: failingExecute } },
  } as unknown as Kernel;
  const { toolExecutors } = resolveCuaRuntimeSpec("openai:gpt-5.5");

  const playwright = createCuaComputerTools({
    browser,
    client: failingClient,
    toolExecutors,
    playwright: true,
  }).find((tool) => tool.name === "playwright_execute");
  expect(playwright).toBeDefined();

  // The failure must come back as a resolved tool result, not a rejection.
  const { content, details } = await playwright!.execute("call_1", { code: "await page.click('#missing')" });

  const hasErrorBlock = content.some((block) => block.type === "text" && block.text.includes("error: boom"));
  const hasStderrBlock = content.some((block) => block.type === "text" && block.text === "stderr:\nstack");
  const hasNoImage = content.every((block) => block.type !== "image");

  expect(hasErrorBlock).toBe(true);
  expect(hasStderrBlock).toBe(true);
  expect(hasNoImage).toBe(true);
  expect(details).toMatchObject({ success: false });
});
/**
 * Parameter schema for the Playwright tool: a required `code` string and an
 * optional numeric `timeout_sec`. The object declares
 * `additionalProperties: false`.
 *
 * The `description` strings are part of the schema value itself, so editing
 * them changes the emitted tool definition.
 */
export const CuaPlaywrightSchema = Type.Object(
  {
    code: Type.String({
      description:
        "Playwright/TypeScript to run against the live browser. `page`, `context`, and `browser` are in scope; end with a `return` to send a JSON-serializable value back. Example: \"await page.goto('https://example.com'); return await page.title();\"",
    }),
    // NOTE(review): the default and max quoted in this description are not enforced by the schema — confirm they match the executor.
    timeout_sec: Type.Optional(Type.Number({ description: "Optional execution timeout in seconds. Default 60, max 300." })),
  },
  { additionalProperties: false },
);
/**
 * Build the tool definition for the Playwright tool from the shared name,
 * description, and parameter-schema constants. A fresh object is returned on
 * every call.
 */
export function createCuaPlaywrightToolDefinition(): Tool {
  const definition: Tool = {
    name: CUA_PLAYWRIGHT_TOOL_NAME,
    description: CUA_PLAYWRIGHT_TOOL_DESCRIPTION,
    parameters: CuaPlaywrightSchema,
  };
  return definition;
}
b/packages/cli/README.md @@ -102,6 +102,16 @@ Configuration is by environment variable. There is no config file. Use `--thinking ` (`off | minimal | low | medium | high | xhigh`, default `low`) for providers that support reasoning effort. +## Playwright escape hatch + +Pass `--playwright` to expose the `playwright_execute` tool, letting the +model run Playwright/TypeScript directly against the live browser session +for steps that are awkward as raw pointer/keyboard actions (precise DOM +reads, form fills, data extraction, waiting on selectors). `page`, +`context`, and `browser` are in scope; the code may `return` a +JSON-serializable value. Off by default. Verified e2e with Anthropic, +Tzafon, and Yutori CUA models. + ## Output formats `--print` defaults to streaming text. Pass `-o jsonl` for one diff --git a/packages/cli/src/cli-harness.ts b/packages/cli/src/cli-harness.ts index 7c7d806..dc13007 100644 --- a/packages/cli/src/cli-harness.ts +++ b/packages/cli/src/cli-harness.ts @@ -176,6 +176,7 @@ export interface HarnessCliFlags { debugTui: boolean; jsonlIncludeDeltas: boolean; jsonlIncludeImages: boolean; + playwright: boolean; model?: string; thinking?: string; browserProfile?: string; @@ -413,6 +414,7 @@ async function setupHarnessRuntime( skills, contextFiles, thinkingLevel, + playwright: flags.playwright, modelBaseUrl: baseUrlOverride, }); diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index b3a249c..5c20070 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -45,6 +45,8 @@ Options: --profile-no-save-changes Do not persist changes back to the profile --browser-timeout Browser inactivity timeout in seconds (default 300) --max-steps Max turns for action subcommands (default 3) + --playwright Add the playwright_execute tool so the model can run + Playwright code against the browser session --out Output file for screenshot subcommand -o, --output Output format for --print: text (default) | jsonl --jsonl-include-deltas Include 
assistant_text_delta events (default off) @@ -98,6 +100,7 @@ interface CliFlags { debugTui: boolean; jsonlIncludeDeltas: boolean; jsonlIncludeImages: boolean; + playwright: boolean; model?: string; thinking?: string; browserProfile?: string; @@ -146,6 +149,7 @@ function parseCliArgs(argv: string[]): CliFlags { output: { type: "string", short: "o" }, "jsonl-include-deltas": { type: "boolean", default: false }, "jsonl-include-images": { type: "boolean", default: false }, + playwright: { type: "boolean", default: false }, }, allowPositionals: true, strict: true, @@ -192,6 +196,7 @@ function parseCliArgs(argv: string[]): CliFlags { output: parsed.values.output as string | undefined, jsonlIncludeDeltas: !!parsed.values["jsonl-include-deltas"], jsonlIncludeImages: !!parsed.values["jsonl-include-images"], + playwright: !!parsed.values.playwright, positionals: parsed.positionals, }; } @@ -207,6 +212,7 @@ function toHarnessFlags(flags: CliFlags): HarnessCliFlags { debugTui: flags.debugTui, jsonlIncludeDeltas: flags.jsonlIncludeDeltas, jsonlIncludeImages: flags.jsonlIncludeImages, + playwright: flags.playwright, model: flags.model, thinking: flags.thinking, browserProfile: flags.browserProfile, diff --git a/packages/cli/src/harness.ts b/packages/cli/src/harness.ts index f68ced8..a748811 100644 --- a/packages/cli/src/harness.ts +++ b/packages/cli/src/harness.ts @@ -31,6 +31,8 @@ export interface BuildCuaHarnessOptions { /** Context files (AGENTS.md, CLAUDE.md, …) appended to the system prompt. */ contextFiles?: ContextFile[]; thinkingLevel?: ThinkingLevel; + /** Expose the playwright_execute tool that runs Playwright code against the browser session. */ + playwright?: boolean; /** Override the default coding-tools extraTools (bash/read/edit/write/grep/find/ls). */ extraTools?: CuaAgentHarnessOptions["extraTools"]; /** Override env-var API-key resolution (mainly for tests). 
*/ @@ -60,6 +62,7 @@ export function buildCuaHarness(opts: BuildCuaHarnessOptions): CuaAgentHarness { browser: opts.browser, client: opts.client, extraTools, + playwright: opts.playwright, resources: { skills }, thinkingLevel: opts.thinkingLevel, systemPrompt: ({ model: activeModel, resources }) => { diff --git a/packages/cli/src/tui/message-list.ts b/packages/cli/src/tui/message-list.ts index 422c84d..95b9b32 100644 --- a/packages/cli/src/tui/message-list.ts +++ b/packages/cli/src/tui/message-list.ts @@ -86,6 +86,8 @@ function formatToolCall(name: string, args: unknown): string { if (action === "goto" && typeof obj.url === "string") return `goto(${obj.url})`; return action; } + case "playwright_execute": + return colors.dim(typeof obj.code === "string" ? truncate(obj.code.replace(/\s+/g, " ").trim(), 80) : ""); case "bash": return colors.dim(typeof obj.command === "string" ? truncate(obj.command, 80) : ""); case "read":