kernel · dprevoznik · Jun 23, 2026 · Jun 19, 2026 · Jun 20, 2026 · Jun 20, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/packages/agent/README.md b/packages/agent/README.md
@@ -98,6 +98,8 @@ Both classes mirror pi constructor shapes and behavior, with minimal additions:
 - CUA model refs (`"provider:model"`) accepted where pi expects a concrete model
 - `extraTools` to add your own pi tools alongside the built-in browser tools
 - `computerUseExtra: true` to let the model use a small navigation helper
+- `playwright: true` to let the model run Playwright/TypeScript against the
+  live browser session
 
 If auth callbacks are omitted, both classes default to CUA env var conventions:
 - OpenAI: `OPENAI_API_KEY`
@@ -124,6 +126,19 @@ URL or go back. `computerUseExtra: true` adds `computer_use_extra`, a
 provider-neutral escape hatch exposing `goto`, `back`, `forward`, and `url`
 so navigation works uniformly regardless of which model is driving.
 
+Some steps are awkward as raw pointer/keyboard actions: precise DOM reads,
+form fills, data extraction, or waiting on a specific selector.
+`playwright: true` adds `playwright_execute`, which runs Playwright/TypeScript
+directly against the live browser session. `page`, `context`, and `browser`
+are in scope and the code may `return` a JSON-serializable value. Each call
+runs in a fresh JS context (locals don't persist across calls) but the
+browser session does carry over. No screenshot is returned automatically;
+request one on a follow-up turn when the model needs to see the page.
+Playwright-level failures (the code threw, or the run reported failure) come
+back as tool content so the model can adapt; a failed execute request or
+invalid tool parameters still throw. Verified end-to-end against Anthropic,
+Tzafon, and Yutori CUA models; OpenAI and Google are covered by unit tests.
+
 ### Model Switching
 
 `CuaAgent` follows pi `Agent` semantics: assign `agent.state.model` to a

diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts
@@ -12,6 +12,7 @@ import {
 import {
 	type Api,
 	CUA_NAVIGATION_TOOL_NAME,
+	CUA_PLAYWRIGHT_TOOL_NAME,
 	type CuaModelRef,
 	type CuaRuntimeSpec,
 	type CuaSimpleStreamOptions,
@@ -66,6 +67,8 @@ export type CuaAgentOptions = Omit<AgentOptions, "initialState"> & {
 	extraTools?: AgentTool[];
 	/** Expose a helper for browser navigation and URL reads. */
 	computerUseExtra?: boolean;
+	/** Expose a tool that runs Playwright code against the browser session. */
+	playwright?: boolean;
 };
 
 /**
@@ -89,6 +92,8 @@ export type CuaAgentHarnessOptions<
 	extraTools?: AgentTool[];
 	/** Expose a helper for browser navigation and URL reads. */
 	computerUseExtra?: boolean;
+	/** Expose a tool that runs Playwright code against the browser session. */
+	playwright?: boolean;
 	/** Optional payload hook composed after the provider-specific CUA payload hook. */
 	onPayload?: SimpleStreamOptions["onPayload"];
 };
@@ -110,6 +115,7 @@ class CuaRuntimeController {
 			model: CuaRuntimeInput;
 			extraTools?: AgentTool[];
 			computerUseExtra?: boolean;
+			playwright?: boolean;
 			onPayload?: SimpleStreamOptions["onPayload"];
 		},
 	) {
@@ -136,6 +142,7 @@ class CuaRuntimeController {
 				{
 					toolExecutors: this.runtimeSpec.toolExecutors,
 					computerUseExtra: this.options.computerUseExtra,
+					playwright: this.options.playwright,
 				},
 				this.translator,
 			),
@@ -159,6 +166,7 @@ class CuaRuntimeController {
 		return [
 			...(this.options.extraTools ?? []).map((tool) => tool.name),
 			...(this.options.computerUseExtra ? [CUA_NAVIGATION_TOOL_NAME] : []),
+			...(this.options.playwright ? [CUA_PLAYWRIGHT_TOOL_NAME] : []),
 		];
 	}
 
@@ -203,6 +211,7 @@ export class CuaAgent extends Agent {
 			prepareNextTurn,
 			extraTools,
 			computerUseExtra,
+			playwright,
 			...agentOptions
 		} = options;
 		const runtime = new CuaRuntimeController({
@@ -211,6 +220,7 @@ export class CuaAgent extends Agent {
 			model: initialState.model,
 			extraTools,
 			computerUseExtra,
+			playwright,
 			onPayload,
 		});
 		const wrappedStreamFn: StreamFn = (model, context, streamOptions) => {
@@ -326,6 +336,7 @@ export class CuaAgentHarness<
 			model,
 			extraTools,
 			computerUseExtra,
+			playwright,
 			systemPrompt,
 			getApiKeyAndHeaders,
 			onPayload,
@@ -338,6 +349,7 @@ export class CuaAgentHarness<
 			model,
 			extraTools,
 			computerUseExtra,
+			playwright,
 			onPayload,
 		});
 		const resolvedTools = runtime.tools();

diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts
@@ -8,6 +8,7 @@ export type {
 	ComputerToolOptions,
 	CuaExecutorTool,
 	NavigationDetails,
+	PlaywrightDetails,
 } from "./tools";
 export { CuaAgent, CuaAgentHarness } from "./agent";
 export type { CuaAgentHarnessOptions, CuaAgentOptions, CuaAgentState } from "./agent";
diff --git a/packages/agent/src/tools.ts b/packages/agent/src/tools.ts
@@ -2,10 +2,13 @@ import type Kernel from "@onkernel/sdk";
 import type { ImageContent, TextContent, Tool } from "@earendil-works/pi-ai";
 import {
 	CUA_NAVIGATION_TOOL_NAME,
+	CUA_PLAYWRIGHT_TOOL_NAME,
 	createCuaNavigationToolDefinition,
+	createCuaPlaywrightToolDefinition,
 	type ComputerToolCoordinateSystem,
 	type CuaBatchInput,
 	type CuaNavigationInput,
+	type CuaPlaywrightInput,
 	type CuaScreenshotSpec,
 	type CuaToolExecutorSpec,
 	type TSchema,
@@ -20,6 +23,7 @@ export interface ComputerToolOptions {
 	coordinateSystem?: ComputerToolCoordinateSystem;
 	screenshot?: CuaScreenshotSpec;
 	computerUseExtra?: boolean;
+	playwright?: boolean;
 }
 
 type ToolContent = Array<TextContent | ImageContent>;
@@ -35,31 +39,59 @@ export interface NavigationDetails {
 	url?: string;
 }
 
+/**
+ * Structured details for a `playwright_execute` tool result. Library
+ * consumers can read these directly instead of re-parsing the model-facing
+ * tool content blocks.
+ *
+ * - `success` — whether the Playwright code itself completed without error.
+ *   A `false` value means the code threw or the SDK reported failure; in
+ *   that case the failure is also surfaced as tool content for the model.
+ * - `statusText` — short human-readable status (success or failure summary).
+ * - `result` — the code's return value; set whenever it is not `undefined`.
+ * - `stdout`/`stderr` — raw daemon output, present whenever the daemon
+ *   reported a non-empty value on that stream (may be whitespace-only).
+ * - `error` — the daemon's error message, copied whenever it is non-empty;
+ *   expected only when `success` is `false`.
+ */
+export interface PlaywrightDetails {
+	success: boolean;
+	statusText: string;
+	result?: unknown;
+	stdout?: string;
+	stderr?: string;
+	error?: string;
+}
+
 type BatchTool = AgentTool<TSchema, BatchDetails>;
 type NavigationTool = AgentTool<TSchema, NavigationDetails>;
+type PlaywrightTool = AgentTool<TSchema, PlaywrightDetails>;
 type ActionTool = AgentTool<TSchema, BatchDetails>;
-export type CuaExecutorTool = BatchTool | NavigationTool | ActionTool;
+export type CuaExecutorTool = BatchTool | NavigationTool | PlaywrightTool | ActionTool;
 type NavigationExecutorSpec = { kind: "navigation"; definition: Tool };
-type ComputerExecutorSpec = CuaToolExecutorSpec | NavigationExecutorSpec;
+type PlaywrightExecutorSpec = { kind: "playwright"; definition: Tool };
+type ComputerExecutorSpec = CuaToolExecutorSpec | NavigationExecutorSpec | PlaywrightExecutorSpec;
 
 export function createCuaComputerTools(args: ComputerToolOptions): CuaExecutorTool[] {
 	return buildCuaComputerTools(args, new InternalComputerTranslator(args));
 }
 
 /** Build executor tools against an existing translator (internal; not part of the package surface). */
 export function buildCuaComputerTools(
-	args: Pick<ComputerToolOptions, "toolExecutors" | "computerUseExtra">,
+	args: Pick<ComputerToolOptions, "toolExecutors" | "computerUseExtra" | "playwright">,
 	translator: InternalComputerTranslator,
 ): CuaExecutorTool[] {
-	return withNavigationTool(args).map((executor) => createExecutorTool(executor, translator));
+	return withExtraTools(args).map((executor) => createExecutorTool(executor, translator));
 }
 
-function withNavigationTool(args: Pick<ComputerToolOptions, "toolExecutors" | "computerUseExtra">): ComputerExecutorSpec[] {
+function withExtraTools(args: Pick<ComputerToolOptions, "toolExecutors" | "computerUseExtra" | "playwright">): ComputerExecutorSpec[] { // Returns the provider executors plus any opted-in synthesized tools, appended after them.
 	const executors: ComputerExecutorSpec[] = [...args.toolExecutors];
 	const existing = new Set(executors.map((executor) => executor.definition.name));
 	if (args.computerUseExtra && !existing.has(CUA_NAVIGATION_TOOL_NAME)) {
-		const definition = createCuaNavigationToolDefinition();
-		executors.push({ kind: "navigation", definition });
+		executors.push({ kind: "navigation", definition: createCuaNavigationToolDefinition() });
+	}
+	if (args.playwright && !existing.has(CUA_PLAYWRIGHT_TOOL_NAME)) { // skipped when a provider executor already uses the playwright tool name
+		executors.push({ kind: "playwright", definition: createCuaPlaywrightToolDefinition() });
 	}
 	return executors;
 }
@@ -78,6 +110,19 @@ function createExecutorTool(executor: ComputerExecutorSpec, translator: Internal
 		};
 		return tool;
 	}
+	if (isPlaywrightExecutor(executor)) {
+		const tool: PlaywrightTool = {
+			name: definition.name,
+			label: definition.name,
+			description: definition.description,
+			parameters: definition.parameters,
+			executionMode: "sequential",
+			async execute(_toolCallId: string, params: unknown): Promise<AgentToolResult<PlaywrightDetails>> {
+				return executePlaywrightTool(translator, asPlaywrightInput(params));
+			},
+		};
+		return tool;
+	}
 	const tool: ActionTool = {
 		name: definition.name,
 		label: definition.name,
@@ -95,6 +140,10 @@ function isNavigationExecutor(executor: ComputerExecutorSpec): executor is Navig
 	return "kind" in executor && executor.kind === "navigation";
 }
 
+function isPlaywrightExecutor(executor: ComputerExecutorSpec): executor is PlaywrightExecutorSpec { // Type guard: true only for the synthesized playwright executor spec.
+	return "kind" in executor && executor.kind === "playwright"; // `in` check comes first; presumably CuaToolExecutorSpec may lack `kind` — confirm against its declaration
+}
+
 async function executeBatchTool(translator: InternalComputerTranslator, params: CuaBatchInput): Promise<AgentToolResult<BatchDetails>> {
 	const content: ToolContent = [];
 	const readResults: BatchDetails["readResults"] = [];
@@ -149,6 +198,42 @@ async function executeNavigationTool(translator: InternalComputerTranslator, par
 	}
 }
 
+async function executePlaywrightTool(translator: InternalComputerTranslator, params: CuaPlaywrightInput): Promise<AgentToolResult<PlaywrightDetails>> { // Runs params.code through the translator and maps the outcome to model-facing content plus structured details.
+	try {
+		const execution = await translator.executePlaywright(params.code, params.timeout_sec); // a rejection here is caught below and rethrown with a tool-name prefix
+
+		const content: ToolContent = []; // model-facing text blocks, pushed in order: result, stdout, stderr, error
+		if (execution.result !== undefined) { // only `undefined` is skipped, so a `null` result is still reported
+			content.push({ type: "text", text: `result: ${formatPlaywrightResult(execution.result)}` });
+		}
+		if (execution.stdout?.trim()) { // whitespace-only output is left out of the content
+			content.push({ type: "text", text: `stdout:\n${execution.stdout.trimEnd()}` });
+		}
+		if (execution.stderr?.trim()) { // same rule as stdout
+			content.push({ type: "text", text: `stderr:\n${execution.stderr.trimEnd()}` });
+		}
+		if (!execution.success) { // a reported failure is returned as content rather than thrown
+			content.push({ type: "text", text: `error: ${execution.error ?? "playwright execution reported failure"}` });
+		}
+
+		const statusText = execution.success ? "Playwright executed successfully." : `Playwright execution failed: ${execution.error ?? "unknown error"}`; // NOTE(review): the missing-error fallback here differs from the one on the "error:" line above — confirm that is intended
+		if (content.length === 0) content.push({ type: "text", text: statusText }); // guarantees the result carries at least one content block
+
+		const details: PlaywrightDetails = { success: execution.success, statusText };
+		if (execution.result !== undefined) details.result = execution.result;
+		if (execution.stdout) details.stdout = execution.stdout; // details keep the raw, untrimmed stream text
+		if (execution.stderr) details.stderr = execution.stderr;
+		if (execution.error) details.error = execution.error; // copied whenever non-empty, independent of `success`
+		return { content, details };
+	} catch (err) {
+		throw new Error(`playwright_execute failed: ${errorMessage(err)}`, { cause: err }); // original error is preserved as `cause`
+	}
+}
+
+function formatPlaywrightResult(result: unknown): string { // Renders a result value as the text placed after "result: " in the tool content.
+	return typeof result === "string" ? result : JSON.stringify(result); // strings pass through unquoted; every other value is JSON-encoded
+} // NOTE(review): JSON.stringify returns undefined for functions/symbols and throws on BigInt or cycles — presumably unreachable for SDK results; confirm
+
 function errorMessage(err: unknown): string {
 	return err instanceof Error ? err.message : String(err);
 }
@@ -163,3 +248,10 @@ function asNavigationInput(value: unknown): CuaNavigationInput {
 	}
 	throw new Error("invalid computer_use_extra parameters");
 }
+
+function asPlaywrightInput(value: unknown): CuaPlaywrightInput { // Narrows untyped tool-call parameters; throws when they do not carry a string `code`.
+	if (value && typeof value === "object" && typeof (value as { code?: unknown }).code === "string") { // only `code` is validated; any other field is passed through unchecked
+		return value as CuaPlaywrightInput;
+	}
+	throw new Error("invalid playwright_execute parameters");
+}
diff --git a/packages/agent/src/translator/translator.ts b/packages/agent/src/translator/translator.ts
@@ -85,6 +85,17 @@ export class InternalComputerTranslator {
 		return { x: Math.trunc(pos.x), y: Math.trunc(pos.y) };
 	}
 
+	async executePlaywright(code: string, timeoutSec?: number): Promise<PlaywrightExecutionResult> { // Sends `code` to the SDK playwright.execute call for this session, with an optional sanitized timeout.
+		const truncated = timeoutSec !== undefined ? Math.trunc(timeoutSec) : undefined; // fractional seconds are dropped
+		const timeout = truncated !== undefined && truncated >= 1 // values below 1 (and NaN, which fails the comparison) are discarded
+			? Math.min(truncated, PLAYWRIGHT_MAX_TIMEOUT_SEC) // larger requests are capped at the module maximum
+			: undefined;
+		return this.client.browsers.playwright.execute(this.sessionId, {
+			code,
+			...(timeout !== undefined ? { timeout_sec: timeout } : {}), // key is omitted entirely when no usable timeout was given; presumably the service default then applies — confirm
+		});
+	}
+
 	async executeBatch(actions: CuaAction[]): Promise<BatchExecutionResult> {
 		const result: BatchExecutionResult = { readResults: [] };
 		const pending: KernelBatchAction[] = [];
@@ -228,6 +239,11 @@ export class InternalComputerTranslator {
 type KernelBatchAction =
 	Parameters<Kernel["browsers"]["computer"]["batch"]>[1]["actions"][number];
 
+export type PlaywrightExecutionResult = // resolved value of the SDK's playwright.execute call, derived from the SDK type rather than redeclared
+	Awaited<ReturnType<Kernel["browsers"]["playwright"]["execute"]>>;
+
+const PLAYWRIGHT_MAX_TIMEOUT_SEC = 300; // cap, in seconds, applied to caller-supplied timeouts in executePlaywright
+
 const CLICK_BUTTONS: ReadonlySet<string> = new Set<CuaMouseButton>(["left", "right", "middle", "back", "forward"]);
 const DRAG_BUTTONS: ReadonlySet<string> = new Set<CuaDragMouseButton>(["left", "right", "middle"]);
 

diff --git a/packages/agent/test/agent.test.ts b/packages/agent/test/agent.test.ts
@@ -144,6 +144,23 @@ describe("CuaAgent", () => {
 		]);
 	});
 
+	it("synthesizes a playwright_execute tool when requested", () => {
+		const runtime = resolveCuaRuntimeSpec("openai:gpt-5.5"); // resolved separately to obtain the provider's own tool names for the expectation
+		const agent = new CuaAgent({
+			browser,
+			client,
+			playwright: true,
+			initialState: {
+				model: "openai:gpt-5.5",
+			},
+		});
+
+		expect(agent.state.tools.map((tool) => tool.name)).toEqual([ // asserts both membership and order of the tool names
+			...runtime.toolExecutors.map((tool) => tool.definition.name),
+			"playwright_execute", // expected last, after the provider tools
+		]);
+	});
+
 	it("refreshes CUA runtime state when state.model changes", () => {
 		const runtime = resolveCuaRuntimeSpec("google:gemini-3-flash-preview");
 		const agent = new CuaAgent({