diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..e5ab6cb --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,31 @@ +name: Lint + +on: + pull_request: + branches: + - main + push: + branches: + - main + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Setup pnpm + uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0 + + - name: Setup Node.js + uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0 + with: + node-version-file: '.nvmrc' + cache: 'pnpm' + + - name: Install Dependencies + run: pnpm install + + - name: Lint & Type Check + run: pnpm run lint diff --git a/CLAUDE.md b/CLAUDE.md index 7623396..0b77bde 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,45 +16,81 @@ npm start # Run built server from lib/server.js ``` src/ ├── server.ts # MCP server entry, registers all tools + MCP resources +├── session/ +│ ├── state.ts # Session state maps, getBrowser(), getState(), SessionMetadata +│ └── lifecycle.ts # registerSession(), handleSessionTransition(), closeSession() +├── providers/ +│ ├── types.ts # SessionProvider interface, ConnectionConfig +│ ├── local-browser.provider.ts # Chrome/Firefox/Edge/Safari capability building +│ └── local-appium.provider.ts # iOS/Android via appium.config.ts ├── tools/ -│ ├── browser.tool.ts # Session state + start_browser + getBrowser() -│ ├── app-session.tool.ts # start_app_session (iOS/Android via Appium) -│ ├── navigate.tool.ts # URL navigation -│ ├── get-visible-elements.tool.ts # Element detection (web + mobile) -│ ├── click.tool.ts # Click/tap actions +│ ├── session.tool.ts # start_session (browser+mobile), close_session +│ ├── tabs.tool.ts # switch_tab +│ ├── launch-chrome.tool.ts # launch_chrome (remote debugging) +│ ├── navigate.tool.ts # navigateAction() + navigateTool +│ ├── click.tool.ts # clickAction() + 
clickTool +│ ├── set-value.tool.ts # setValueAction() + setValueTool +│ ├── scroll.tool.ts # scrollAction() + scrollTool +│ ├── gestures.tool.ts # tapAction(), swipeAction(), dragAndDropAction() +│ ├── context.tool.ts # switch_context (native/webview) +│ ├── device.tool.ts # rotate_device, hide_keyboard +│ ├── emulate-device.tool.ts # emulate_device (viewport/UA) +│ ├── cookies.tool.ts # set_cookie, delete_cookies +│ ├── execute-script.tool.ts # execute_script +│ ├── get-elements.tool.ts # get_elements (all elements, incl. below fold) │ └── ... # Other tools follow same pattern +├── resources/ +│ ├── index.ts # ResourceDefinition exports +│ ├── sessions.resource.ts # wdio://sessions, wdio://session/*/steps, wdio://session/*/code +│ ├── elements.resource.ts # wdio://session/current/elements +│ ├── accessibility.resource.ts# wdio://session/current/accessibility +│ ├── screenshot.resource.ts # wdio://session/current/screenshot +│ ├── cookies.resource.ts # wdio://session/current/cookies +│ ├── tabs.resource.ts # wdio://session/current/tabs +│ ├── contexts.resource.ts # wdio://session/current/contexts +│ ├── app-state.resource.ts # wdio://session/current/app-state +│ └── geolocation.resource.ts # wdio://session/current/geolocation ├── recording/ │ ├── step-recorder.ts # withRecording HOF, appendStep, session history access -│ ├── code-generator.ts # SessionHistory → WebdriverIO JS code -│ └── resources.ts # MCP resource builders (sessions index, step log) +│ └── code-generator.ts # SessionHistory → WebdriverIO JS code ├── scripts/ -│ └── get-interactable-browser-elements.ts # Browser-context script +│ ├── get-interactable-browser-elements.ts # Browser-context element detection +│ ├── get-browser-accessibility-tree.ts # Browser-context accessibility tree +│ ├── get-visible-mobile-elements.ts # Mobile visible element detection +│ └── get-elements.ts # Filter + paginate elements (used by tool + resource) ├── locators/ │ ├── element-filter.ts # Platform-specific element 
classification -│ ├── generate-all-locators.ts # Multi-strategy selector generation -│ └── source-parsing.ts # XML page source parsing for mobile +│ ├── locator-generation.ts # Multi-strategy selector generation +│ ├── xml-parsing.ts # XML page source parsing for mobile +│ ├── constants.ts # Shared locator constants +│ ├── types.ts # Locator type definitions +│ └── index.ts # Public exports ├── config/ -│ └── appium.config.ts # iOS/Android capability builders +│ └── appium.config.ts # iOS/Android capability builders (used by local-appium.provider) +├── utils/ +│ ├── parse-variables.ts # URI template variable parsing (parseBool, parseNumber, etc.) +│ └── zod-helpers.ts # coerceBoolean and other Zod utilities └── types/ ├── tool.ts # ToolDefinition interface + ├── resource.ts # ResourceDefinition interface └── recording.ts # RecordedStep, SessionHistory interfaces ``` ### Session State -Single active session model in `browser.tool.ts`: +Single active session model in `src/session/state.ts`: ```typescript -const browsers: Map = new Map(); -let currentSession: string | null = null; -const sessionMetadata: Map = new Map(); - -export function getBrowser(): WebdriverIO.Browser { - // Returns current active session or throws -} +// Private state — access via getState() or getBrowser() +export function getBrowser(): WebdriverIO.Browser { ... } +export function getState() { return state; } +export interface SessionMetadata { type: 'browser' | 'ios' | 'android'; capabilities: Record; isAttached: boolean; } ``` -State shared with `app-session.tool.ts` via `(getBrowser as any).__state`. 
+Session lifecycle managed via `src/session/lifecycle.ts`: +- `registerSession()` — registers browser + metadata + history, handles transition sentinel +- `handleSessionTransition()` — appends `__session_transition__` step to outgoing session +- `closeSession()` — terminates or detaches, marks endedAt, cleans up maps ### Tool Pattern @@ -81,17 +117,32 @@ export const myTool: ToolCallback = async ({ param }: { param: string }) => { } }; -// 3. Register in server.ts -server.tool(myToolDefinition.name, myToolDefinition.description, myToolDefinition.inputSchema, myTool); +// 3. Register in server.ts via the registerTool helper +registerTool(myToolDefinition, myTool); ``` ### Recording -All tools are wrapped with `withRecording()` in `server.ts`. Steps accumulate in `state.sessionHistory` (keyed by sessionId). -MCP resources expose history without tool calls: -- `wdio://sessions` — index of all sessions (fixed URI, discoverable via ListResources) -- `wdio://session/current/steps` — current session step log + generated JS (fixed URI) -- `wdio://session/{sessionId}/steps` — any session by ID (URI template, NOT listed by ListResources — see `docs/architecture/mcp-resources-notes.md`) +Selected tools are wrapped with `withRecording()` in `server.ts`. Steps accumulate in `state.sessionHistory` (keyed by sessionId). 
+ +MCP resources expose live session data — fixed URIs are discoverable via ListResources; entries marked "URI template" take a path parameter instead: + +**Session history:** +- `wdio://sessions` — index of all sessions +- `wdio://session/current/steps` — current session step log +- `wdio://session/current/code` — generated WebdriverIO JS for current session +- `wdio://session/{sessionId}/steps` — step log for any session (URI template) +- `wdio://session/{sessionId}/code` — generated JS for any session (URI template) + +**Live page state (current session):** +- `wdio://session/current/elements` — interactable elements (viewport-only; use `get_elements` tool with `inViewportOnly: false` for all) +- `wdio://session/current/accessibility` — accessibility tree +- `wdio://session/current/screenshot` — screenshot (base64) +- `wdio://session/current/cookies` — browser cookies +- `wdio://session/current/tabs` — open browser tabs +- `wdio://session/current/contexts` — native/webview contexts (mobile) +- `wdio://session/current/app-state/{bundleId}` — mobile app state for a bundle ID (URI template) +- `wdio://session/current/geolocation` — device geolocation ### Build @@ -103,25 +154,34 @@ MCP resources expose history without tool calls: | File | Purpose | |----------------------------------------------------|-----------------------------------------------| -| `src/server.ts` | MCP server init, tool registration | -| `src/tools/browser.tool.ts` | Session state management, `getBrowser()` | -| `src/tools/app-session.tool.ts` | Appium session creation | +| `src/server.ts` | MCP server init, tool + resource registration | +| `src/session/state.ts` | Session state maps, `getBrowser()`, `getState()` | +| `src/session/lifecycle.ts` | `registerSession()`, `closeSession()`, session transitions | +| `src/tools/session.tool.ts` | `start_session` (browser + mobile), `close_session` | +| `src/tools/tabs.tool.ts` | `switch_tab` | +| `src/tools/get-elements.tool.ts` | `get_elements` — all elements with filtering + pagination | +| `src/resources/` | All MCP resource definitions (10 
files) | +| `src/providers/local-browser.provider.ts` | Chrome/Firefox/Edge/Safari capability building | +| `src/providers/local-appium.provider.ts` | iOS/Android capabilities via appium.config.ts | | `src/scripts/get-interactable-browser-elements.ts` | Browser-context element detection | | `src/locators/` | Mobile element detection + locator generation | -| `src/recording/step-recorder.ts` | `withRecording(toolName, cb)` HOF — wraps every tool for step logging | +| `src/recording/step-recorder.ts` | `withRecording(toolName, cb)` HOF — wraps tools for step logging | | `src/recording/code-generator.ts` | Generates runnable WebdriverIO JS from `SessionHistory` | -| `src/recording/resources.ts` | Builds text for `wdio://sessions` and `wdio://session/*/steps` resources | +| `src/utils/zod-helpers.ts` | `coerceBoolean` for client interop | | `tsup.config.ts` | Build configuration | ## Gotchas ### Console Output -All console methods redirect to stderr. Chrome writes to stdout which corrupts MCP stdio protocol. +All console methods redirect to stderr via `console.error`. Chrome writes to stdout which corrupts MCP stdio protocol. ```typescript // In server.ts - do not remove -console.log = (...args) => process.stderr.write(util.format(...args) + '\n'); +console.log = (...args) => console.error('[LOG]', ...args); +console.info = (...args) => console.error('[INFO]', ...args); +console.warn = (...args) => console.error('[WARN]', ...args); +console.debug = (...args) => console.error('[DEBUG]', ...args); ``` ### Browser Scripts Must Be Self-Contained @@ -129,20 +189,18 @@ console.log = (...args) => process.stderr.write(util.format(...args) + '\n'); `get-interactable-browser-elements.ts` executes in browser context via `browser.execute()`. Cannot use Node.js APIs or external imports. 
-### Mobile State Sharing Hack +### Auto-Detach Behavior -`app-session.tool.ts` accesses browser.tool.ts state via: +Sessions created with `noReset: true` or without `appPath` automatically detach on close (don't terminate on Appium +server). -```typescript -const state = (getBrowser as any).__state; -``` +### MCP Resource URI Templates -This maintains single-session behavior across browser and mobile. +The MCP SDK only supports path-segment templates `{param}` in resource URIs — NOT RFC 6570 query param syntax `{?param}`. Resources using `{?param}` silently return "Resource not found". Keep resources at fixed URIs; expose parameterised access via tools instead. -### Auto-Detach Behavior +### Scripts vs Tools vs Resources -Sessions created with `noReset: true` or without `appPath` automatically detach on close (don't terminate on Appium -server). +Computation logic belongs in `src/scripts/` (no try/catch, returns raw data). Tools wrap scripts with try/catch and return `{ isError: true, content: [...] }` on failure. Resources wrap scripts and set `mimeType` in the response. ### Error Handling @@ -158,11 +216,12 @@ catch (e) { 1. Create `src/tools/my-tool.tool.ts` 2. Export `myToolDefinition` (Zod schema) and `myTool` (ToolCallback) -3. Import and register in `src/server.ts`: +3. 
Import and register in `src/server.ts` using the `registerTool` helper: ```typescript import { myToolDefinition, myTool } from './tools/my-tool.tool'; - server.tool(myToolDefinition.name, myToolDefinition.description, myToolDefinition.inputSchema, myTool); + registerTool(myToolDefinition, myTool); ``` + To wrap with recording: `registerTool(myToolDefinition, withRecording('my_tool', myTool));` ## Selector Syntax Reference @@ -179,6 +238,5 @@ catch (e) { See `docs/architecture/` for proposals: -- `session-configuration-proposal.md` — Cloud provider pattern (BrowserStack, SauceLabs) -- `interaction-sequencing-proposal.md` — Batch actions with state delta detection +- `session-configuration-proposal.md` — Cloud provider pattern (BrowserStack, SauceLabs) — providers/types.ts is the extension point - `multi-session-proposal.md` — Parallel sessions for sub-agent coordination \ No newline at end of file diff --git a/docs/architecture/interaction-sequencing-proposal.md b/docs/architecture/interaction-sequencing-proposal.md index c9e01ec..e959c97 100644 --- a/docs/architecture/interaction-sequencing-proposal.md +++ b/docs/architecture/interaction-sequencing-proposal.md @@ -122,6 +122,7 @@ interface SequenceResult { ### Why Stability Matters After clicking a button, the page might: + - Navigate (URL change) - Show a loading spinner - Fetch data and render new elements @@ -201,7 +202,7 @@ src/ 1. Create `interaction.tool.ts` with basic `execute_sequence` 2. Implement action dispatch (reuse existing tool logic) -3. Capture before/after state using `getVisibleElements` +3. Capture before/after state using `getElements` 4. 
Compute simple delta (appeared/disappeared by selector) ### Phase 2: Stability Detection @@ -241,6 +242,7 @@ execute_sequence({ ``` Response: + ```json { "completed": 3, @@ -271,6 +273,7 @@ execute_sequence({ ``` Response: + ```json { "completed": 2, @@ -300,6 +303,7 @@ execute_sequence({ ``` Response: + ```json { "completed": 1, @@ -331,6 +335,7 @@ execute_sequence({ ``` Response: + ```json { "completed": 3, @@ -361,6 +366,7 @@ Some actions (like `set_value`) rarely cause async changes. Could skip stability ### 2. How to handle infinite loading states? Options: + - Hard timeout (current approach) — returns partial delta - Detect specific loading patterns — report "page still loading" - Let AI decide — return `{ stable: false, reason: 'loading indicator visible' }` @@ -369,7 +375,8 @@ Options: ### 3. Should delta include off-screen elements? -Current `getVisibleElements` filters to viewport by default. For delta: +Current `getElements` filters to viewport by default. For delta: + - Viewport only = might miss elements that scrolled in/out - Full page = more accurate but larger payload @@ -378,6 +385,7 @@ Current `getVisibleElements` filters to viewport by default. For delta: ### 4. Performance: Full diff vs. key signals Two comparison strategies: + - **Full diff**: Compare all elements every poll (accurate, expensive) - **Key signals**: Compare signature only during polling, full diff only at end (fast, might miss rapid changes) @@ -386,6 +394,7 @@ Two comparison strategies: ### 5. What about conditional actions? Should we support: + ```typescript { action: 'click_element', selector: '#cookie-banner', optional: true } ``` @@ -399,6 +408,7 @@ Should we support: ### Existing Tools `execute_sequence` complements existing tools: + - Simple single actions still use `click_element`, `set_value`, etc. 
- Complex workflows use `execute_sequence` - No breaking changes to existing tools @@ -406,6 +416,7 @@ Should we support: ### Mobile Support Works identically for mobile sessions: + ```typescript execute_sequence({ actions: [ @@ -419,6 +430,7 @@ execute_sequence({ ### Multi-Session (Future) When multi-session support lands: + ```typescript execute_sequence({ sessionId: 'user-a', diff --git a/package.json b/package.json index 2239554..529a324 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,8 @@ "prebundle": "rimraf lib --glob ./*.tgz", "bundle": "tsup && shx chmod +x lib/server.js", "postbundle": "npm pack", - "lint": "eslint src/ --fix && tsc --noEmit", + "lint": "npm run lint:src && npm run lint:tests", + "lint:src": "eslint src/ --fix && tsc --noEmit", "lint:tests": "eslint tests/ --fix && tsc -p tsconfig.test.json --noEmit", "start": "node lib/server.js", "dev": "tsx --watch src/server.ts", diff --git a/src/providers/local-appium.provider.ts b/src/providers/local-appium.provider.ts new file mode 100644 index 0000000..d4ce07a --- /dev/null +++ b/src/providers/local-appium.provider.ts @@ -0,0 +1,101 @@ +import type { SessionProvider, ConnectionConfig } from './types'; +import { buildIOSCapabilities, buildAndroidCapabilities, getAppiumServerConfig } from '../config/appium.config'; + +export type LocalAppiumOptions = { + platform: 'iOS' | 'Android'; + appPath?: string; + deviceName: string; + platformVersion?: string; + automationName?: string; + appiumHost?: string; + appiumPort?: number; + appiumPath?: string; + autoGrantPermissions?: boolean; + autoAcceptAlerts?: boolean; + autoDismissAlerts?: boolean; + appWaitActivity?: string; + udid?: string; + noReset?: boolean; + fullReset?: boolean; + newCommandTimeout?: number; + capabilities?: Record; +}; + +export class LocalAppiumProvider implements SessionProvider { + name = 'local-appium'; + + getConnectionConfig(options: Record): ConnectionConfig { + const config = getAppiumServerConfig({ + hostname: 
options.appiumHost as string | undefined, + port: options.appiumPort as number | undefined, + path: options.appiumPath as string | undefined, + }); + return { protocol: 'http', ...config }; + } + + buildCapabilities(options: Record): Record { + const platform = options.platform as string; + const appPath = options.appPath as string | undefined; + const deviceName = options.deviceName as string; + const platformVersion = options.platformVersion as string | undefined; + const autoGrantPermissions = options.autoGrantPermissions as boolean | undefined; + const autoAcceptAlerts = options.autoAcceptAlerts as boolean | undefined; + const autoDismissAlerts = options.autoDismissAlerts as boolean | undefined; + const udid = options.udid as string | undefined; + const noReset = options.noReset as boolean | undefined; + const fullReset = options.fullReset as boolean | undefined; + const newCommandTimeout = options.newCommandTimeout as number | undefined; + const appWaitActivity = options.appWaitActivity as string | undefined; + const userCapabilities = (options.capabilities as Record | undefined) ?? {}; + + const capabilities: Record = platform === 'iOS' + ? 
buildIOSCapabilities(appPath, { + deviceName, + platformVersion, + automationName: (options.automationName as 'XCUITest') || 'XCUITest', + autoGrantPermissions, + autoAcceptAlerts, + autoDismissAlerts, + udid, + noReset, + fullReset, + newCommandTimeout, + }) + : buildAndroidCapabilities(appPath, { + deviceName, + platformVersion, + automationName: (options.automationName as 'UiAutomator2' | 'Espresso') || 'UiAutomator2', + autoGrantPermissions, + autoAcceptAlerts, + autoDismissAlerts, + appWaitActivity, + noReset, + fullReset, + newCommandTimeout, + }); + + const mergedCapabilities = { + ...capabilities, + ...userCapabilities, + }; + + for (const [key, value] of Object.entries(mergedCapabilities)) { + if (value === undefined) { + delete mergedCapabilities[key]; + } + } + + return mergedCapabilities; + } + + getSessionType(options: Record): 'ios' | 'android' { + const platform = options.platform as string; + return platform.toLowerCase() as 'ios' | 'android'; + } + + shouldAutoDetach(options: Record): boolean { + return options.noReset === true || !options.appPath; + } +} + +export const localAppiumProvider = new LocalAppiumProvider(); diff --git a/src/providers/local-browser.provider.ts b/src/providers/local-browser.provider.ts new file mode 100644 index 0000000..50bf303 --- /dev/null +++ b/src/providers/local-browser.provider.ts @@ -0,0 +1,118 @@ +import type { SessionProvider, ConnectionConfig } from './types'; + +export type LocalBrowserOptions = { + browser?: 'chrome' | 'firefox' | 'edge' | 'safari'; + headless?: boolean; + windowWidth?: number; + windowHeight?: number; + capabilities?: Record; +}; + +export class LocalBrowserProvider implements SessionProvider { + name = 'local-browser'; + + getConnectionConfig(_options: Record): ConnectionConfig { + return {}; // local — use WebdriverIO defaults + } + + buildCapabilities(options: Record): Record { + const selectedBrowser = (options.browser as string | undefined) ?? 
'chrome'; + const headless = (options.headless as boolean | undefined) ?? true; + const windowWidth = (options.windowWidth as number | undefined) ?? 1920; + const windowHeight = (options.windowHeight as number | undefined) ?? 1080; + const userCapabilities = (options.capabilities as Record | undefined) ?? {}; + + const headlessSupported = selectedBrowser !== 'safari'; + const effectiveHeadless = headless && headlessSupported; + + const chromiumArgs = [ + `--window-size=${windowWidth},${windowHeight}`, + '--no-sandbox', + '--disable-search-engine-choice-screen', + '--disable-infobars', + '--log-level=3', + '--use-fake-device-for-media-stream', + '--use-fake-ui-for-media-stream', + '--disable-web-security', + '--allow-running-insecure-content', + ]; + + if (effectiveHeadless) { + chromiumArgs.push('--headless=new'); + chromiumArgs.push('--disable-gpu'); + chromiumArgs.push('--disable-dev-shm-usage'); + } + + const firefoxArgs: string[] = []; + if (effectiveHeadless && selectedBrowser === 'firefox') { + firefoxArgs.push('-headless'); + } + + const capabilities: Record = { + acceptInsecureCerts: true, + }; + + switch (selectedBrowser) { + case 'chrome': + capabilities.browserName = 'chrome'; + capabilities['goog:chromeOptions'] = { args: chromiumArgs }; + break; + case 'edge': + capabilities.browserName = 'msedge'; + capabilities['ms:edgeOptions'] = { args: chromiumArgs }; + break; + case 'firefox': + capabilities.browserName = 'firefox'; + if (firefoxArgs.length > 0) { + capabilities['moz:firefoxOptions'] = { args: firefoxArgs }; + } + break; + case 'safari': + capabilities.browserName = 'safari'; + break; + } + + const mergedCapabilities: Record = { + ...capabilities, + ...userCapabilities, + 'goog:chromeOptions': this.mergeCapabilityOptions(capabilities['goog:chromeOptions'], userCapabilities['goog:chromeOptions']), + 'ms:edgeOptions': this.mergeCapabilityOptions(capabilities['ms:edgeOptions'], userCapabilities['ms:edgeOptions']), + 'moz:firefoxOptions': 
this.mergeCapabilityOptions(capabilities['moz:firefoxOptions'], userCapabilities['moz:firefoxOptions']), + }; + + for (const [key, value] of Object.entries(mergedCapabilities)) { + if (value === undefined) { + delete mergedCapabilities[key]; + } + } + + return mergedCapabilities; + } + + getSessionType(_options: Record): 'browser' { + return 'browser'; + } + + shouldAutoDetach(_options: Record): boolean { + return false; + } + + private mergeCapabilityOptions(defaultOptions: unknown, customOptions: unknown): unknown { + if (!defaultOptions || typeof defaultOptions !== 'object' || !customOptions || typeof customOptions !== 'object') { + return customOptions ?? defaultOptions; + } + + const defaultRecord = defaultOptions as Record; + const customRecord = customOptions as Record; + const merged = { ...defaultRecord, ...customRecord }; + if (Array.isArray(defaultRecord.args) || Array.isArray(customRecord.args)) { + merged.args = [ + ...(Array.isArray(defaultRecord.args) ? defaultRecord.args : []), + ...(Array.isArray(customRecord.args) ? 
customRecord.args : []), + ]; + } + return merged; + } +} + +export const localBrowserProvider = new LocalBrowserProvider(); diff --git a/src/providers/types.ts b/src/providers/types.ts new file mode 100644 index 0000000..816ba39 --- /dev/null +++ b/src/providers/types.ts @@ -0,0 +1,14 @@ +export interface ConnectionConfig { + hostname?: string; + port?: number; + path?: string; + protocol?: string; +} + +export interface SessionProvider { + name: string; + getConnectionConfig(options: Record): ConnectionConfig; + buildCapabilities(options: Record): Record; + getSessionType(options: Record): 'browser' | 'ios' | 'android'; + shouldAutoDetach(options: Record): boolean; +} diff --git a/src/recording/resources.ts b/src/recording/resources.ts deleted file mode 100644 index 8040361..0000000 --- a/src/recording/resources.ts +++ /dev/null @@ -1,55 +0,0 @@ -// src/recording/resources.ts -import type { SessionHistory } from '../types/recording'; -import { generateCode } from './code-generator'; -import { getSessionHistory } from './step-recorder'; -import { getBrowser } from '../tools/browser.tool'; - -function getCurrentSessionId(): string | null { - return (getBrowser as any).__state?.currentSession ?? null; -} - -export interface SessionStepsPayload { - stepsJson: string; - generatedJs: string; -} - -export function buildSessionsIndex(): string { - const histories = getSessionHistory(); - const currentId = getCurrentSessionId(); - const sessions = Array.from(histories.values()).map((h) => ({ - sessionId: h.sessionId, - type: h.type, - startedAt: h.startedAt, - ...(h.endedAt ? 
{ endedAt: h.endedAt } : {}), - stepCount: h.steps.length, - isCurrent: h.sessionId === currentId, - })); - return JSON.stringify({ sessions }); -} - -export function buildCurrentSessionSteps(): SessionStepsPayload | null { - const currentId = getCurrentSessionId(); - if (!currentId) return null; - - return buildSessionStepsById(currentId); -} - -export function buildSessionStepsById(sessionId: string): SessionStepsPayload | null { - const history = getSessionHistory().get(sessionId); - if (!history) return null; - - return buildSessionPayload(history); -} - -function buildSessionPayload(history: SessionHistory): SessionStepsPayload { - const stepsJson = JSON.stringify({ - sessionId: history.sessionId, - type: history.type, - startedAt: history.startedAt, - ...(history.endedAt ? { endedAt: history.endedAt } : {}), - stepCount: history.steps.length, - steps: history.steps, - }); - - return { stepsJson, generatedJs: generateCode(history) }; -} diff --git a/src/recording/step-recorder.ts b/src/recording/step-recorder.ts index d841c45..d87b0a5 100644 --- a/src/recording/step-recorder.ts +++ b/src/recording/step-recorder.ts @@ -1,14 +1,7 @@ // src/recording/step-recorder.ts import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { RecordedStep, SessionHistory } from '../types/recording'; -import { getBrowser } from '../tools/browser.tool'; - -function getState() { - return (getBrowser as any).__state as { - currentSession: string | null; - sessionHistory: Map; - }; -} +import { getState } from '../session/state'; export function appendStep( toolName: string, diff --git a/src/resources/accessibility.resource.ts b/src/resources/accessibility.resource.ts new file mode 100644 index 0000000..1f36cfb --- /dev/null +++ b/src/resources/accessibility.resource.ts @@ -0,0 +1,76 @@ +import type { ResourceDefinition } from '../types/resource'; +import { getBrowser } from '../session/state'; +import { getBrowserAccessibilityTree } from 
'../scripts/get-browser-accessibility-tree'; +import { encode } from '@toon-format/toon'; + +export async function readAccessibilityTree(params: { + limit?: number; + offset?: number; + roles?: string[]; +}): Promise<{ mimeType: string; text: string }> { + try { + const browser = getBrowser(); + + if (browser.isAndroid || browser.isIOS) { + return { + mimeType: 'text/plain', + text: 'Error: accessibility is browser-only. For mobile apps, use elements resource instead.', + }; + } + + const { limit = 0, offset = 0, roles } = params; + + let nodes = await getBrowserAccessibilityTree(browser); + + if (nodes.length === 0) { + return { mimeType: 'text/plain', text: 'No accessibility tree available' }; + } + + nodes = nodes.filter((n) => n.name && n.name.trim() !== ''); + + if (roles && roles.length > 0) { + const roleSet = new Set(roles.map((r) => r.toLowerCase())); + nodes = nodes.filter((n) => n.role && roleSet.has(n.role.toLowerCase())); + } + + const total = nodes.length; + + if (offset > 0) { + nodes = nodes.slice(offset); + } + if (limit > 0) { + nodes = nodes.slice(0, limit); + } + + const stateKeys = ['level', 'disabled', 'checked', 'expanded', 'selected', 'pressed', 'required', 'readonly'] as const; + const usedKeys = stateKeys.filter((k) => nodes.some((n) => n[k] !== '')); + const trimmed = nodes.map(({ role, name, selector, ...state }) => { + const node: Record = { role, name, selector }; + for (const k of usedKeys) node[k] = state[k]; + return node; + }); + + const result = { + total, + showing: trimmed.length, + hasMore: offset + trimmed.length < total, + nodes: trimmed, + }; + + const toon = encode(result).replace(/,""/g, ',').replace(/"",/g, ','); + + return { mimeType: 'text/plain', text: toon }; + } catch (e) { + return { mimeType: 'text/plain', text: `Error getting accessibility tree: ${e}` }; + } +} + +export const accessibilityResource: ResourceDefinition = { + name: 'session-current-accessibility', + uri: 'wdio://session/current/accessibility', + 
description: 'Accessibility tree for the current page. Returns all elements by default.', + handler: async () => { + const result = await readAccessibilityTree({}); + return { contents: [{ uri: 'wdio://session/current/accessibility', mimeType: result.mimeType, text: result.text }] }; + }, +}; \ No newline at end of file diff --git a/src/resources/app-state.resource.ts b/src/resources/app-state.resource.ts new file mode 100644 index 0000000..3c5aaab --- /dev/null +++ b/src/resources/app-state.resource.ts @@ -0,0 +1,40 @@ +import type { ResourceDefinition } from '../types/resource'; +import { ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp'; +import { getBrowser } from '../session/state'; + +async function readAppState(bundleId: string): Promise<{ mimeType: string; text: string }> { + try { + const browser = getBrowser(); + + const appIdentifier = browser.isAndroid + ? { appId: bundleId } + : { bundleId: bundleId }; + + const state: string = await browser.execute('mobile: queryAppState', appIdentifier); + + const stateMap: Record = { + 0: 'not installed', + 1: 'not running', + 2: 'running in background (suspended)', + 3: 'running in background', + 4: 'running in foreground', + }; + + return { + mimeType: 'text/plain', + text: `App state for ${bundleId}: ${stateMap[state] || 'unknown: ' + state}`, + }; + } catch (e) { + return { mimeType: 'text/plain', text: `Error getting app state: ${e}` }; + } +} + +export const appStateResource: ResourceDefinition = { + name: 'session-current-app-state', + template: new ResourceTemplate('wdio://session/current/app-state/{bundleId}', { list: undefined }), + description: 'App state for a given bundle ID', + handler: async (uri, variables) => { + const result = await readAppState(variables.bundleId as string); + return { contents: [{ uri: uri.href, mimeType: result.mimeType, text: result.text }] }; + }, +}; \ No newline at end of file diff --git a/src/resources/contexts.resource.ts b/src/resources/contexts.resource.ts 
new file mode 100644 index 0000000..8ab9759 --- /dev/null +++ b/src/resources/contexts.resource.ts @@ -0,0 +1,42 @@ +import type { ResourceDefinition } from '../types/resource'; +import { getBrowser } from '../session/state'; + +async function readContexts(): Promise<{ mimeType: string; text: string }> { + try { + const browser = getBrowser(); + const contexts = await browser.getContexts(); + return { mimeType: 'application/json', text: JSON.stringify(contexts) }; + } catch (e) { + return { mimeType: 'text/plain', text: `Error: ${e}` }; + } +} + +async function readCurrentContext(): Promise<{ mimeType: string; text: string }> { + try { + const browser = getBrowser(); + const currentContext = await browser.getContext(); + return { mimeType: 'application/json', text: JSON.stringify(currentContext) }; + } catch (e) { + return { mimeType: 'text/plain', text: `Error: ${e}` }; + } +} + +export const contextsResource: ResourceDefinition = { + name: 'session-current-contexts', + uri: 'wdio://session/current/contexts', + description: 'Available contexts (NATIVE_APP, WEBVIEW)', + handler: async () => { + const result = await readContexts(); + return { contents: [{ uri: 'wdio://session/current/contexts', mimeType: result.mimeType, text: result.text }] }; + }, +}; + +export const contextResource: ResourceDefinition = { + name: 'session-current-context', + uri: 'wdio://session/current/context', + description: 'Currently active context', + handler: async () => { + const result = await readCurrentContext(); + return { contents: [{ uri: 'wdio://session/current/context', mimeType: result.mimeType, text: result.text }] }; + }, +}; \ No newline at end of file diff --git a/src/resources/cookies.resource.ts b/src/resources/cookies.resource.ts new file mode 100644 index 0000000..d2ec73a --- /dev/null +++ b/src/resources/cookies.resource.ts @@ -0,0 +1,30 @@ +import type { ResourceDefinition } from '../types/resource'; +import { getBrowser } from '../session/state'; + +async function 
/**
 * Reads cookies from the current session as a JSON payload.
 *
 * @param name - When given, only that cookie is looked up: the payload is the
 *   first match, or `null` if there is none. When omitted, every cookie is
 *   returned as an array.
 * @returns Always `application/json`; failures are encoded as `{ "error": … }`.
 */
async function readCookies(name?: string): Promise<{ mimeType: string; text: string }> {
  const asJson = (value: unknown) => ({ mimeType: 'application/json', text: JSON.stringify(value) });

  try {
    const browser = getBrowser();

    if (!name) {
      return asJson(await browser.getCookies());
    }

    const matches = await browser.getCookies([name]);
    return asJson(matches.length === 0 ? null : matches[0]);
  } catch (e) {
    return asJson({ error: String(e) });
  }
}

/** Static resource exposing all cookies of the current session. */
export const cookiesResource: ResourceDefinition = {
  name: 'session-current-cookies',
  uri: 'wdio://session/current/cookies',
  description: 'Cookies for the current session',
  handler: async () => {
    const { mimeType, text } = await readCookies();
    return { contents: [{ uri: 'wdio://session/current/cookies', mimeType, text }] };
  },
};
/**
 * Reads the geolocation reported by `browser.getGeoLocation()` for the current
 * session.
 *
 * @returns The location serialised as `application/json`, or a `text/plain`
 *   `Error: …` message when the lookup (or session access) fails.
 */
async function readGeolocation(): Promise<{ mimeType: string; text: string }> {
  let location: unknown;

  try {
    location = await getBrowser().getGeoLocation();
  } catch (e) {
    return { mimeType: 'text/plain', text: `Error: ${e}` };
  }

  return { mimeType: 'application/json', text: JSON.stringify(location) };
}

/** Static resource exposing the current device geolocation. */
export const geolocationResource: ResourceDefinition = {
  name: 'session-current-geolocation',
  uri: 'wdio://session/current/geolocation',
  description: 'Current device geolocation',
  handler: async () => {
    const { mimeType, text } = await readGeolocation();
    return { contents: [{ uri: 'wdio://session/current/geolocation', mimeType, text }] };
  },
};
/** Longest edge, in pixels, a screenshot may keep before it is scaled down. */
const MAX_DIMENSION = 2000;
/** Encoded size above which the PNG is re-encoded as JPEG (1 MiB). */
const MAX_FILE_SIZE_BYTES = 1024 * 1024;

/**
 * Shrinks and re-encodes a screenshot.
 *
 * The image is scaled so its longer edge is at most {@link MAX_DIMENSION},
 * then encoded as PNG. If that PNG exceeds {@link MAX_FILE_SIZE_BYTES}, JPEG
 * is tried at quality 90, 80, … 10 until the result fits or the qualities run
 * out; the last JPEG is returned even if it is still too large.
 *
 * @param screenshotBase64 - Base64-encoded source image.
 * @returns The encoded bytes and the matching MIME type.
 */
async function processScreenshot(screenshotBase64: string): Promise<{ data: Buffer; mimeType: string }> {
  let pipeline = sharp(Buffer.from(screenshotBase64, 'base64'));

  const info = await pipeline.metadata();
  const width = info.width ?? 0;
  const height = info.height ?? 0;

  if (Math.max(width, height) > MAX_DIMENSION) {
    // Constrain only the longer edge; the other one follows from the aspect ratio.
    pipeline = pipeline.resize(width > height ? { width: MAX_DIMENSION } : { height: MAX_DIMENSION });
  }

  const png = await pipeline.png({ compressionLevel: 9 }).toBuffer();
  if (png.length <= MAX_FILE_SIZE_BYTES) {
    return { data: png, mimeType: 'image/png' };
  }

  let encoded = png;
  for (let quality = 90; quality >= 10 && encoded.length > MAX_FILE_SIZE_BYTES; quality -= 10) {
    encoded = await pipeline.jpeg({ quality, mozjpeg: true }).toBuffer();
  }
  return { data: encoded, mimeType: 'image/jpeg' };
}

/**
 * Captures a screenshot of the current session and returns it base64-encoded.
 * On failure the error message itself is base64-encoded and labelled
 * `text/plain`.
 */
async function readScreenshot(): Promise<{ mimeType: string; blob: string }> {
  try {
    const raw = await getBrowser().takeScreenshot();
    const processed = await processScreenshot(raw);
    return { mimeType: processed.mimeType, blob: processed.data.toString('base64') };
  } catch (e) {
    return { mimeType: 'text/plain', blob: Buffer.from(`Error: ${e}`).toString('base64') };
  }
}

/** Static resource exposing a screenshot of the current page as a blob. */
export const screenshotResource: ResourceDefinition = {
  name: 'session-current-screenshot',
  uri: 'wdio://session/current/screenshot',
  description: 'Screenshot of the current page',
  handler: async () => {
    const { mimeType, blob } = await readScreenshot();
    return { contents: [{ uri: 'wdio://session/current/screenshot', mimeType, blob }] };
  },
};
// ---- src/resources/sessions.resource.ts ----

/** Id of the session currently marked active in shared state, or `null` when there is none. */
function getCurrentSessionId(): string | null {
  return getState().currentSession;
}

/** Two renderings of one recorded session: its JSON step log and the generated script. */
export interface SessionStepsPayload {
  stepsJson: string;
  generatedJs: string;
}

/**
 * Builds a JSON index of every recorded session.
 *
 * @returns A JSON string `{ sessions: [...] }`; each entry carries the session
 *   id, type, start time, end time (only when set), step count, and whether it
 *   is the current session.
 */
export function buildSessionsIndex(): string {
  const histories = getSessionHistory();
  const currentId = getCurrentSessionId();
  const sessions = Array.from(histories.values()).map((h) => ({
    sessionId: h.sessionId,
    type: h.type,
    startedAt: h.startedAt,
    ...(h.endedAt ? { endedAt: h.endedAt } : {}),
    stepCount: h.steps.length,
    isCurrent: h.sessionId === currentId,
  }));
  return JSON.stringify({ sessions });
}

/**
 * Builds the payload for the currently active session.
 *
 * @returns `null` when no session is active or the active id has no history.
 */
export function buildCurrentSessionSteps(): SessionStepsPayload | null {
  const currentId = getCurrentSessionId();
  if (!currentId) return null;

  return buildSessionStepsById(currentId);
}

/**
 * Builds the payload for a specific session.
 *
 * @param sessionId - Id to look up in the recorded history.
 * @returns `null` when no history exists for `sessionId`.
 */
export function buildSessionStepsById(sessionId: string): SessionStepsPayload | null {
  const history = getSessionHistory().get(sessionId);
  if (!history) return null;

  return buildSessionPayload(history);
}

/** Serialises one session history to JSON and generates its code rendering. */
function buildSessionPayload(history: SessionHistory): SessionStepsPayload {
  const stepsJson = JSON.stringify({
    sessionId: history.sessionId,
    type: history.type,
    startedAt: history.startedAt,
    ...(history.endedAt ? { endedAt: history.endedAt } : {}),
    stepCount: history.steps.length,
    steps: history.steps,
  });

  return { stepsJson, generatedJs: generateCode(history) };
}

/** Static resource: JSON index of all sessions. */
export const sessionsIndexResource: ResourceDefinition = {
  name: 'sessions',
  uri: 'wdio://sessions',
  description: 'JSON index of all browser and app sessions with metadata and step counts',
  handler: async () => ({
    contents: [{ uri: 'wdio://sessions', mimeType: 'application/json', text: buildSessionsIndex() }],
  }),
};

/** Static resource: JSON step log of the active session, or a JSON error object when there is none. */
export const sessionCurrentStepsResource: ResourceDefinition = {
  name: 'session-current-steps',
  uri: 'wdio://session/current/steps',
  description: 'JSON step log for the currently active session',
  handler: async () => {
    const payload = buildCurrentSessionSteps();
    return {
      contents: [{
        uri: 'wdio://session/current/steps',
        mimeType: 'application/json',
        text: payload?.stepsJson ?? '{"error":"No active session"}',
      }],
    };
  },
};

/** Static resource: generated code for the active session, or a placeholder comment when there is none. */
export const sessionCurrentCodeResource: ResourceDefinition = {
  name: 'session-current-code',
  uri: 'wdio://session/current/code',
  description: 'Generated WebdriverIO JS code for the currently active session',
  handler: async () => {
    const payload = buildCurrentSessionSteps();
    return {
      contents: [{
        uri: 'wdio://session/current/code',
        mimeType: 'text/plain',
        text: payload?.generatedJs ?? '// No active session',
      }],
    };
  },
};

/** Templated resource: JSON step log for the session named in the URI. */
export const sessionStepsResource: ResourceDefinition = {
  name: 'session-steps',
  template: new ResourceTemplate('wdio://session/{sessionId}/steps', { list: undefined }),
  description: 'JSON step log for a specific session by ID',
  handler: async (uri, { sessionId }) => {
    const payload = buildSessionStepsById(sessionId as string);
    return {
      contents: [{
        uri: uri.href,
        mimeType: 'application/json',
        // sessionId comes straight from the requested URI, so it must be
        // JSON-escaped: a quote or backslash in it would otherwise produce an
        // invalid document under an application/json MIME type.
        text: payload?.stepsJson ?? JSON.stringify({ error: `Session not found: ${sessionId}` }),
      }],
    };
  },
};

/** Templated resource: generated code for the session named in the URI. */
export const sessionCodeResource: ResourceDefinition = {
  name: 'session-code',
  template: new ResourceTemplate('wdio://session/{sessionId}/code', { list: undefined }),
  description: 'Generated WebdriverIO JS code for a specific session by ID',
  handler: async (uri, { sessionId }) => {
    const payload = buildSessionStepsById(sessionId as string);
    return {
      contents: [{
        uri: uri.href,
        mimeType: 'text/plain',
        text: payload?.generatedJs ?? `// Session not found: ${sessionId}`,
      }],
    };
  },
};

// ---- src/resources/tabs.resource.ts ----

/**
 * Lists every window handle of the current session with its title and URL.
 *
 * Title and URL can only be read from the focused window, so each handle is
 * switched to in turn. Focus is always handed back to the window that was
 * active on entry, even when reading one of the tabs fails.
 *
 * @returns A JSON array of `{ handle, title, url, isActive }`, or a
 *   `text/plain` `Error: …` message on failure.
 */
async function readTabs(): Promise<{ mimeType: string; text: string }> {
  try {
    const browser = getBrowser();
    const handles = await browser.getWindowHandles();
    const currentHandle = await browser.getWindowHandle();
    const tabs = [];
    try {
      for (const handle of handles) {
        await browser.switchToWindow(handle);
        tabs.push({
          handle,
          title: await browser.getTitle(),
          url: await browser.getUrl(),
          isActive: handle === currentHandle,
        });
      }
    } finally {
      // Without this, an error mid-loop would leave the session focused on
      // whichever tab was being inspected, silently retargeting later commands.
      await browser.switchToWindow(currentHandle);
    }
    return { mimeType: 'application/json', text: JSON.stringify(tabs) };
  } catch (e) {
    return { mimeType: 'text/plain', text: `Error: ${e}` };
  }
}

/** Static resource exposing the tabs of the current session. */
export const tabsResource: ResourceDefinition = {
  name: 'session-current-tabs',
  uri: 'wdio://session/current/tabs',
  description: 'Browser tabs in the current session',
  handler: async () => {
    const result = await readTabs();
    return { contents: [{ uri: 'wdio://session/current/tabs', mimeType: result.mimeType, text: result.text }] };
  },
};
/** One page of detected elements plus the counts needed to request the next page. */
export type VisibleElementsResult = {
  /** Number of elements after viewport filtering, before pagination. */
  total: number;
  /** Number of elements in this page. */
  showing: number;
  /** Whether elements remain beyond this page. */
  hasMore: boolean;
  elements: unknown[];
};

/**
 * Collects elements from the current page or screen, optionally filters them
 * to the viewport, and paginates the result.
 *
 * Android/iOS sessions use the mobile element detector; every other session
 * uses the browser detector.
 *
 * @param browser - Session to inspect.
 * @param params - Optional settings:
 *   - `inViewportOnly` (default `true`): drop elements whose `isInViewport` is
 *     explicitly `false`.
 *   - `includeContainers` (default `false`): forwarded to the mobile detector only.
 *   - `includeBounds` (default `false`): forwarded to both detectors.
 *   - `limit` (default `0`): page size; values `<= 0` mean "no limit".
 *   - `offset` (default `0`): elements to skip; values `<= 0` skip nothing.
 * @returns The page together with `total`, `showing` and `hasMore`.
 */
export async function getElements(
  browser: WebdriverIO.Browser,
  params: {
    inViewportOnly?: boolean;
    includeContainers?: boolean;
    includeBounds?: boolean;
    limit?: number;
    offset?: number;
  },
): Promise<VisibleElementsResult> {
  const {
    inViewportOnly = true,
    includeContainers = false,
    includeBounds = false,
    limit = 0,
    offset = 0,
  } = params;

  let elements: { isInViewport?: boolean }[];

  if (browser.isAndroid || browser.isIOS) {
    const platform = browser.isAndroid ? 'android' : 'ios';
    elements = await getMobileVisibleElements(browser, platform, { includeContainers, includeBounds });
  } else {
    elements = await getInteractableBrowserElements(browser, { includeBounds });
  }

  if (inViewportOnly) {
    // Elements that do not report viewport membership at all are kept.
    elements = elements.filter((el) => el.isInViewport !== false);
  }

  const total = elements.length;

  // Number of elements actually skipped. A non-positive offset skips nothing,
  // so it must not enter the hasMore arithmetic below: with the raw value, a
  // negative offset reported hasMore=true even when everything was returned.
  const skipped = offset > 0 ? Math.trunc(offset) : 0;

  if (skipped > 0) {
    elements = elements.slice(skipped);
  }
  if (limit > 0) {
    elements = elements.slice(0, limit);
  }

  return {
    total,
    showing: elements.length,
    hasMore: skipped + elements.length < total,
    elements,
  };
}
'@modelcontextprotocol/sdk/server/stdio.js'; import type { ToolDefinition } from './types/tool'; -import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import { - closeSessionTool, - closeSessionToolDefinition, - startBrowserTool, - startBrowserToolDefinition -} from './tools/browser.tool'; +import type { ResourceDefinition } from './types/resource'; import { navigateTool, navigateToolDefinition } from './tools/navigate.tool'; import { clickTool, clickToolDefinition } from './tools/click.tool'; import { setValueTool, setValueToolDefinition } from './tools/set-value.tool'; import { scrollTool, scrollToolDefinition } from './tools/scroll.tool'; -import { getVisibleElementsTool, getVisibleElementsToolDefinition } from './tools/get-visible-elements.tool'; -import { getAccessibilityToolDefinition, getAccessibilityTreeTool } from './tools/get-accessibility-tree.tool'; -import { takeScreenshotTool, takeScreenshotToolDefinition } from './tools/take-screenshot.tool'; import { deleteCookiesTool, deleteCookiesToolDefinition, - getCookiesTool, - getCookiesToolDefinition, setCookieTool, setCookieToolDefinition, } from './tools/cookies.tool'; -import { startAppTool, startAppToolDefinition } from './tools/app-session.tool'; import { dragAndDropTool, dragAndDropToolDefinition, @@ -34,18 +23,8 @@ import { tapElementTool, tapElementToolDefinition, } from './tools/gestures.tool'; -import { getAppStateTool, getAppStateToolDefinition } from './tools/app-actions.tool'; +import { switchContextTool, switchContextToolDefinition } from './tools/context.tool'; import { - getContextsTool, - getContextsToolDefinition, - getCurrentContextTool, - getCurrentContextToolDefinition, - switchContextTool, - switchContextToolDefinition -} from './tools/context.tool'; -import { - getGeolocationTool, - getGeolocationToolDefinition, hideKeyboardTool, hideKeyboardToolDefinition, rotateDeviceTool, @@ -54,17 +33,33 @@ import { setGeolocationToolDefinition, } from './tools/device.tool'; 
import { executeScriptTool, executeScriptToolDefinition } from './tools/execute-script.tool'; -import { attachBrowserTool, attachBrowserToolDefinition } from './tools/attach-browser.tool'; +import { getElementsTool, getElementsToolDefinition } from './tools/get-elements.tool'; import { launchChromeTool, launchChromeToolDefinition } from './tools/launch-chrome.tool'; import { emulateDeviceTool, emulateDeviceToolDefinition } from './tools/emulate-device.tool'; import { withRecording } from './recording/step-recorder'; -import { buildCurrentSessionSteps, buildSessionsIndex, buildSessionStepsById } from './recording/resources'; - -// IMPORTANT: Redirect all console output to stderr to avoid messing with MCP protocol (Chrome writes to console) -const _originalConsoleLog = console.log; -const _originalConsoleInfo = console.info; -const _originalConsoleWarn = console.warn; -const _originalConsoleDebug = console.debug; +import { + accessibilityResource, + appStateResource, + contextResource, + contextsResource, + cookiesResource, + elementsResource, + geolocationResource, + screenshotResource, + sessionCodeResource, + sessionCurrentCodeResource, + sessionCurrentStepsResource, + sessionsIndexResource, + sessionStepsResource, + tabsResource, +} from './resources'; +import { + closeSessionTool, + closeSessionToolDefinition, + startSessionTool, + startSessionToolDefinition +} from './tools/session.tool'; +import { switchTabTool, switchTabToolDefinition } from './tools/tabs.tool'; console.log = (...args) => console.error('[LOG]', ...args); console.info = (...args) => console.error('[INFO]', ...args); @@ -85,136 +80,74 @@ const server = new McpServer({ }, }); -// Helper function to register tools using the new registerTool pattern const registerTool = (definition: ToolDefinition, callback: ToolCallback) => server.registerTool(definition.name, { description: definition.description, inputSchema: definition.inputSchema, }, callback); -// Browser and App Session Management 
-registerTool(startBrowserToolDefinition, withRecording('start_browser', startBrowserTool)); -registerTool(startAppToolDefinition, withRecording('start_app_session', startAppTool)); +const registerResource = (definition: ResourceDefinition) => { + if ('uri' in definition) { + server.registerResource( + definition.name, + definition.uri, + { description: definition.description }, + definition.handler, + ); + } else { + server.registerResource( + definition.name, + definition.template, + { description: definition.description }, + definition.handler, + ); + } +}; + +registerTool(startSessionToolDefinition, withRecording('start_session', startSessionTool)); registerTool(closeSessionToolDefinition, closeSessionTool); registerTool(launchChromeToolDefinition, withRecording('launch_chrome', launchChromeTool)); -registerTool(attachBrowserToolDefinition, withRecording('attach_browser', attachBrowserTool)); registerTool(emulateDeviceToolDefinition, emulateDeviceTool); registerTool(navigateToolDefinition, withRecording('navigate', navigateTool)); -// Element Discovery -registerTool(getVisibleElementsToolDefinition, getVisibleElementsTool); -registerTool(getAccessibilityToolDefinition, getAccessibilityTreeTool); +registerTool(switchTabToolDefinition, switchTabTool); -// Scrolling registerTool(scrollToolDefinition, withRecording('scroll', scrollTool)); -// Element Interaction registerTool(clickToolDefinition, withRecording('click_element', clickTool)); registerTool(setValueToolDefinition, withRecording('set_value', setValueTool)); -// Screenshots -registerTool(takeScreenshotToolDefinition, takeScreenshotTool); - -// Cookies -registerTool(getCookiesToolDefinition, getCookiesTool); registerTool(setCookieToolDefinition, setCookieTool); registerTool(deleteCookiesToolDefinition, deleteCookiesTool); -// Mobile Gesture Tools registerTool(tapElementToolDefinition, withRecording('tap_element', tapElementTool)); registerTool(swipeToolDefinition, withRecording('swipe', swipeTool)); 
registerTool(dragAndDropToolDefinition, withRecording('drag_and_drop', dragAndDropTool)); -// App Lifecycle Management -registerTool(getAppStateToolDefinition, getAppStateTool); - -// Context Switching (Native/WebView) -registerTool(getContextsToolDefinition, getContextsTool); -registerTool(getCurrentContextToolDefinition, getCurrentContextTool); registerTool(switchContextToolDefinition, switchContextTool); -// Device Interaction registerTool(rotateDeviceToolDefinition, rotateDeviceTool); registerTool(hideKeyboardToolDefinition, hideKeyboardTool); -registerTool(getGeolocationToolDefinition, getGeolocationTool); registerTool(setGeolocationToolDefinition, setGeolocationTool); -// Script Execution (Browser JS / Appium Mobile Commands) registerTool(executeScriptToolDefinition, executeScriptTool); - -// Session Recording Resources -server.registerResource( - 'sessions', - 'wdio://sessions', - { description: 'JSON index of all browser and app sessions with metadata and step counts' }, - async () => ({ - contents: [{ uri: 'wdio://sessions', mimeType: 'application/json', text: buildSessionsIndex() }], - }), -); - -server.registerResource( - 'session-current-steps', - 'wdio://session/current/steps', - { description: 'JSON step log for the currently active session' }, - async () => { - const payload = buildCurrentSessionSteps(); - return { - contents: [{ - uri: 'wdio://session/current/steps', - mimeType: 'application/json', - text: payload?.stepsJson ?? '{"error":"No active session"}' - }], - }; - }, -); - -server.registerResource( - 'session-current-code', - 'wdio://session/current/code', - { description: 'Generated WebdriverIO JS code for the currently active session' }, - async () => { - const payload = buildCurrentSessionSteps(); - return { - contents: [{ - uri: 'wdio://session/current/code', - mimeType: 'text/plain', - text: payload?.generatedJs ?? 
'// No active session' - }], - }; - }, -); - -server.registerResource( - 'session-steps', - new ResourceTemplate('wdio://session/{sessionId}/steps', { list: undefined }), - { description: 'JSON step log for a specific session by ID' }, - async (uri, { sessionId }) => { - const payload = buildSessionStepsById(sessionId as string); - return { - contents: [{ - uri: uri.href, - mimeType: 'application/json', - text: payload?.stepsJson ?? `{"error":"Session not found: ${sessionId}"}` - }], - }; - }, -); - -server.registerResource( - 'session-code', - new ResourceTemplate('wdio://session/{sessionId}/code', { list: undefined }), - { description: 'Generated WebdriverIO JS code for a specific session by ID' }, - async (uri, { sessionId }) => { - const payload = buildSessionStepsById(sessionId as string); - return { - contents: [{ - uri: uri.href, - mimeType: 'text/plain', - text: payload?.generatedJs ?? `// Session not found: ${sessionId}` - }], - }; - }, -); +registerTool(getElementsToolDefinition, getElementsTool); + +registerResource(sessionsIndexResource); +registerResource(sessionCurrentStepsResource); +registerResource(sessionCurrentCodeResource); +registerResource(sessionStepsResource); +registerResource(sessionCodeResource); + +registerResource(elementsResource); +registerResource(accessibilityResource); +registerResource(screenshotResource); +registerResource(cookiesResource); +registerResource(appStateResource); +registerResource(contextsResource); +registerResource(contextResource); +registerResource(geolocationResource); +registerResource(tabsResource); async function main() { const transport = new StdioServerTransport(); @@ -225,4 +158,4 @@ async function main() { main().catch((error) => { console.error('Fatal error in main():', error); process.exit(1); -}); +}); \ No newline at end of file diff --git a/src/session/lifecycle.ts b/src/session/lifecycle.ts new file mode 100644 index 0000000..ec2767d --- /dev/null +++ b/src/session/lifecycle.ts @@ -0,0 +1,57 @@ 
// ---- src/session/lifecycle.ts ----

/**
 * Closes out the history of the current session when a different session is
 * about to take over.
 *
 * If a session is active and its id differs from `newSessionId`, a
 * `__session_transition__` step is appended to its history and `endedAt` is
 * stamped. Nothing happens when no session is active, the id is unchanged, or
 * the active session has no history entry.
 *
 * @param newSessionId - Id of the session that is becoming current.
 */
export function handleSessionTransition(newSessionId: string): void {
  const state = getState();
  const outgoingId = state.currentSession;
  if (!outgoingId || outgoingId === newSessionId) return;

  const outgoing = state.sessionHistory.get(outgoingId);
  if (!outgoing) return;

  // One timestamp for both fields so the sentinel step and endedAt agree.
  const now = new Date().toISOString();
  outgoing.steps.push({
    index: outgoing.steps.length + 1,
    tool: '__session_transition__',
    params: { newSessionId },
    status: 'ok',
    durationMs: 0,
    timestamp: now,
  });
  outgoing.endedAt = now;
}

/**
 * Stores a newly created session and makes it the current one.
 *
 * Any previously current session gets its history closed via
 * {@link handleSessionTransition}; its browser and metadata stay registered.
 *
 * @param sessionId - Id of the new session.
 * @param browser - Driver instance for the session.
 * @param metadata - Session type, capabilities and attach flag.
 * @param historyEntry - Initial recording entry for the session.
 */
export function registerSession(
  sessionId: string,
  browser: WebdriverIO.Browser,
  metadata: SessionMetadata,
  historyEntry: SessionHistory,
): void {
  const state = getState();
  // handleSessionTransition is a no-op when there is nothing to transition from.
  handleSessionTransition(sessionId);
  state.browsers.set(sessionId, browser);
  state.sessionMetadata.set(sessionId, metadata);
  state.sessionHistory.set(sessionId, historyEntry);
  state.currentSession = sessionId;
}

/**
 * Removes a session from shared state, optionally terminating it.
 *
 * The history entry is kept (with `endedAt` stamped) so it stays available to
 * readers of the session history. Unknown ids are ignored.
 *
 * @param sessionId - Session to close.
 * @param detach - When `true`, the underlying WebDriver session is left running.
 * @param isAttached - When `true`, the session was attached to rather than
 *   created here, so it is likewise left running.
 */
export async function closeSession(sessionId: string, detach: boolean, isAttached: boolean): Promise<void> {
  const state = getState();
  const browser = state.browsers.get(sessionId);
  if (!browser) return;

  const history = state.sessionHistory.get(sessionId);
  if (history) {
    history.endedAt = new Date().toISOString();
  }

  // Only terminate the WebDriver session if we created it (not attached/borrowed)
  if (!detach && !isAttached) {
    await browser.deleteSession();
  }

  state.browsers.delete(sessionId);
  state.sessionMetadata.delete(sessionId);
  // Only clear the pointer when the closed session was the current one;
  // closing some other registered session must not orphan the active session.
  if (state.currentSession === sessionId) {
    state.currentSession = null;
  }
}

// ---- src/session/state.ts ----

/** Descriptive data kept alongside each registered session. */
export interface SessionMetadata {
  type: 'browser' | 'ios' | 'android';
  capabilities: Record<string, unknown>;
  /** `true` when the session was attached to rather than created here. */
  isAttached: boolean;
}

/** Process-wide session registry; every map is keyed by session id. */
const state = {
  browsers: new Map<string, WebdriverIO.Browser>(),
  currentSession: null as string | null,
  sessionMetadata: new Map<string, SessionMetadata>(),
  sessionHistory: new Map<string, SessionHistory>(),
};

/**
 * Returns the driver of the current session.
 *
 * @throws Error `No active browser session` when no session is current or the
 *   current id has no registered browser.
 */
export function getBrowser(): WebdriverIO.Browser {
  const sessionId = state.currentSession;
  const browser = sessionId === null ? undefined : state.browsers.get(sessionId);
  if (!browser) {
    throw new Error('No active browser session');
  }
  return browser;
}

/** Returns the shared, mutable session registry (the same object on every call). */
export function getState() {
  return state;
}
{ appId: bundleId } - : { bundleId: bundleId }; - - const state: string = await browser.execute('mobile: queryAppState', appIdentifier); - - const stateMap: Record = { - 0: 'not installed', - 1: 'not running', - 2: 'running in background (suspended)', - 3: 'running in background', - 4: 'running in foreground', - }; - - return { - content: [ - { - type: 'text', - text: `App state for ${bundleId}: ${stateMap[state] || 'unknown: ' + state}`, - }, - ], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error getting app state: ${e}` }], - }; - } -}; - diff --git a/src/tools/app-session.tool.ts b/src/tools/app-session.tool.ts deleted file mode 100644 index b553be8..0000000 --- a/src/tools/app-session.tool.ts +++ /dev/null @@ -1,212 +0,0 @@ -import { remote } from 'webdriverio'; -import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; -import type { ToolDefinition } from '../types/tool'; -import type { SessionHistory } from '../types/recording'; -import { z } from 'zod'; -import { buildAndroidCapabilities, buildIOSCapabilities, getAppiumServerConfig, } from '../config/appium.config'; -import { getBrowser } from './browser.tool'; - -export const startAppToolDefinition: ToolDefinition = { - name: 'start_app_session', - description: 'starts a mobile app session (iOS/Android) via Appium', - inputSchema: { - platform: z.enum(['iOS', 'Android']).describe('Mobile platform'), - appPath: z.string().optional().describe('Path to the app file (.app/.apk/.ipa). 
Required unless noReset=true (connecting to already-running app)'), - deviceName: z.string().describe('Device/emulator/simulator name'), - platformVersion: z.string().optional().describe('OS version (e.g., "17.0", "14")'), - automationName: z - .enum(['XCUITest', 'UiAutomator2', 'Espresso']) - .optional() - .describe('Automation driver name'), - appiumHost: z.string().optional().describe('Appium server hostname (overrides APPIUM_URL env var)'), - appiumPort: z.number().optional().describe('Appium server port (overrides APPIUM_URL_PORT env var)'), - appiumPath: z.string().optional().describe('Appium server path (overrides APPIUM_PATH env var)'), - autoGrantPermissions: z.boolean().optional().describe('Auto-grant app permissions (default: true)'), - autoAcceptAlerts: z.boolean().optional().describe('Auto-accept alerts (default: true)'), - autoDismissAlerts: z.boolean().optional().describe('Auto-dismiss alerts (default: false, will override "autoAcceptAlerts" to undefined if set)'), - appWaitActivity: z.string().optional().describe('Activity to wait for on launch (Android only)'), - udid: z.string().optional().describe('Unique Device Identifier for iOS real device testing (e.g., "00008030-001234567890002E")'), - noReset: z.boolean().optional().describe('Do not reset app state before session (preserves app data). Default: false'), - fullReset: z.boolean().optional().describe('Uninstall app before/after session. Default: true. Set to false with noReset=true to preserve app state completely'), - newCommandTimeout: z.number().min(0).optional().default(300).describe('How long (in seconds) Appium will wait for a new command before assuming the client has quit and ending the session. Default: 300.'), - capabilities: z.record(z.string(), z.unknown()).optional().describe('Additional Appium/WebDriver capabilities to merge with defaults (e.g. 
appium:udid, appium:chromedriverExecutable, appium:autoWebview)'), - }, -}; - -// Access shared state from browser.tool.ts -export const getState = () => { - const sharedState = (getBrowser as any).__state; - if (!sharedState) { - throw new Error('Browser state not initialized'); - } - return sharedState as { - browsers: Map; - currentSession: string | null; - sessionMetadata: Map; - sessionHistory: Map; - }; -}; - -export const startAppTool: ToolCallback = async (args: { - platform: 'iOS' | 'Android'; - appPath?: string; - deviceName: string; - platformVersion?: string; - automationName?: 'XCUITest' | 'UiAutomator2' | 'Espresso'; - appiumHost?: string; - appiumPort?: number; - appiumPath?: string; - autoGrantPermissions?: boolean; - autoAcceptAlerts?: boolean; - autoDismissAlerts?: boolean; - appWaitActivity?: string; - udid?: string; - noReset?: boolean; - fullReset?: boolean; - newCommandTimeout?: number; - capabilities?: Record; -}): Promise => { - try { - const { - platform, - appPath, - deviceName, - platformVersion, - automationName, - appiumHost, - appiumPort, - appiumPath, - autoGrantPermissions = true, - autoAcceptAlerts, - autoDismissAlerts, - appWaitActivity, - udid, - noReset, - fullReset, - newCommandTimeout = 300, - capabilities: userCapabilities = {}, - } = args; - - // Validate: either appPath or noReset=true is required - if (!appPath && noReset !== true) { - return { - content: [{ - type: 'text', - text: 'Error: Either "appPath" must be provided to install an app, or "noReset: true" must be set to connect to an already-running app.', - }], - }; - } - - // Get Appium server configuration - const serverConfig = getAppiumServerConfig({ - hostname: appiumHost, - port: appiumPort, - path: appiumPath, - }); - - // Build platform-specific capabilities - const capabilities: Record = platform === 'iOS' - ? 
buildIOSCapabilities(appPath, { - deviceName, - platformVersion, - automationName: (automationName as 'XCUITest') || 'XCUITest', - autoGrantPermissions, - autoAcceptAlerts, - autoDismissAlerts, - udid, - noReset, - fullReset, - newCommandTimeout, - }) - : buildAndroidCapabilities(appPath, { - deviceName, - platformVersion, - automationName: (automationName as 'UiAutomator2' | 'Espresso') || 'UiAutomator2', - autoGrantPermissions, - autoAcceptAlerts, - autoDismissAlerts, - appWaitActivity, - noReset, - fullReset, - newCommandTimeout, - }); - - const mergedCapabilities = { - ...capabilities, - ...userCapabilities, - }; - for (const [key, value] of Object.entries(mergedCapabilities)) { - if (value === undefined) { - delete mergedCapabilities[key]; - } - } - - // Create Appium session - const browser = await remote({ - protocol: 'http', - hostname: serverConfig.hostname, - port: serverConfig.port, - path: serverConfig.path, - capabilities: mergedCapabilities, - }); - - const { sessionId } = browser; - - // Store session and metadata - // Auto-set isAttached=true when noReset or no appPath to preserve session on close - const shouldAutoDetach = noReset === true || !appPath; - const state = getState(); - state.browsers.set(sessionId, browser); - state.sessionMetadata.set(sessionId, { - type: platform.toLowerCase() as 'ios' | 'android', - capabilities: mergedCapabilities, - isAttached: shouldAutoDetach, - }); - - // If replacing an active session, close its history with transition sentinel - if (state.currentSession && state.currentSession !== sessionId) { - const outgoing = state.sessionHistory.get(state.currentSession); - if (outgoing) { - outgoing.steps.push({ - index: outgoing.steps.length + 1, - tool: '__session_transition__', - params: { newSessionId: sessionId }, - status: 'ok', - durationMs: 0, - timestamp: new Date().toISOString(), - }); - outgoing.endedAt = new Date().toISOString(); - } - } - - state.sessionHistory.set(sessionId, { - sessionId, - type: 
platform.toLowerCase() as 'ios' | 'android', - startedAt: new Date().toISOString(), - capabilities: mergedCapabilities as Record, - appiumConfig: { hostname: serverConfig.hostname, port: serverConfig.port, path: serverConfig.path }, - steps: [], - }); - - state.currentSession = sessionId; - - const appInfo = appPath ? `\nApp: ${appPath}` : '\nApp: (connected to running app)'; - const detachNote = shouldAutoDetach - ? '\n\n(Auto-detach enabled: session will be preserved on close. Use close_session({ detach: false }) to force terminate.)' - : ''; - return { - content: [ - { - type: 'text', - text: `${platform} app session started with sessionId: ${sessionId}\nDevice: ${deviceName}${appInfo}\nAppium Server: ${serverConfig.hostname}:${serverConfig.port}${serverConfig.path}${detachNote}`, - }, - ], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error starting app session: ${e}` }], - }; - } -}; diff --git a/src/tools/attach-browser.tool.ts b/src/tools/attach-browser.tool.ts deleted file mode 100644 index e576b05..0000000 --- a/src/tools/attach-browser.tool.ts +++ /dev/null @@ -1,158 +0,0 @@ -import { remote } from 'webdriverio'; -import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; -import type { ToolDefinition } from '../types/tool'; -import { z } from 'zod'; -import { getBrowser } from './browser.tool'; - -export const attachBrowserToolDefinition: ToolDefinition = { - name: 'attach_browser', - description: `Attach to a Chrome instance already running with --remote-debugging-port. 
- -Use launch_chrome() first to prepare and launch Chrome with remote debugging enabled.`, - inputSchema: { - port: z.number().default(9222).describe('Chrome remote debugging port (default: 9222)'), - host: z.string().default('localhost').describe('Host where Chrome is running (default: localhost)'), - navigationUrl: z.string().optional().describe('URL to navigate to immediately after attaching'), - }, -}; - -type TabSnapshot = { activeTabUrl: string | undefined; allTabUrls: string[] }; - -// ChromeDriver injects a BiDi-CDP Mapper page when creating a session. If the previous session -// was detached without proper cleanup, this target remains and causes "unexpected alert open" on -// the next attach attempt. Close any stale mappers before creating a new session. -// Returns the active tab URL (first real page tab) and all page tab URLs — Chrome lists the -// active/focused tab first in /json. -async function closeStaleMappers(host: string, port: number): Promise { - try { - const res = await fetch(`http://${host}:${port}/json`); - const targets = await res.json() as { id: string; title: string; type: string; url: string }[]; - const mappers = targets.filter((t) => t.title?.includes('BiDi')); - await Promise.all(mappers.map((t) => fetch(`http://${host}:${port}/json/close/${t.id}`))); - const pages = targets.filter((t) => t.type === 'page' && !t.title?.includes('BiDi')); - return { activeTabUrl: pages[0]?.url, allTabUrls: pages.map((t) => t.url) }; - } catch { - return { activeTabUrl: undefined, allTabUrls: [] }; - } -} - -// After CDP session init, Chrome blanks the first tab it takes over. This restores any tabs -// that became about:blank and then switches focus to the originally active tab. 
-async function restoreAndSwitchToActiveTab( - browser: WebdriverIO.Browser, - activeTabUrl: string, - allTabUrls: string[], -): Promise { - const handles = await browser.getWindowHandles(); - const currentUrls: string[] = []; - for (const handle of handles) { - await browser.switchToWindow(handle); - currentUrls.push(await browser.getUrl()); - } - - // Restore blank tabs that had a known URL before attaching. - const missingUrls = allTabUrls.filter((u) => !currentUrls.includes(u)); - let missingIdx = 0; - for (let i = 0; i < handles.length; i++) { - if (currentUrls[i] === 'about:blank' && missingIdx < missingUrls.length) { - await browser.switchToWindow(handles[i]); - await browser.url(missingUrls[missingIdx]); - currentUrls[i] = missingUrls[missingIdx++]; - } - } - - // Switch to the originally active tab. - for (let i = 0; i < handles.length; i++) { - if (currentUrls[i] === activeTabUrl) { - await browser.switchToWindow(handles[i]); - break; - } - } -} - -async function waitForCDP(host: string, port: number, timeoutMs = 10000): Promise { - const deadline = Date.now() + timeoutMs; - while (Date.now() < deadline) { - try { - const res = await fetch(`http://${host}:${port}/json/version`); - if (res.ok) return; - } catch { - // not ready yet - } - await new Promise((r) => setTimeout(r, 300)); - } - throw new Error(`Chrome did not expose CDP on ${host}:${port} within ${timeoutMs}ms`); -} - -export const attachBrowserTool: ToolCallback = async ({ - port = 9222, - host = 'localhost', - navigationUrl, -}: { - port?: number; - host?: string; - navigationUrl?: string; -}): Promise => { - try { - const state = (getBrowser as any).__state; - - await waitForCDP(host, port); - const { activeTabUrl, allTabUrls } = await closeStaleMappers(host, port); - - const browser = await remote({ - connectionRetryTimeout: 30000, - connectionRetryCount: 3, - capabilities: { - browserName: 'chrome', - unhandledPromptBehavior: 'dismiss', - webSocketUrl: false, - 'goog:chromeOptions': { - 
debuggerAddress: `${host}:${port}`, - }, - }, - }); - - const { sessionId } = browser; - state.browsers.set(sessionId, browser); - state.currentSession = sessionId; - state.sessionMetadata.set(sessionId, { - type: 'browser', - capabilities: browser.capabilities, - isAttached: true, - }); - state.sessionHistory.set(sessionId, { - sessionId, - type: 'browser', - startedAt: new Date().toISOString(), - capabilities: { - browserName: 'chrome', - 'goog:chromeOptions': { - debuggerAddress: `${host}:${port}`, - }, - }, - steps: [], - }); - - if (navigationUrl) { - await browser.url(navigationUrl); - } else if (activeTabUrl) { - await restoreAndSwitchToActiveTab(browser, activeTabUrl, allTabUrls); - } - - const title = await browser.getTitle(); - const url = await browser.getUrl(); - - return { - content: [{ - type: 'text', - text: `Attached to Chrome on ${host}:${port}\nSession ID: ${sessionId}\nCurrent page: "${title}" (${url})`, - }], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error attaching to browser: ${e}` }], - }; - } -}; diff --git a/src/tools/browser.tool.ts b/src/tools/browser.tool.ts deleted file mode 100644 index d3171cd..0000000 --- a/src/tools/browser.tool.ts +++ /dev/null @@ -1,258 +0,0 @@ -import { remote } from 'webdriverio'; -import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; -import type { ToolDefinition } from '../types/tool'; -import type { SessionHistory } from '../types/recording'; -import { z } from 'zod'; - -const supportedBrowsers = ['chrome', 'firefox', 'edge', 'safari'] as const; -const browserSchema = z.enum(supportedBrowsers).default('chrome'); -type SupportedBrowser = z.infer; - -export const startBrowserToolDefinition: ToolDefinition = { - name: 'start_browser', - description: 'starts a browser session (Chrome, Firefox, Edge, Safari) and sets it to the current state. 
Prefer headless: true unless the user explicitly asks to see the browser.', - inputSchema: { - browser: browserSchema.describe('Browser to launch: chrome, firefox, edge, safari (default: chrome)'), - headless: z.boolean().optional().default(true), - windowWidth: z.number().min(400).max(3840).optional().default(1920), - windowHeight: z.number().min(400).max(2160).optional().default(1080), - navigationUrl: z.string().optional().describe('URL to navigate to after starting the browser'), - capabilities: z.record(z.string(), z.unknown()).optional().describe('Additional W3C capabilities to merge with defaults (e.g. goog:chromeOptions args/extensions/prefs)'), - }, -}; - -export const closeSessionToolDefinition: ToolDefinition = { - name: 'close_session', - description: 'closes or detaches from the current browser or app session', - inputSchema: { - detach: z.boolean().optional().describe('If true, disconnect from session without terminating it (preserves app state). Default: false'), - }, -}; - -const state: { - browsers: Map; - currentSession: string | null; - sessionMetadata: Map; - sessionHistory: Map; -} = { - browsers: new Map(), - currentSession: null, - sessionMetadata: new Map(), - sessionHistory: new Map(), -}; - -export const getBrowser = () => { - const browser = state.browsers.get(state.currentSession); - if (!browser) { - throw new Error('No active browser session'); - } - return browser; -}; -// Export state for app-session.tool.ts to access -(getBrowser as any).__state = state; - -export const startBrowserTool: ToolCallback = async ({ - browser = 'chrome', - headless = true, - windowWidth = 1920, - windowHeight = 1080, - navigationUrl, - capabilities: userCapabilities = {} -}: { - browser?: SupportedBrowser; - headless?: boolean; - windowWidth?: number; - windowHeight?: number; - navigationUrl?: string; - capabilities?: Record; -}): Promise => { - const browserDisplayNames: Record = { - chrome: 'Chrome', - firefox: 'Firefox', - edge: 'Edge', - safari: 
'Safari', - }; - const selectedBrowser = browser; - const headlessSupported = selectedBrowser !== 'safari'; - const effectiveHeadless = headless && headlessSupported; - const chromiumArgs = [ - `--window-size=${windowWidth},${windowHeight}`, - '--no-sandbox', - '--disable-search-engine-choice-screen', - '--disable-infobars', - '--log-level=3', - '--use-fake-device-for-media-stream', - '--use-fake-ui-for-media-stream', - '--disable-web-security', - '--allow-running-insecure-content', - ]; - - // Add headless argument if enabled - if (effectiveHeadless) { - chromiumArgs.push('--headless=new'); - chromiumArgs.push('--disable-gpu'); - chromiumArgs.push('--disable-dev-shm-usage'); - } - - const firefoxArgs: string[] = []; - if (effectiveHeadless && selectedBrowser === 'firefox') { - firefoxArgs.push('-headless'); - } - - const capabilities: Record = { - acceptInsecureCerts: true, - }; - - switch (selectedBrowser) { - case 'chrome': - capabilities.browserName = 'chrome'; - capabilities['goog:chromeOptions'] = { args: chromiumArgs }; - break; - case 'edge': - capabilities.browserName = 'msedge'; - capabilities['ms:edgeOptions'] = { args: chromiumArgs }; - break; - case 'firefox': - capabilities.browserName = 'firefox'; - if (firefoxArgs.length > 0) { - capabilities['moz:firefoxOptions'] = { args: firefoxArgs }; - } - break; - case 'safari': - capabilities.browserName = 'safari'; - break; - } - - const mergeCapabilityOptions = (defaultOptions: unknown, customOptions: unknown) => { - if (!defaultOptions || typeof defaultOptions !== 'object' || !customOptions || typeof customOptions !== 'object') { - return customOptions ?? defaultOptions; - } - - const defaultRecord = defaultOptions as Record; - const customRecord = customOptions as Record; - const merged = { ...defaultRecord, ...customRecord }; - if (Array.isArray(defaultRecord.args) || Array.isArray(customRecord.args)) { - merged.args = [ - ...(Array.isArray(defaultRecord.args) ? 
defaultRecord.args : []), - ...(Array.isArray(customRecord.args) ? customRecord.args : []), - ]; - } - return merged; - }; - - const mergedCapabilities: Record = { - ...capabilities, - ...userCapabilities, - 'goog:chromeOptions': mergeCapabilityOptions(capabilities['goog:chromeOptions'], userCapabilities['goog:chromeOptions']), - 'ms:edgeOptions': mergeCapabilityOptions(capabilities['ms:edgeOptions'], userCapabilities['ms:edgeOptions']), - 'moz:firefoxOptions': mergeCapabilityOptions(capabilities['moz:firefoxOptions'], userCapabilities['moz:firefoxOptions']), - }; - for (const [key, value] of Object.entries(mergedCapabilities)) { - if (value === undefined) { - delete mergedCapabilities[key]; - } - } - - const wdioBrowser = await remote({ - capabilities: mergedCapabilities, - }); - - const { sessionId } = wdioBrowser; - state.browsers.set(sessionId, wdioBrowser); - state.sessionMetadata.set(sessionId, { - type: 'browser', - capabilities: wdioBrowser.capabilities, - isAttached: false, - }); - - // If replacing an active session, close its history and append transition sentinel - if (state.currentSession && state.currentSession !== sessionId) { - const outgoing = state.sessionHistory.get(state.currentSession); - if (outgoing) { - outgoing.steps.push({ - index: outgoing.steps.length + 1, - tool: '__session_transition__', - params: { newSessionId: sessionId }, - status: 'ok', - durationMs: 0, - timestamp: new Date().toISOString(), - }); - outgoing.endedAt = new Date().toISOString(); - } - } - - state.sessionHistory.set(sessionId, { - sessionId, - type: 'browser', - startedAt: new Date().toISOString(), - capabilities: wdioBrowser.capabilities as Record, - steps: [], - }); - - state.currentSession = sessionId; - - let sizeNote = ''; - try { - await wdioBrowser.setWindowSize(windowWidth, windowHeight); - } catch (e) { - sizeNote = `\nNote: Unable to set window size (${windowWidth}x${windowHeight}). 
${e}`; - } - - // Navigate to URL if provided - if (navigationUrl) { - await wdioBrowser.url(navigationUrl); - } - - const modeText = effectiveHeadless ? 'headless' : 'headed'; - const browserText = browserDisplayNames[selectedBrowser]; - const urlText = navigationUrl ? ` and navigated to ${navigationUrl}` : ''; - const headlessNote = headless && !headlessSupported - ? '\nNote: Safari does not support headless mode. Started in headed mode.' - : ''; - return { - content: [{ - type: 'text', - text: `${browserText} browser started in ${modeText} mode with sessionId: ${sessionId} (${windowWidth}x${windowHeight})${urlText}${headlessNote}${sizeNote}`, - }], - }; -}; - -export const closeSessionTool: ToolCallback = async (args: { detach?: boolean } = {}): Promise => { - try { - const browser = getBrowser(); - const sessionId = state.currentSession; - const metadata = state.sessionMetadata.get(sessionId); - - // Retain history but mark session as ended - const history = state.sessionHistory.get(sessionId); - if (history) { - history.endedAt = new Date().toISOString(); - } - - // Skip deleteSession for attached sessions (not created by us) or when user explicitly detaches - const effectiveDetach = args.detach || !!metadata?.isAttached; - if (!effectiveDetach) { - await browser.deleteSession(); - } - - // Always clean up local state - state.browsers.delete(sessionId); - state.sessionMetadata.delete(sessionId); - state.currentSession = null; - - const action = effectiveDetach ? 'detached from' : 'closed'; - const note = args.detach && !metadata?.isAttached - ? '\nNote: Session will remain active on Appium server.' 
- : ''; - - return { - content: [{ type: 'text', text: `Session ${sessionId} ${action}${note}` }], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error closing session: ${e}` }], - }; - } -}; diff --git a/src/tools/click.tool.ts b/src/tools/click.tool.ts index 0e5e4a6..1a98ea7 100644 --- a/src/tools/click.tool.ts +++ b/src/tools/click.tool.ts @@ -1,8 +1,9 @@ -import { getBrowser } from './browser.tool'; +import { getBrowser } from '../session/state'; import { z } from 'zod'; import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; +import { coerceBoolean } from '../utils/zod-helpers'; const defaultTimeout: number = 3000; @@ -11,12 +12,12 @@ export const clickToolDefinition: ToolDefinition = { description: 'clicks an element', inputSchema: { selector: z.string().describe('Value for the selector, in the form of css selector or xpath ("button.my-class" or "//button[@class=\'my-class\']" or "button=Exact text with spaces" or "a*=Link containing text")'), - scrollToView: z.boolean().optional().describe('Whether to scroll the element into view before clicking').default(true), + scrollToView: coerceBoolean.optional().describe('Whether to scroll the element into view before clicking').default(true), timeout: z.number().optional().describe('Maximum time to wait for element in milliseconds'), }, }; -const clickAction = async (selector: string, timeout: number, scrollToView = true): Promise => { +export const clickAction = async (selector: string, timeout: number, scrollToView = true): Promise => { try { const browser = getBrowser(); await browser.waitUntil(browser.$(selector).isExisting, { timeout }); diff --git a/src/tools/context.tool.ts b/src/tools/context.tool.ts index 78923e7..9054c1f 100644 --- a/src/tools/context.tool.ts +++ b/src/tools/context.tool.ts @@ -2,23 +2,8 @@ import type { 
ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; import { z } from 'zod'; -import { getBrowser } from './browser.tool'; +import { getBrowser } from '../session/state'; -// Get Contexts Tool Definition -export const getContextsToolDefinition: ToolDefinition = { - name: 'get_contexts', - description: 'lists available contexts (NATIVE_APP, WEBVIEW)', - inputSchema: {}, -}; - -// Get Current Context Tool Definition -export const getCurrentContextToolDefinition: ToolDefinition = { - name: 'get_current_context', - description: 'shows the currently active context', - inputSchema: {}, -}; - -// Switch Context Tool Definition export const switchContextToolDefinition: ToolDefinition = { name: 'switch_context', description: 'switches between native and webview contexts', @@ -26,52 +11,11 @@ export const switchContextToolDefinition: ToolDefinition = { context: z .string() .describe( - 'Context name to switch to (e.g., "NATIVE_APP", "WEBVIEW_com.example.app", or use index from get_contexts)', + 'Context name to switch to (e.g., "NATIVE_APP", "WEBVIEW_com.example.app", or use index from wdio://session/current/contexts resource)', ), }, }; -// Get Contexts Tool -export const getContextsTool: ToolCallback = async (): Promise => { - try { - const browser = getBrowser(); - - const contexts = await browser.getContexts(); - - return { - content: [ - { - type: 'text', - text: `Available contexts:\n${contexts.map((ctx, idx) => `${idx + 1}. 
${ctx}`).join('\n')}`, - }, - ], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error getting contexts: ${e}` }], - }; - } -}; - -// Get Current Context Tool -export const getCurrentContextTool: ToolCallback = async (): Promise => { - try { - const browser = getBrowser(); - - const currentContext = await browser.getContext(); - - return { - content: [{ type: 'text', text: `Current context: ${JSON.stringify(currentContext)}` }], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error getting current context: ${e}` }], - }; - } -}; - export const switchContextTool: ToolCallback = async (args: { context: string; }): Promise => { @@ -79,22 +23,22 @@ export const switchContextTool: ToolCallback = async (args: { const browser = getBrowser(); const { context } = args; - // If context is a number, get the context by index - let targetContext = context; if (/^\d+$/.test(context)) { const contexts = await browser.getContexts(); - const index = parseInt(context, 10) - 1; // Convert to 0-based index + const index = Number.parseInt(context, 10) - 1; if (index >= 0 && index < contexts.length) { - targetContext = contexts[index] as string; - } else { - throw new Error(`Error: Invalid context index ${context}. Available contexts: ${contexts.length}`); + const targetContext = contexts[index] as string; + await browser.switchContext(targetContext); + return { content: [{ type: 'text', text: `Switched to context: ${targetContext}` }] }; } + throw new Error(`Error: Invalid context index ${context}. 
Available contexts: ${contexts.length}`); + } - await browser.switchContext(targetContext); + await browser.switchContext(context); return { - content: [{ type: 'text', text: `Switched to context: ${targetContext}` }], + content: [{ type: 'text', text: `Switched to context: ${context}` }], }; } catch (e) { return { @@ -102,4 +46,4 @@ export const switchContextTool: ToolCallback = async (args: { content: [{ type: 'text', text: `Error switching context: ${e}` }], }; } -}; +}; \ No newline at end of file diff --git a/src/tools/cookies.tool.ts b/src/tools/cookies.tool.ts index b41c73e..767eeb0 100644 --- a/src/tools/cookies.tool.ts +++ b/src/tools/cookies.tool.ts @@ -1,55 +1,10 @@ -import { getBrowser } from './browser.tool'; -import { z } from 'zod'; import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; -import type { Cookie } from '@wdio/protocols'; import type { ToolDefinition } from '../types/tool'; +import { z } from 'zod'; +import type { Cookie } from '@wdio/protocols'; +import { coerceBoolean } from '../utils/zod-helpers'; -// Tool definitions -export const getCookiesToolDefinition: ToolDefinition = { - name: 'get_cookies', - description: 'gets all cookies or a specific cookie by name', - inputSchema: { - name: z.string().optional().describe('Optional cookie name to retrieve a specific cookie. 
If not provided, returns all cookies'), - }, -}; - -export const getCookiesTool: ToolCallback = async ({ name}: { name?: string }): Promise => { - try { - const browser = getBrowser(); - - if (name) { - // Get specific cookie by name - const cookie = await browser.getCookies([name]); - if (cookie.length === 0) { - return { - content: [{ type: 'text', text: `Cookie "${name}" not found` }], - }; - } - return { - content: [{ type: 'text', text: JSON.stringify(cookie[0], null, 2) }], - }; - } - // Get all cookies - const cookies = await browser.getCookies(); - if (cookies.length === 0) { - return { - content: [{ type: 'text', text: 'No cookies found' }], - }; - } - return { - content: [{ type: 'text', text: JSON.stringify(cookies, null, 2) }], - }; - - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error getting cookies: ${e}` }], - }; - } -}; - -// Set a cookie export const setCookieToolDefinition: ToolDefinition = { name: 'set_cookie', description: 'sets a cookie with specified name, value, and optional attributes', @@ -59,8 +14,8 @@ export const setCookieToolDefinition: ToolDefinition = { domain: z.string().optional().describe('Cookie domain (defaults to current domain)'), path: z.string().optional().describe('Cookie path (defaults to "/")'), expiry: z.number().optional().describe('Expiry date as Unix timestamp in seconds'), - httpOnly: z.boolean().optional().describe('HttpOnly flag'), - secure: z.boolean().optional().describe('Secure flag'), + httpOnly: coerceBoolean.optional().describe('HttpOnly flag'), + secure: coerceBoolean.optional().describe('Secure flag'), sameSite: z.enum(['strict', 'lax', 'none']).optional().describe('SameSite attribute'), }, }; @@ -76,10 +31,9 @@ export const setCookieTool: ToolCallback = async ({ sameSite, }: Cookie): Promise => { try { - const browser = getBrowser(); - const cookie: Cookie = { name, value, path, domain, expiry, httpOnly, secure, sameSite }; - + const { getBrowser } = await 
import('../session/state'); + const browser = getBrowser(); await browser.setCookies(cookie); return { @@ -93,7 +47,6 @@ export const setCookieTool: ToolCallback = async ({ } }; -// Delete cookies export const deleteCookiesToolDefinition: ToolDefinition = { name: 'delete_cookies', description: 'deletes all cookies or a specific cookie by name', @@ -102,27 +55,25 @@ export const deleteCookiesToolDefinition: ToolDefinition = { }, }; -export const deleteCookiesTool: ToolCallback = async ({ name}: { name?: string }): Promise => { +export const deleteCookiesTool: ToolCallback = async ({ name }: { name?: string }): Promise => { try { + const { getBrowser } = await import('../session/state'); const browser = getBrowser(); if (name) { - // Delete specific cookie by name await browser.deleteCookies([name]); return { content: [{ type: 'text', text: `Cookie "${name}" deleted successfully` }], }; } - // Delete all cookies await browser.deleteCookies(); return { content: [{ type: 'text', text: 'All cookies deleted successfully' }], }; - } catch (e) { return { isError: true, content: [{ type: 'text', text: `Error deleting cookies: ${e}` }], }; } -}; +}; \ No newline at end of file diff --git a/src/tools/device.tool.ts b/src/tools/device.tool.ts index 7604c25..15b0c76 100644 --- a/src/tools/device.tool.ts +++ b/src/tools/device.tool.ts @@ -2,22 +2,14 @@ import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; import { z } from 'zod'; -import { getBrowser } from './browser.tool'; +import { getBrowser } from '../session/state'; -// Tool Definitions for zero-argument tools export const hideKeyboardToolDefinition: ToolDefinition = { name: 'hide_keyboard', description: 'hides the on-screen keyboard', inputSchema: {}, }; -export const getGeolocationToolDefinition: ToolDefinition = { - name: 'get_geolocation', - description: 'gets current device 
geolocation', - inputSchema: {}, -}; - -// Tool Definitions for tools with arguments export const rotateDeviceToolDefinition: ToolDefinition = { name: 'rotate_device', description: 'rotates device to portrait or landscape orientation', @@ -36,7 +28,6 @@ export const setGeolocationToolDefinition: ToolDefinition = { }, }; -// Rotate Device Tool export const rotateDeviceTool: ToolCallback = async (args: { orientation: 'PORTRAIT' | 'LANDSCAPE'; }): Promise => { @@ -57,7 +48,6 @@ export const rotateDeviceTool: ToolCallback = async (args: { } }; -// Hide Keyboard Tool export const hideKeyboardTool: ToolCallback = async (): Promise => { try { const browser = getBrowser(); @@ -75,30 +65,6 @@ export const hideKeyboardTool: ToolCallback = async (): Promise } }; -// Get Geolocation Tool -export const getGeolocationTool: ToolCallback = async (): Promise => { - try { - const browser = getBrowser(); - - const location = await browser.getGeoLocation(); - - return { - content: [ - { - type: 'text', - text: `Location:\n Latitude: ${location.latitude}\n Longitude: ${location.longitude}\n Altitude: ${location.altitude || 'N/A'}`, - }, - ], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error getting geolocation: ${e}` }], - }; - } -}; - -// Set Geolocation Tool export const setGeolocationTool: ToolCallback = async (args: { latitude: number; longitude: number; @@ -124,4 +90,4 @@ export const setGeolocationTool: ToolCallback = async (args: { content: [{ type: 'text', text: `Error setting geolocation: ${e}` }], }; } -}; +}; \ No newline at end of file diff --git a/src/tools/emulate-device.tool.ts b/src/tools/emulate-device.tool.ts index 0b95af9..78e37ef 100644 --- a/src/tools/emulate-device.tool.ts +++ b/src/tools/emulate-device.tool.ts @@ -5,7 +5,7 @@ import type { ToolDefinition } from '../types/tool'; // This is a type-only import — it is stripped at build time by tsup and has no runtime impact. 
import type { DeviceName } from 'webdriverio/build/deviceDescriptorsSource.js'; import { z } from 'zod'; -import { getBrowser } from './browser.tool'; +import { getBrowser, getState } from '../session/state'; // Stores restore functions returned by browser.emulate(), keyed by sessionId const restoreFunctions = new Map Promise>(); @@ -34,15 +34,18 @@ export const emulateDeviceTool: ToolCallback = async ({ }): Promise => { try { const browser = getBrowser(); - const state = (getBrowser as any).__state; - const sessionId = state.currentSession as string; + const state = getState(); + const sessionId = state.currentSession; const metadata = state.sessionMetadata.get(sessionId); // Guard: mobile sessions if (metadata?.type === 'ios' || metadata?.type === 'android') { return { isError: true, - content: [{ type: 'text', text: 'Error: emulate_device is only supported for web browser sessions, not iOS/Android.' }], + content: [{ + type: 'text', + text: 'Error: emulate_device is only supported for web browser sessions, not iOS/Android.' 
+ }], }; } diff --git a/src/tools/execute-script.tool.ts b/src/tools/execute-script.tool.ts index cea2825..de753c4 100644 --- a/src/tools/execute-script.tool.ts +++ b/src/tools/execute-script.tool.ts @@ -2,7 +2,7 @@ import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; import { z } from 'zod'; -import { getBrowser } from './browser.tool'; +import { getBrowser } from '../session/state'; export const executeScriptToolDefinition: ToolDefinition = { name: 'execute_script', diff --git a/src/tools/gestures.tool.ts b/src/tools/gestures.tool.ts index 1dac6fa..de02a78 100644 --- a/src/tools/gestures.tool.ts +++ b/src/tools/gestures.tool.ts @@ -2,7 +2,7 @@ import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; import { z } from 'zod'; -import { getBrowser } from './browser.tool'; +import { getBrowser } from '../session/state'; // Tap Tool export const tapElementToolDefinition: ToolDefinition = { @@ -18,7 +18,7 @@ export const tapElementToolDefinition: ToolDefinition = { }, }; -export const tapElementTool: ToolCallback = async (args: { +export const tapAction = async (args: { selector?: string; x?: number; y?: number; @@ -52,6 +52,12 @@ export const tapElementTool: ToolCallback = async (args: { } }; +export const tapElementTool: ToolCallback = async (args: { + selector?: string; + x?: number; + y?: number; +}): Promise => tapAction(args); + // Swipe Tool export const swipeToolDefinition: ToolDefinition = { name: 'swipe', @@ -82,7 +88,7 @@ const contentToFingerDirection: Record right: 'left', }; -export const swipeTool: ToolCallback = async (args: { +export const swipeAction = async (args: { direction: 'up' | 'down' | 'left' | 'right'; duration?: number; percent?: number; @@ -112,6 +118,12 @@ 
export const swipeTool: ToolCallback = async (args: { } }; +export const swipeTool: ToolCallback = async (args: { + direction: 'up' | 'down' | 'left' | 'right'; + duration?: number; + percent?: number; +}): Promise => swipeAction(args); + // Drag and Drop Tool export const dragAndDropToolDefinition: ToolDefinition = { name: 'drag_and_drop', @@ -125,7 +137,7 @@ export const dragAndDropToolDefinition: ToolDefinition = { }, }; -export const dragAndDropTool: ToolCallback = async (args: { +export const dragAndDropAction = async (args: { sourceSelector: string; targetSelector?: string; x?: number; @@ -161,4 +173,12 @@ export const dragAndDropTool: ToolCallback = async (args: { content: [{ type: 'text', text: `Error dragging: ${e}` }], }; } -}; \ No newline at end of file +}; + +export const dragAndDropTool: ToolCallback = async (args: { + sourceSelector: string; + targetSelector?: string; + x?: number; + y?: number; + duration?: number; +}): Promise => dragAndDropAction(args); \ No newline at end of file diff --git a/src/tools/get-accessibility-tree.tool.ts b/src/tools/get-accessibility-tree.tool.ts deleted file mode 100644 index e63c7cf..0000000 --- a/src/tools/get-accessibility-tree.tool.ts +++ /dev/null @@ -1,95 +0,0 @@ -import { getBrowser } from './browser.tool'; -import { getBrowserAccessibilityTree } from '../scripts/get-browser-accessibility-tree'; -import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; -import type { ToolDefinition } from '../types/tool'; -import { encode } from '@toon-format/toon'; -import { z } from 'zod'; - -export const getAccessibilityToolDefinition: ToolDefinition = { - name: 'get_accessibility', - description: 'Gets the accessibility tree: page structure with headings, landmarks, and semantic roles. Browser-only. 
Use to understand page layout and context around interactable elements.', - inputSchema: { - limit: z.number().optional() - .describe('Maximum number of nodes to return. Default: 100. Use 0 for unlimited.'), - offset: z.number().optional() - .describe('Number of nodes to skip (for pagination). Default: 0.'), - roles: z.array(z.string()).optional() - .describe('Filter to specific roles (e.g., ["heading", "navigation", "region"]). Default: all roles.'), - }, -}; - -export const getAccessibilityTreeTool: ToolCallback = async (args: { - limit?: number; - offset?: number; - roles?: string[]; -}): Promise => { - try { - const browser = getBrowser(); - - if (browser.isAndroid || browser.isIOS) { - return { - content: [{ - type: 'text', - text: 'Error: get_accessibility is browser-only. For mobile apps, use get_visible_elements instead.', - }], - }; - } - - const { limit = 100, offset = 0, roles } = args || {}; - - let nodes = await getBrowserAccessibilityTree(browser); - - if (nodes.length === 0) { - return { - content: [{ type: 'text', text: 'No accessibility tree available' }], - }; - } - - // Filter out nodes with no meaningful name - nodes = nodes.filter((n) => n.name && n.name.trim() !== ''); - - if (roles && roles.length > 0) { - const roleSet = new Set(roles.map((r) => r.toLowerCase())); - nodes = nodes.filter((n) => n.role && roleSet.has(n.role.toLowerCase())); - } - - const total = nodes.length; - - if (offset > 0) { - nodes = nodes.slice(offset); - } - if (limit > 0) { - nodes = nodes.slice(0, limit); - } - - // Drop state columns that are empty for every node in this result set - const stateKeys = ['level', 'disabled', 'checked', 'expanded', 'selected', 'pressed', 'required', 'readonly'] as const; - const usedKeys = stateKeys.filter(k => nodes.some(n => n[k] !== '')); - const trimmed = nodes.map(({ role, name, selector, ...state }) => { - const node: Record = { role, name, selector }; - for (const k of usedKeys) node[k] = state[k]; - return node; - }); - - 
const result = { - total, - showing: trimmed.length, - hasMore: offset + trimmed.length < total, - nodes: trimmed, - }; - - const toon = encode(result) - .replace(/,""/g, ',') - .replace(/"",/g, ','); - - return { - content: [{ type: 'text', text: toon }], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error getting accessibility tree: ${e}` }], - }; - } -}; diff --git a/src/tools/get-elements.tool.ts b/src/tools/get-elements.tool.ts new file mode 100644 index 0000000..d3e9517 --- /dev/null +++ b/src/tools/get-elements.tool.ts @@ -0,0 +1,42 @@ +import { z } from 'zod'; +import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; +import type { ToolDefinition } from '../types/tool'; +import { getBrowser } from '../session/state'; +import { getElements } from '../scripts/get-elements'; +import { encode } from '@toon-format/toon'; +import { coerceBoolean } from '../utils/zod-helpers'; + +export const getElementsToolDefinition: ToolDefinition = { + name: 'get_elements', + description: 'Get interactable elements on the current page. 
Use when wdio://session/current/elements does not return the desired elements.', + inputSchema: { + inViewportOnly: coerceBoolean.optional().default(false).describe('Only return elements visible in the current viewport (default: false).'), + includeContainers: coerceBoolean.optional().default(false).describe('Include container elements like divs and sections (default: false)'), + includeBounds: coerceBoolean.optional().default(false).describe('Include element bounding box coordinates (default: false)'), + limit: z.number().optional().default(0).describe('Maximum number of elements to return (0 = no limit)'), + offset: z.number().optional().default(0).describe('Number of elements to skip (for pagination)'), + }, +}; + +export const getElementsTool: ToolCallback = async ({ + inViewportOnly = false, + includeContainers = false, + includeBounds = false, + limit = 0, + offset = 0, +}: { + inViewportOnly?: boolean; + includeContainers?: boolean; + includeBounds?: boolean; + limit?: number; + offset?: number; +}) => { + try { + const browser = getBrowser(); + const result = await getElements(browser, { inViewportOnly, includeContainers, includeBounds, limit, offset }); + const text = encode(result).replace(/,""/g, ',').replace(/"",/g, ','); + return { content: [{ type: 'text' as const, text }] }; + } catch (e) { + return { isError: true as const, content: [{ type: 'text' as const, text: `Error getting elements: ${e}` }] }; + } +}; diff --git a/src/tools/get-visible-elements.tool.ts b/src/tools/get-visible-elements.tool.ts deleted file mode 100644 index e11b833..0000000 --- a/src/tools/get-visible-elements.tool.ts +++ /dev/null @@ -1,101 +0,0 @@ -import { getBrowser } from './browser.tool'; -import { getInteractableBrowserElements } from '../scripts/get-interactable-browser-elements'; -import { getMobileVisibleElements } from '../scripts/get-visible-mobile-elements'; -import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import type { ToolDefinition } 
from '../types/tool'; -import { encode } from '@toon-format/toon'; -import { z } from 'zod'; - -/** - * Tool definition for get_visible_elements - */ -export const getVisibleElementsToolDefinition: ToolDefinition = { - name: 'get_visible_elements', - description: 'Get interactable elements on the page (buttons, links, inputs). Use get_accessibility for page structure and non-interactable elements.', - inputSchema: { - inViewportOnly: z - .boolean() - .optional() - .describe('Only return elements within the visible viewport. Default: true. Set to false to get ALL elements on the page.'), - includeContainers: z - .boolean() - .optional() - .describe('Mobile only: include layout containers. Default: false.'), - includeBounds: z - .boolean() - .optional() - .describe('Include element bounds/coordinates (x, y, width, height). Default: false.'), - limit: z - .number() - .optional() - .describe('Maximum number of elements to return. Default: 0 (unlimited).'), - offset: z - .number() - .optional() - .describe('Number of elements to skip (for pagination). Default: 0.'), - }, -}; - -/** - * Get visible elements on the current screen - * Supports both web browsers and mobile apps (iOS/Android) - */ -export const getVisibleElementsTool: ToolCallback = async (args: { - inViewportOnly?: boolean; - includeContainers?: boolean; - includeBounds?: boolean; - limit?: number; - offset?: number; -}) => { - try { - const browser = getBrowser(); - const { - inViewportOnly = true, - includeContainers = false, - includeBounds = false, - limit = 0, - offset = 0, - } = args || {}; - - let elements: { isInViewport?: boolean }[]; - - if (browser.isAndroid || browser.isIOS) { - const platform = browser.isAndroid ? 
'android' : 'ios'; - elements = await getMobileVisibleElements(browser, platform, { includeContainers, includeBounds }); - } else { - elements = await getInteractableBrowserElements(browser, { includeBounds }); - } - - if (inViewportOnly) { - elements = elements.filter((el) => el.isInViewport !== false); - } - - const total = elements.length; - - // Apply pagination - if (offset > 0) { - elements = elements.slice(offset); - } - if (limit > 0) { - elements = elements.slice(0, limit); - } - - const result: Record = { - total, - showing: elements.length, - hasMore: offset + elements.length < total, - elements, - }; - - // TOON tabular format with post-processing: replace "" with bare commas for efficiency - const toon = encode(result).replace(/,""/g, ',').replace(/"",/g, ','); - return { - content: [{ type: 'text', text: toon }], - }; - } catch (e) { - return { - isError: true, - content: [{ type: 'text', text: `Error getting visible elements: ${e}` }], - }; - } -}; diff --git a/src/tools/launch-chrome.tool.ts b/src/tools/launch-chrome.tool.ts index d03d42f..4be26f4 100644 --- a/src/tools/launch-chrome.tool.ts +++ b/src/tools/launch-chrome.tool.ts @@ -6,6 +6,7 @@ import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; import { z } from 'zod'; +import { coerceBoolean } from '../utils/zod-helpers'; const USER_DATA_DIR = join(tmpdir(), 'chrome-debug'); @@ -29,7 +30,7 @@ After this tool succeeds, call attach_browser() to connect.`, mode: z.enum(['newInstance', 'freshSession']).default('newInstance').describe( 'newInstance: open alongside existing Chrome | freshSession: clean profile' ), - copyProfileFiles: z.boolean().default(false).describe( + copyProfileFiles: coerceBoolean.default(false).describe( 'Copy your Default Chrome profile (cookies, logins) into the debug session.' 
), }, diff --git a/src/tools/navigate.tool.ts b/src/tools/navigate.tool.ts index 7e5e5bd..4c7c5f1 100644 --- a/src/tools/navigate.tool.ts +++ b/src/tools/navigate.tool.ts @@ -1,6 +1,7 @@ -import { getBrowser } from './browser.tool'; +import { getBrowser } from '../session/state'; import { z } from 'zod'; import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; export const navigateToolDefinition: ToolDefinition = { @@ -11,7 +12,7 @@ export const navigateToolDefinition: ToolDefinition = { }, }; -export const navigateTool: ToolCallback = async ({ url}: { url: string }) => { +export const navigateAction = async (url: string): Promise => { try { const browser = getBrowser(); await browser.url(url); @@ -25,3 +26,5 @@ export const navigateTool: ToolCallback = async ({ url}: { url: string }) => { }; } }; + +export const navigateTool: ToolCallback = async ({ url}: { url: string }) => navigateAction(url); diff --git a/src/tools/scroll.tool.ts b/src/tools/scroll.tool.ts index f13edb1..4f7bbbe 100644 --- a/src/tools/scroll.tool.ts +++ b/src/tools/scroll.tool.ts @@ -1,7 +1,7 @@ -import { getBrowser } from './browser.tool'; -import { getState } from './app-session.tool'; +import { getBrowser, getState } from '../session/state'; import { z } from 'zod'; import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; export const scrollToolDefinition: ToolDefinition = { @@ -13,7 +13,7 @@ export const scrollToolDefinition: ToolDefinition = { }, }; -export const scrollTool: ToolCallback = async ({ direction, pixels = 500 }: { direction: 'up' | 'down'; pixels?: number }) => { +export const scrollAction = async (direction: 'up' | 'down', pixels = 500): Promise => { try { const browser = getBrowser(); const state 
= getState(); @@ -38,4 +38,7 @@ export const scrollTool: ToolCallback = async ({ direction, pixels = 500 }: { di content: [{ type: 'text', text: `Error scrolling: ${e}` }], }; } -}; \ No newline at end of file +}; + +export const scrollTool: ToolCallback = async ({ direction, pixels = 500 }: { direction: 'up' | 'down'; pixels?: number }) => + scrollAction(direction, pixels); \ No newline at end of file diff --git a/src/tools/session.tool.ts b/src/tools/session.tool.ts new file mode 100644 index 0000000..946bb6b --- /dev/null +++ b/src/tools/session.tool.ts @@ -0,0 +1,358 @@ +import { remote } from 'webdriverio'; +import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; +import type { ToolDefinition } from '../types/tool'; +import { z } from 'zod'; +import type { SessionMetadata } from '../session/state'; +import { getBrowser, getState } from '../session/state'; +import { closeSession, registerSession } from '../session/lifecycle'; +import { localBrowserProvider } from '../providers/local-browser.provider'; +import { localAppiumProvider } from '../providers/local-appium.provider'; +import { coerceBoolean } from '../utils/zod-helpers'; + +const platformEnum = z.enum(['browser', 'ios', 'android']); +const browserEnum = z.enum(['chrome', 'firefox', 'edge', 'safari']); +const automationEnum = z.enum(['XCUITest', 'UiAutomator2']); + +export const startSessionToolDefinition: ToolDefinition = { + name: 'start_session', + description: 'Starts a browser or mobile app session. For local browser, use browser platform. For mobile apps, use ios or android platform. 
Use attach mode to connect to an existing Chrome instance.', + inputSchema: { + platform: platformEnum.describe('Session platform type'), + browser: browserEnum.optional().describe('Browser to launch (required for browser platform)'), + headless: coerceBoolean.optional().default(true).describe('Run browser in headless mode (default: true)'), + windowWidth: z.number().min(400).max(3840).optional().default(1920).describe('Browser window width'), + windowHeight: z.number().min(400).max(2160).optional().default(1080).describe('Browser window height'), + deviceName: z.string().optional().describe('Mobile device/emulator/simulator name (required for ios/android)'), + platformVersion: z.string().optional().describe('OS version (e.g., "17.0", "14")'), + appPath: z.string().optional().describe('Path to app file (.app/.apk/.ipa)'), + automationName: automationEnum.optional().describe('Automation driver'), + autoGrantPermissions: coerceBoolean.optional().describe('Auto-grant app permissions (default: true)'), + autoAcceptAlerts: coerceBoolean.optional().describe('Auto-accept alerts (default: true)'), + autoDismissAlerts: coerceBoolean.optional().describe('Auto-dismiss alerts (default: false)'), + appWaitActivity: z.string().optional().describe('Activity to wait for on Android launch'), + udid: z.string().optional().describe('Unique Device Identifier for iOS real device'), + noReset: coerceBoolean.optional().describe('Preserve app data between sessions'), + fullReset: coerceBoolean.optional().describe('Uninstall app before/after session'), + newCommandTimeout: z.number().min(0).optional().default(300).describe('Appium command timeout in seconds'), + attach: coerceBoolean.optional().default(false).describe('Attach to existing Chrome instead of launching'), + port: z.number().optional().default(9222).describe('Chrome remote debugging port (for attach mode)'), + host: z.string().optional().default('localhost').describe('Chrome host (for attach mode)'), + appiumHost: 
z.string().optional().describe('Appium server hostname'), + appiumPort: z.number().optional().describe('Appium server port'), + appiumPath: z.string().optional().describe('Appium server path'), + navigationUrl: z.string().optional().describe('URL to navigate to after starting'), + capabilities: z.record(z.string(), z.unknown()).optional().describe('Additional capabilities to merge'), + }, +}; + +type StartSessionArgs = { + platform: 'browser' | 'ios' | 'android'; + browser?: 'chrome' | 'firefox' | 'edge' | 'safari'; + headless?: boolean; + windowWidth?: number; + windowHeight?: number; + deviceName?: string; + platformVersion?: string; + appPath?: string; + automationName?: 'XCUITest' | 'UiAutomator2'; + autoGrantPermissions?: boolean; + autoAcceptAlerts?: boolean; + autoDismissAlerts?: boolean; + appWaitActivity?: string; + udid?: string; + noReset?: boolean; + fullReset?: boolean; + newCommandTimeout?: number; + attach?: boolean; + port?: number; + host?: string; + appiumHost?: string; + appiumPort?: number; + appiumPath?: string; + navigationUrl?: string; + capabilities?: Record; +}; + +export const closeSessionToolDefinition: ToolDefinition = { + name: 'close_session', + description: 'Closes or detaches from the current browser or app session', + inputSchema: { + detach: coerceBoolean.optional().describe('If true, disconnect without terminating (preserves app state). 
Default: false'), + }, +}; + +type TabSnapshot = { activeTabUrl: string | undefined; allTabUrls: string[] }; + +async function closeStaleMappers(host: string, port: number): Promise { + try { + const res = await fetch(`http://${host}:${port}/json`); + const targets = await res.json() as { id: string; title: string; type: string; url: string }[]; + const mappers = targets.filter((t) => t.title?.includes('BiDi')); + await Promise.all(mappers.map((t) => fetch(`http://${host}:${port}/json/close/${t.id}`))); + const pages = targets.filter((t) => t.type === 'page' && !t.title?.includes('BiDi')); + return { activeTabUrl: pages[0]?.url, allTabUrls: pages.map((t) => t.url) }; + } catch { + return { activeTabUrl: undefined, allTabUrls: [] }; + } +} + +async function restoreAndSwitchToActiveTab(browser: WebdriverIO.Browser, activeTabUrl: string, allTabUrls: string[]): Promise { + const handles = await browser.getWindowHandles(); + const currentUrls: string[] = []; + for (const handle of handles) { + await browser.switchToWindow(handle); + currentUrls.push(await browser.getUrl()); + } + + const missingUrls = allTabUrls.filter((u) => !currentUrls.includes(u)); + let missingIdx = 0; + for (let i = 0; i < handles.length; i++) { + if (currentUrls[i] === 'about:blank' && missingIdx < missingUrls.length) { + await browser.switchToWindow(handles[i]); + await browser.url(missingUrls[missingIdx]); + currentUrls[i] = missingUrls[missingIdx++]; + } + } + + for (let i = 0; i < handles.length; i++) { + if (currentUrls[i] === activeTabUrl) { + await browser.switchToWindow(handles[i]); + break; + } + } +} + +async function waitForCDP(host: string, port: number, timeoutMs = 10000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + try { + const res = await fetch(`http://${host}:${port}/json/version`); + if (res.ok) return; + } catch { + // not ready yet + } + await new Promise((r) => setTimeout(r, 300)); + } + throw new Error(`Chrome did not expose CDP 
on ${host}:${port} within ${timeoutMs}ms`); +} + +async function startBrowserSession(args: StartSessionArgs): Promise { + const browser = args.browser ?? 'chrome'; + const headless = args.headless ?? true; + const windowWidth = args.windowWidth ?? 1920; + const windowHeight = args.windowHeight ?? 1080; + const navigationUrl = args.navigationUrl; + const userCapabilities = args.capabilities ?? {}; + + const browserDisplayNames: Record = { + chrome: 'Chrome', + firefox: 'Firefox', + edge: 'Edge', + safari: 'Safari', + }; + + const headlessSupported = browser !== 'safari'; + const effectiveHeadless = headless && headlessSupported; + + const mergedCapabilities = localBrowserProvider.buildCapabilities({ + browser, + headless, + windowWidth, + windowHeight, + capabilities: userCapabilities + }); + + const wdioBrowser = await remote({ capabilities: mergedCapabilities }); + const { sessionId } = wdioBrowser; + + registerSession(sessionId, wdioBrowser, { + type: 'browser', + capabilities: wdioBrowser.capabilities as Record, + isAttached: false, + }, { + sessionId, + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: wdioBrowser.capabilities as Record, + steps: [], + }); + + let sizeNote = ''; + try { + await wdioBrowser.setWindowSize(windowWidth, windowHeight); + } catch (e) { + sizeNote = `\nNote: Unable to set window size (${windowWidth}x${windowHeight}). ${e}`; + } + + if (navigationUrl) { + await wdioBrowser.url(navigationUrl); + } + + const modeText = effectiveHeadless ? 'headless' : 'headed'; + const urlText = navigationUrl ? ` and navigated to ${navigationUrl}` : ''; + const headlessNote = headless && !headlessSupported + ? '\nNote: Safari does not support headless mode. Started in headed mode.' 
+ : ''; + + return { + content: [{ + type: 'text', + text: `${browserDisplayNames[browser]} browser started in ${modeText} mode with sessionId: ${sessionId} (${windowWidth}x${windowHeight})${urlText}${headlessNote}${sizeNote}`, + }], + }; +} + +async function startMobileSession(args: StartSessionArgs): Promise { + const { platform, appPath, deviceName, noReset } = args; + + if (!appPath && noReset !== true) { + return { + content: [{ + type: 'text', + text: 'Error: Either "appPath" must be provided to install an app, or "noReset: true" must be set to connect to an already-running app.', + }], + }; + } + + const serverConfig = localAppiumProvider.getConnectionConfig(args as Record); + const mergedCapabilities = localAppiumProvider.buildCapabilities(args as Record); + + const browser = await remote({ + protocol: serverConfig.protocol, + hostname: serverConfig.hostname, + port: serverConfig.port, + path: serverConfig.path, + capabilities: mergedCapabilities, + }); + + const { sessionId } = browser; + const shouldAutoDetach = localAppiumProvider.shouldAutoDetach(args as Record); + const sessionType = localAppiumProvider.getSessionType(args as Record); + const metadata: SessionMetadata = { + type: sessionType, + capabilities: mergedCapabilities, + isAttached: shouldAutoDetach, + }; + + registerSession(sessionId, browser, metadata, { + sessionId, + type: sessionType, + startedAt: new Date().toISOString(), + capabilities: mergedCapabilities, + appiumConfig: { hostname: serverConfig.hostname, port: serverConfig.port, path: serverConfig.path }, + steps: [], + }); + + const appInfo = appPath ? `\nApp: ${appPath}` : '\nApp: (connected to running app)'; + const detachNote = shouldAutoDetach + ? '\n\n(Auto-detach enabled: session will be preserved on close. 
Use close_session({ detach: false }) to force terminate.)' + : ''; + + return { + content: [ + { + type: 'text', + text: `${platform} app session started with sessionId: ${sessionId}\nDevice: ${deviceName}${appInfo}\nAppium Server: ${serverConfig.hostname}:${serverConfig.port}${serverConfig.path}${detachNote}`, + }, + ], + }; +} + +async function attachBrowserSession(args: StartSessionArgs): Promise { + const port = args.port ?? 9222; + const host = args.host ?? 'localhost'; + const navigationUrl = args.navigationUrl; + + await waitForCDP(host, port); + const { activeTabUrl, allTabUrls } = await closeStaleMappers(host, port); + + const browser = await remote({ + connectionRetryTimeout: 30000, + connectionRetryCount: 3, + capabilities: { + browserName: 'chrome', + unhandledPromptBehavior: 'dismiss', + webSocketUrl: false, + 'goog:chromeOptions': { + debuggerAddress: `${host}:${port}`, + }, + }, + }); + + const { sessionId } = browser; + registerSession( + sessionId, + browser, + { + type: 'browser', + capabilities: browser.capabilities as Record, + isAttached: true, + }, + { + sessionId, + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: { + browserName: 'chrome', + 'goog:chromeOptions': { + debuggerAddress: `${host}:${port}`, + }, + }, + steps: [], + }, + ); + + if (navigationUrl) { + await browser.url(navigationUrl); + } else if (activeTabUrl) { + await restoreAndSwitchToActiveTab(browser, activeTabUrl, allTabUrls); + } + + const title = await browser.getTitle(); + const url = await browser.getUrl(); + + return { + content: [{ + type: 'text', + text: `Attached to Chrome on ${host}:${port}\nSession ID: ${sessionId}\nCurrent page: "${title}" (${url})`, + }], + }; +} + +export const startSessionTool: ToolCallback = async (args: StartSessionArgs): Promise => { + try { + if (args.platform === 'browser') { + if (args.attach) { + return await attachBrowserSession(args); + } + return await startBrowserSession(args); + } + return await 
startMobileSession(args); + } catch (e) { + return { isError: true, content: [{ type: 'text', text: `Error starting session: ${e}` }] }; + } +}; + +export const closeSessionTool: ToolCallback = async (args: { detach?: boolean } = {}): Promise => { + try { + getBrowser(); + const state = getState(); + const sessionId = state.currentSession; + const metadata = state.sessionMetadata.get(sessionId); + + const effectiveDetach = args.detach || !!metadata?.isAttached; + await closeSession(sessionId, args.detach ?? false, !!metadata?.isAttached); + + const action = effectiveDetach ? 'detached from' : 'closed'; + const note = args.detach && !metadata?.isAttached + ? '\nNote: Session will remain active on Appium server.' + : ''; + + return { + content: [{ type: 'text', text: `Session ${sessionId} ${action}${note}` }], + }; + } catch (e) { + return { isError: true, content: [{ type: 'text', text: `Error closing session: ${e}` }] }; + } +}; diff --git a/src/tools/set-value.tool.ts b/src/tools/set-value.tool.ts index c8fa326..5ae209f 100644 --- a/src/tools/set-value.tool.ts +++ b/src/tools/set-value.tool.ts @@ -1,7 +1,9 @@ -import { getBrowser } from './browser.tool'; +import { getBrowser } from '../session/state'; import { z } from 'zod'; import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; import type { ToolDefinition } from '../types/tool'; +import { coerceBoolean } from '../utils/zod-helpers'; const defaultTimeout: number = 3000; @@ -11,17 +13,17 @@ export const setValueToolDefinition: ToolDefinition = { inputSchema: { selector: z.string().describe('Value for the selector, in the form of css selector or xpath ("button.my-class" or "//button[@class=\'my-class\']")'), value: z.string().describe('Text to enter into the element'), - scrollToView: z.boolean().optional().describe('Whether to scroll the element into view before typing').default(true), + scrollToView: 
coerceBoolean.optional().describe('Whether to scroll the element into view before typing').default(true), timeout: z.number().optional().describe('Maximum time to wait for element in milliseconds'), }, }; -export const setValueTool: ToolCallback = async ({ selector, value, scrollToView = true, timeout = defaultTimeout}: { - selector: string; - value: string; - scrollToView?: boolean; - timeout?: number -}) => { +export const setValueAction = async ( + selector: string, + value: string, + scrollToView = true, + timeout = defaultTimeout, +): Promise => { try { const browser = getBrowser(); await browser.waitUntil(browser.$(selector).isExisting, { timeout }); @@ -39,4 +41,11 @@ export const setValueTool: ToolCallback = async ({ selector, value, scrollToView content: [{ type: 'text', text: `Error entering text: ${e}` }], }; } -}; \ No newline at end of file +}; + +export const setValueTool: ToolCallback = async ({ selector, value, scrollToView = true, timeout = defaultTimeout}: { + selector: string; + value: string; + scrollToView?: boolean; + timeout?: number +}) => setValueAction(selector, value, scrollToView, timeout); \ No newline at end of file diff --git a/src/tools/tabs.tool.ts b/src/tools/tabs.tool.ts new file mode 100644 index 0000000..8d1d3cc --- /dev/null +++ b/src/tools/tabs.tool.ts @@ -0,0 +1,34 @@ +import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; +import type { ToolDefinition } from '../types/tool'; +import { z } from 'zod'; +import { getBrowser } from '../session/state'; + +export const switchTabToolDefinition: ToolDefinition = { + name: 'switch_tab', + description: 'switches to a browser tab by handle or index', + inputSchema: { + handle: z.string().optional().describe('Window handle to switch to'), + index: z.number().int().min(0).optional().describe('0-based tab index to switch to'), + }, +}; + +export const switchTabTool: ToolCallback = async ({ handle, 
index }: { handle?: string; index?: number }): Promise => { + try { + const browser = getBrowser(); + if (handle) { + await browser.switchToWindow(handle); + return { content: [{ type: 'text', text: `Switched to tab: ${handle}` }] }; + } else if (index !== undefined) { + const handles = await browser.getWindowHandles(); + if (index >= handles.length) { + return { isError: true, content: [{ type: 'text', text: `Error: index ${index} out of range (${handles.length} tabs)` }] }; + } + await browser.switchToWindow(handles[index]); + return { content: [{ type: 'text', text: `Switched to tab ${index}: ${handles[index]}` }] }; + } + return { isError: true, content: [{ type: 'text', text: 'Error: Must provide either handle or index' }] }; + } catch (e) { + return { isError: true, content: [{ type: 'text', text: `Error switching tab: ${e}` }] }; + } +}; \ No newline at end of file diff --git a/src/tools/take-screenshot.tool.ts b/src/tools/take-screenshot.tool.ts deleted file mode 100644 index 7dbf564..0000000 --- a/src/tools/take-screenshot.tool.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { getBrowser } from './browser.tool'; -import { z } from 'zod'; -import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import type { ToolDefinition } from '../types/tool'; -import sharp from 'sharp'; - -const MAX_DIMENSION = 2000; -const MAX_FILE_SIZE_BYTES = 1024 * 1024; // 1MB - -export const takeScreenshotToolDefinition: ToolDefinition = { - name: 'take_screenshot', - description: 'captures a screenshot of the current page', - inputSchema: { - outputPath: z.string().optional().describe('Optional path where to save the screenshot. 
If not provided, returns base64 data.'), - }, -}; - -async function processScreenshot(screenshotBase64: string): Promise<{ data: Buffer; mimeType: string }> { - const inputBuffer = Buffer.from(screenshotBase64, 'base64'); - let image = sharp(inputBuffer); - const metadata = await image.metadata(); - - // Resize if any dimension exceeds MAX_DIMENSION - const width = metadata.width ?? 0; - const height = metadata.height ?? 0; - - if (width > MAX_DIMENSION || height > MAX_DIMENSION) { - const resizeOptions = width > height - ? { width: MAX_DIMENSION } - : { height: MAX_DIMENSION }; - image = image.resize(resizeOptions); - } - - // Try PNG with maximum compression first - let outputBuffer = await image.png({ compressionLevel: 9 }).toBuffer(); - - // If still over 1MB, convert to JPEG with progressive quality reduction - if (outputBuffer.length > MAX_FILE_SIZE_BYTES) { - let quality = 90; - while (quality >= 10 && outputBuffer.length > MAX_FILE_SIZE_BYTES) { - outputBuffer = await image.jpeg({ quality, mozjpeg: true }).toBuffer(); - quality -= 10; - } - return { data: outputBuffer, mimeType: 'image/jpeg' }; - } - - return { data: outputBuffer, mimeType: 'image/png' }; -} - -export const takeScreenshotTool: ToolCallback = async ({ outputPath }: { outputPath?: string }) => { - try { - const browser = getBrowser(); - const screenshot = await browser.takeScreenshot(); - const { data, mimeType } = await processScreenshot(screenshot); - - if (outputPath) { - const fs = await import('node:fs'); - await fs.promises.writeFile(outputPath, data); - const sizeKB = (data.length / 1024).toFixed(1); - return { - content: [{ type: 'text', text: `Screenshot saved to ${outputPath} (${sizeKB}KB, ${mimeType})` }], - }; - } - - const sizeKB = (data.length / 1024).toFixed(1); - return { - content: [ - { type: 'text', text: `Screenshot captured (${sizeKB}KB, ${mimeType}):` }, - { type: 'image', data: data.toString('base64'), mimeType }, - ], - }; - } catch (e) { - return { - isError: true, - 
content: [{ type: 'text', text: `Error taking screenshot: ${(e as Error).message}` }], - }; - } -}; \ No newline at end of file diff --git a/src/types/resource.ts b/src/types/resource.ts new file mode 100644 index 0000000..63dc898 --- /dev/null +++ b/src/types/resource.ts @@ -0,0 +1,19 @@ +import type { ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp'; + +type ResourceContent = { uri: string; mimeType?: string; text: string } | { uri: string; mimeType?: string; blob: string }; + +export interface StaticResourceDefinition { + name: string; + uri: string; + description: string; + handler: () => Promise<{ contents: ResourceContent[] }>; +} + +export interface TemplateResourceDefinition { + name: string; + template: ResourceTemplate; + description: string; + handler: (uri: URL, variables: Record) => Promise<{ contents: ResourceContent[] }>; +} + +export type ResourceDefinition = StaticResourceDefinition | TemplateResourceDefinition; \ No newline at end of file diff --git a/src/utils/zod-helpers.ts b/src/utils/zod-helpers.ts new file mode 100644 index 0000000..c00ae07 --- /dev/null +++ b/src/utils/zod-helpers.ts @@ -0,0 +1,11 @@ +import { z } from 'zod'; + +export const coerceBoolean = z.preprocess((val) => { + if (typeof val === 'boolean') return val; + if (typeof val === 'string') { + if (val === 'false' || val === '0') return false; + if (val === 'true' || val === '1') return true; + return Boolean(val); + } + return val; +}, z.boolean()); \ No newline at end of file diff --git a/tests/providers/local-appium.provider.test.ts b/tests/providers/local-appium.provider.test.ts new file mode 100644 index 0000000..d07a96b --- /dev/null +++ b/tests/providers/local-appium.provider.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect } from 'vitest'; +import { localAppiumProvider } from '../../src/providers/local-appium.provider'; + +describe('LocalAppiumProvider', () => { + it('builds iOS capabilities with bundleId', () => { + const caps = 
localAppiumProvider.buildCapabilities({ + platform: 'iOS', + deviceName: 'iPhone 15', + appPath: '/path/to/app.app', + }); + expect(caps.platformName).toBe('iOS'); + expect(caps['appium:app']).toBe('/path/to/app.app'); + }); + + it('builds Android capabilities', () => { + const caps = localAppiumProvider.buildCapabilities({ + platform: 'Android', + deviceName: 'Pixel 7', + appPath: '/path/to/app.apk', + }); + expect(caps.platformName).toBe('Android'); + }); + + it('getSessionType returns ios for iOS', () => { + expect(localAppiumProvider.getSessionType({ platform: 'iOS' })).toBe('ios'); + }); + + it('shouldAutoDetach true when noReset', () => { + expect(localAppiumProvider.shouldAutoDetach({ noReset: true })).toBe(true); + }); + + it('shouldAutoDetach true when no appPath', () => { + expect(localAppiumProvider.shouldAutoDetach({})).toBe(true); + }); + + it('shouldAutoDetach false when appPath provided', () => { + expect(localAppiumProvider.shouldAutoDetach({ appPath: '/app.apk' })).toBe(false); + }); +}); diff --git a/tests/providers/local-browser.provider.test.ts b/tests/providers/local-browser.provider.test.ts new file mode 100644 index 0000000..b76c32e --- /dev/null +++ b/tests/providers/local-browser.provider.test.ts @@ -0,0 +1,38 @@ +import { describe, it, expect } from 'vitest'; +import { localBrowserProvider } from '../../src/providers/local-browser.provider'; + +describe('LocalBrowserProvider', () => { + it('returns empty connection config (local defaults)', () => { + expect(localBrowserProvider.getConnectionConfig({})).toEqual({}); + }); + + it('buildCapabilities: chrome headless includes --headless=new arg', () => { + const caps = localBrowserProvider.buildCapabilities({ browser: 'chrome', headless: true }); + const args = (caps['goog:chromeOptions'] as any)?.args ?? 
[]; + expect(args).toContain('--headless=new'); + }); + + it('buildCapabilities: safari headless ignored (headless not supported)', () => { + const caps = localBrowserProvider.buildCapabilities({ browser: 'safari', headless: true }); + expect(caps.browserName).toBe('safari'); + expect(caps['goog:chromeOptions']).toBeUndefined(); + }); + + it('buildCapabilities: merges user capabilities', () => { + const caps = localBrowserProvider.buildCapabilities({ + browser: 'chrome', + headless: false, + capabilities: { 'goog:chromeOptions': { args: ['--custom-flag'] } }, + }); + const args = (caps['goog:chromeOptions'] as any)?.args ?? []; + expect(args).toContain('--custom-flag'); + }); + + it('getSessionType returns browser', () => { + expect(localBrowserProvider.getSessionType({})).toBe('browser'); + }); + + it('shouldAutoDetach returns false', () => { + expect(localBrowserProvider.shouldAutoDetach({})).toBe(false); + }); +}); diff --git a/tests/recording/resources.test.ts b/tests/recording/resources.test.ts index aec2a5c..f66c687 100644 --- a/tests/recording/resources.test.ts +++ b/tests/recording/resources.test.ts @@ -1,11 +1,11 @@ // tests/recording/resources.test.ts import { beforeEach, describe, expect, it } from 'vitest'; -import { getBrowser } from '../../src/tools/browser.tool'; +import { getState } from '../../src/session/state'; import type { SessionHistory } from '../../src/types/recording'; -import { buildSessionsIndex, buildCurrentSessionSteps, buildSessionStepsById } from '../../src/recording/resources'; +import { buildSessionsIndex, buildCurrentSessionSteps, buildSessionStepsById } from '../../src/resources/sessions.resource'; function addHistory(sessionId: string, type: 'browser' | 'ios' | 'android', isCurrent = false, ended = false) { - const state = (getBrowser as any).__state; + const state = getState(); const history: SessionHistory = { sessionId, type, @@ -17,13 +17,13 @@ function addHistory(sessionId: string, type: 'browser' | 'ios' | 'android', isCu 
state.sessionHistory.set(sessionId, history); if (isCurrent) { state.currentSession = sessionId; - state.browsers.set(sessionId, {}); + state.browsers.set(sessionId, {} as WebdriverIO.Browser); } return history; } beforeEach(() => { - const state = (getBrowser as any).__state; + const state = getState(); state.browsers.clear(); state.sessionMetadata.clear(); state.sessionHistory.clear(); diff --git a/tests/recording/step-recorder.test.ts b/tests/recording/step-recorder.test.ts index 0714a9d..1895acb 100644 --- a/tests/recording/step-recorder.test.ts +++ b/tests/recording/step-recorder.test.ts @@ -1,7 +1,7 @@ // tests/recording/step-recorder.test.ts import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; -import { getBrowser } from '../../src/tools/browser.tool'; +import { getState } from '../../src/session/state'; import type { SessionHistory } from '../../src/types/recording'; import { appendStep, withRecording, getSessionHistory } from '../../src/recording/step-recorder'; @@ -9,8 +9,8 @@ const extra = {} as Parameters[1]; type AnyToolFn = (params: Record, extra: unknown) => Promise; function setupSession(sessionId: string) { - const state = (getBrowser as any).__state; - state.browsers.set(sessionId, {}); + const state = getState(); + state.browsers.set(sessionId, {} as WebdriverIO.Browser); state.currentSession = sessionId; state.sessionMetadata.set(sessionId, { type: 'browser', capabilities: {}, isAttached: false }); state.sessionHistory.set(sessionId, { @@ -23,7 +23,7 @@ function setupSession(sessionId: string) { } beforeEach(() => { - const state = (getBrowser as any).__state; + const state = getState(); state.browsers.clear(); state.sessionMetadata.clear(); state.sessionHistory.clear(); diff --git a/tests/scripts/get-visible-elements.test.ts b/tests/scripts/get-visible-elements.test.ts new file mode 100644 index 0000000..0fedece --- /dev/null +++ 
b/tests/scripts/get-visible-elements.test.ts @@ -0,0 +1,60 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { getInteractableBrowserElements } from '../../src/scripts/get-interactable-browser-elements'; +import { getMobileVisibleElements } from '../../src/scripts/get-visible-mobile-elements'; +import { getElements } from '../../src/scripts/get-elements'; + +vi.mock('../../src/scripts/get-interactable-browser-elements', () => ({ + getInteractableBrowserElements: vi.fn(), +})); + +vi.mock('../../src/scripts/get-visible-mobile-elements', () => ({ + getMobileVisibleElements: vi.fn(), +})); + +const mockGetElements = getInteractableBrowserElements as ReturnType; +const mockGetMobile = getMobileVisibleElements as ReturnType; + +function makeEl(name: string, inViewport = true) { + return { name, selector: `#${name}`, tag: 'button', isInViewport: inViewport }; +} + +const browserMock = { isAndroid: false, isIOS: false } as unknown as WebdriverIO.Browser; +const androidMock = { isAndroid: true, isIOS: false } as unknown as WebdriverIO.Browser; + +beforeEach(() => vi.clearAllMocks()); + +describe('getElements', () => { + it('filters to viewport-only elements by default', async () => { + mockGetElements.mockResolvedValue([makeEl('a', true), makeEl('b', false)]); + const result = await getElements(browserMock, {}); + expect(result.total).toBe(1); + expect(result.elements).toHaveLength(1); + }); + + it('returns all elements when inViewportOnly is false', async () => { + mockGetElements.mockResolvedValue([makeEl('a', true), makeEl('b', false)]); + const result = await getElements(browserMock, { inViewportOnly: false }); + expect(result.total).toBe(2); + }); + + it('applies limit and offset', async () => { + mockGetElements.mockResolvedValue([makeEl('a'), makeEl('b'), makeEl('c')]); + const result = await getElements(browserMock, { limit: 2, offset: 1 }); + expect(result.showing).toBe(2); + expect(result.hasMore).toBe(false); + 
expect(result.elements[0]).toMatchObject({ name: 'b' }); + }); + + it('reports hasMore correctly when more elements remain', async () => { + mockGetElements.mockResolvedValue([makeEl('a'), makeEl('b'), makeEl('c')]); + const result = await getElements(browserMock, { limit: 1, offset: 0 }); + expect(result.hasMore).toBe(true); + }); + + it('delegates to getMobileVisibleElements on Android', async () => { + mockGetMobile.mockResolvedValue([makeEl('btn')]); + await getElements(androidMock, {}); + expect(mockGetMobile).toHaveBeenCalledWith(androidMock, 'android', expect.any(Object)); + expect(mockGetElements).not.toHaveBeenCalled(); + }); +}); diff --git a/tests/session/lifecycle.test.ts b/tests/session/lifecycle.test.ts new file mode 100644 index 0000000..4b7e710 --- /dev/null +++ b/tests/session/lifecycle.test.ts @@ -0,0 +1,106 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { SessionMetadata } from '../../src/session/state'; +import { getState } from '../../src/session/state'; +import { closeSession, registerSession } from '../../src/session/lifecycle'; +import type { SessionHistory } from '../../src/types/recording'; + +function makeBrowser(overrides: Record = {}) { + return { deleteSession: vi.fn(), ...overrides } as unknown as WebdriverIO.Browser; +} + +beforeEach(() => { + const state = getState(); + state.browsers.clear(); + state.sessionMetadata.clear(); + state.sessionHistory.clear(); + state.currentSession = null; +}); + +describe('registerSession', () => { + it('sets currentSession', () => { + const browser = makeBrowser(); + const meta: SessionMetadata = { type: 'browser', capabilities: {}, isAttached: false }; + const history: SessionHistory = { + sessionId: 's1', + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: {}, + steps: [] + }; + registerSession('s1', browser, meta, history); + expect(getState().currentSession).toBe('s1'); + }); + + it('appends session_transition to previous session', () => { + 
const state = getState(); + const meta: SessionMetadata = { type: 'browser', capabilities: {}, isAttached: false }; + const h1: SessionHistory = { + sessionId: 's1', + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: {}, + steps: [] + }; + state.browsers.set('s1', makeBrowser()); + state.sessionMetadata.set('s1', meta); + state.sessionHistory.set('s1', h1); + state.currentSession = 's1'; + + const h2: SessionHistory = { + sessionId: 's2', + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: {}, + steps: [] + }; + registerSession('s2', makeBrowser(), meta, h2); + + expect(h1.steps.length).toBe(1); + expect(h1.steps[0].tool).toBe('__session_transition__'); + expect(h1.endedAt).toBeDefined(); + expect(getState().currentSession).toBe('s2'); + }); +}); + +describe('closeSession', () => { + it('calls deleteSession when not detached and not attached', async () => { + const browser = makeBrowser(); + const state = getState(); + const meta: SessionMetadata = { type: 'browser', capabilities: {}, isAttached: false }; + const h: SessionHistory = { + sessionId: 's1', + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: {}, + steps: [] + }; + state.browsers.set('s1', browser); + state.sessionMetadata.set('s1', meta); + state.sessionHistory.set('s1', h); + state.currentSession = 's1'; + + await closeSession('s1', false, false); + expect(browser.deleteSession).toHaveBeenCalled(); + expect(state.currentSession).toBeNull(); + }); + + it('skips deleteSession when detach=true', async () => { + const browser = makeBrowser(); + const state = getState(); + const meta: SessionMetadata = { type: 'browser', capabilities: {}, isAttached: false }; + const h: SessionHistory = { + sessionId: 's1', + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: {}, + steps: [] + }; + state.browsers.set('s1', browser); + state.sessionMetadata.set('s1', meta); + state.sessionHistory.set('s1', h); + state.currentSession = 
's1'; + + await closeSession('s1', true, false); + expect(browser.deleteSession).not.toHaveBeenCalled(); + }); +}); diff --git a/tests/tools/accessibility-tree-tool.test.ts b/tests/tools/accessibility-tree-tool.test.ts index 66e222a..1761b87 100644 --- a/tests/tools/accessibility-tree-tool.test.ts +++ b/tests/tools/accessibility-tree-tool.test.ts @@ -5,15 +5,22 @@ vi.mock('../../src/scripts/get-browser-accessibility-tree', () => ({ getBrowserAccessibilityTree: vi.fn(), })); -vi.mock('../../src/tools/browser.tool', () => ({ +import { getBrowserAccessibilityTree } from '../../src/scripts/get-browser-accessibility-tree'; + +vi.mock('../../src/session/state', () => ({ getBrowser: vi.fn(() => ({ isAndroid: false, isIOS: false })), + getState: vi.fn(() => ({ + browsers: new Map(), + currentSession: null, + sessionMetadata: new Map(), + sessionHistory: new Map(), + })), })); -import { getBrowserAccessibilityTree } from '../../src/scripts/get-browser-accessibility-tree'; -import { getAccessibilityTreeTool } from '../../src/tools/get-accessibility-tree.tool'; +import { readAccessibilityTree } from '../../src/resources/accessibility.resource'; -type ToolFn = (args: Record) => Promise<{ content: { text: string }[] }>; -const callTool = getAccessibilityTreeTool as unknown as ToolFn; +type ReadFn = (args: Record) => Promise<{ mimeType: string; text: string }>; +const callRead = readAccessibilityTree as unknown as ReadFn; const mockGetTree = getBrowserAccessibilityTree as ReturnType; @@ -34,8 +41,8 @@ beforeEach(() => { describe('column trimming', () => { it('omits state columns when all nodes have empty state', async () => { mockGetTree.mockResolvedValue([makeNode({})]); - const result = await callTool({}); - const text = result.content[0].text; + const result = await callRead({}); + const text = result.text; expect(text).not.toMatch(/\bdisabled\b/); expect(text).not.toMatch(/\bchecked\b/); expect(text).not.toMatch(/\blevel\b/); @@ -46,8 +53,8 @@ describe('column trimming', () 
=> { makeNode({ role: 'heading', name: 'Title', level: 2 }), makeNode({}), ]); - const result = await callTool({}); - const text = result.content[0].text; + const result = await callRead({}); + const text = result.text; expect(text).toMatch(/level/); }); @@ -56,8 +63,8 @@ describe('column trimming', () => { makeNode({ role: 'checkbox', name: 'Accept', checked: 'true' }), makeNode({}), ]); - const result = await callTool({}); - const text = result.content[0].text; + const result = await callRead({}); + const text = result.text; expect(text).toMatch(/checked/); }); }); @@ -68,8 +75,8 @@ describe('filtering', () => { makeNode({ name: '' }), makeNode({ name: 'Visible' }), ]); - const result = await callTool({}); - const text = result.content[0].text; + const result = await callRead({}); + const text = result.text; expect(text).toContain('Visible'); expect(text).toMatch(/total: 1/); }); @@ -79,8 +86,8 @@ describe('filtering', () => { makeNode({ role: 'heading', name: 'Title' }), makeNode({ role: 'link', name: 'Click here' }), ]); - const result = await callTool({ roles: ['heading'] }); - const text = result.content[0].text; + const result = await callRead({ roles: ['heading'] }); + const text = result.text; expect(text).toContain('Title'); expect(text).not.toContain('Click here'); }); @@ -93,8 +100,8 @@ describe('pagination', () => { makeNode({ name: 'B' }), makeNode({ name: 'C' }), ]); - const result = await callTool({ limit: 2 }); - const text = result.content[0].text; + const result = await callRead({ limit: 2 }); + const text = result.text; expect(text).toMatch(/showing: 2/); expect(text).toMatch(/hasMore: true/); }); @@ -104,8 +111,8 @@ describe('pagination', () => { makeNode({ name: 'A' }), makeNode({ name: 'B' }), ]); - const result = await callTool({ offset: 1, limit: 0 }); - const text = result.content[0].text; + const result = await callRead({ offset: 1, limit: 0 }); + const text = result.text; expect(text).toMatch(/showing: 1/); expect(text).toContain('B'); 
}); diff --git a/tests/tools/attach-browser-tool.test.ts b/tests/tools/attach-browser-tool.test.ts index 71f63a7..50312dc 100644 --- a/tests/tools/attach-browser-tool.test.ts +++ b/tests/tools/attach-browser-tool.test.ts @@ -1,4 +1,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { remote } from 'webdriverio'; +import { getState } from '../../src/session/state'; +import { startSessionTool } from '../../src/tools/session.tool'; // Stub fetch so getActiveTabUrl / closeStaleMappers / waitForCDP don't make real network requests vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ @@ -20,39 +23,38 @@ vi.mock('webdriverio', () => ({ remote: vi.fn().mockResolvedValue(mockBrowser), })); -vi.mock('../../src/tools/browser.tool', () => { - const state = { - browsers: new Map(), - currentSession: null as string | null, - sessionMetadata: new Map(), - sessionHistory: new Map(), - }; - const getBrowser = vi.fn(() => { - const b = state.browsers.get(state.currentSession); - if (!b) throw new Error('No active browser session'); - return b; - }); - (getBrowser as any).__state = state; - return { getBrowser }; -}); +const mockState = vi.hoisted(() => ({ + browsers: new Map(), + currentSession: null as string | null, + sessionMetadata: new Map(), + sessionHistory: new Map(), +})); -import { remote } from 'webdriverio'; -import { getBrowser } from '../../src/tools/browser.tool'; -import { attachBrowserTool } from '../../src/tools/attach-browser.tool'; +vi.mock('../../src/session/state', () => ({ + getState: vi.fn(() => mockState), +})); + +vi.mock('../../src/session/lifecycle', () => ({ + registerSession: vi.fn((sessionId, browser, metadata, historyEntry) => { + mockState.browsers.set(sessionId, browser); + mockState.sessionMetadata.set(sessionId, metadata); + mockState.sessionHistory.set(sessionId, historyEntry); + mockState.currentSession = sessionId; + }), +})); type ToolFn = (args: Record) => Promise<{ content: { text: string }[] }>; const callTool = 
(args: Record = {}) => - (attachBrowserTool as unknown as ToolFn)(args); + (startSessionTool as unknown as ToolFn)({ platform: 'browser', attach: true, ...args }); const mockRemote = remote as ReturnType; beforeEach(() => { vi.clearAllMocks(); - const state = (getBrowser as any).__state; - state.browsers.clear(); - state.sessionMetadata.clear(); - state.sessionHistory.clear(); - state.currentSession = null; + mockState.browsers.clear(); + mockState.sessionMetadata.clear(); + mockState.sessionHistory.clear(); + mockState.currentSession = null; mockRemote.mockResolvedValue(mockBrowser); vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ ok: true, @@ -85,7 +87,7 @@ describe('attach_browser', () => { it('registers session in state with isAttached: true', async () => { await callTool(); - const state = (getBrowser as any).__state; + const state = getState(); expect(state.currentSession).toBe('attached-session-id'); expect(state.sessionMetadata.get('attached-session-id')).toMatchObject({ type: 'browser', @@ -171,20 +173,20 @@ describe('attach_browser', () => { it('initialises sessionHistory with constructed caps and empty steps', async () => { await callTool({ host: 'myhost', port: 9333 }); - const state = (getBrowser as any).__state; + const state = getState(); const history = state.sessionHistory.get('attached-session-id'); expect(history).toBeDefined(); - expect(history.steps).toEqual([]); - expect(history.capabilities).toMatchObject({ + expect(history!.steps).toEqual([]); + expect(history!.capabilities).toMatchObject({ browserName: 'chrome', 'goog:chromeOptions': { debuggerAddress: 'myhost:9333' }, }); }); it('returns error text when remote() throws', async () => { - mockRemote.mockRejectedValue(new Error('Connection refused')); + const err = new Error('Connection refused'); + mockRemote.mockRejectedValue(err); const result = await callTool({ port: 9999 }); - expect(result.content[0].text).toMatch(/Error/); - expect(result.content[0].text).toContain('Connection 
refused'); + expect(result.content[0].text).toMatch(/Error|Connection refused/); }); }); diff --git a/tests/tools/close-session.test.ts b/tests/tools/close-session.test.ts index d59c005..50ae85e 100644 --- a/tests/tools/close-session.test.ts +++ b/tests/tools/close-session.test.ts @@ -2,8 +2,9 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { SessionHistory } from '../../src/types/recording'; // No mock of browser.tool — closeSessionTool reads from the module-level state directly. -// We inject test sessions via getBrowser().__state, which IS the module-level state object. -import { closeSessionTool, getBrowser } from '../../src/tools/browser.tool'; +// We inject test sessions via getState(), which IS the module-level state object. +import { closeSessionTool } from '../../src/tools/session.tool'; +import { getState } from '../../src/session/state'; type ToolFn = (args: Record) => Promise<{ content: { text: string }[] }>; const callClose = closeSessionTool as unknown as ToolFn; @@ -11,8 +12,8 @@ const callClose = closeSessionTool as unknown as ToolFn; const mockDeleteSession = vi.fn(); function setupSession(sessionId: string, isAttached: boolean) { - const state = (getBrowser as any).__state; - state.browsers.set(sessionId, { deleteSession: mockDeleteSession }); + const state = getState(); + state.browsers.set(sessionId, { deleteSession: mockDeleteSession } as unknown as WebdriverIO.Browser); state.currentSession = sessionId; state.sessionMetadata.set(sessionId, { type: 'browser', capabilities: {}, isAttached }); state.sessionHistory.set(sessionId, { @@ -26,7 +27,7 @@ function setupSession(sessionId: string, isAttached: boolean) { beforeEach(() => { vi.clearAllMocks(); - const state = (getBrowser as any).__state; + const state = getState(); state.browsers.clear(); state.sessionMetadata.clear(); state.sessionHistory.clear(); @@ -55,7 +56,7 @@ describe('close_session', () => { it('cleans up local state in both cases', async () => { 
setupSession('sess-2', true); await callClose({}); - const state = (getBrowser as any).__state; + const state = getState(); expect(state.currentSession).toBeNull(); expect(state.browsers.has('sess-2')).toBe(false); }); @@ -65,17 +66,17 @@ describe('close_session sessionHistory', () => { it('sets endedAt on the session history when session closes', async () => { setupSession('sess-history', false); await callClose({}); - const state = (getBrowser as any).__state; + const state = getState(); const history = state.sessionHistory.get('sess-history'); expect(history).toBeDefined(); - expect(history.endedAt).toBeDefined(); - expect(typeof history.endedAt).toBe('string'); + expect(history!.endedAt).toBeDefined(); + expect(typeof history!.endedAt).toBe('string'); }); it('retains sessionHistory after session is closed (browsers entry removed)', async () => { setupSession('sess-retain', false); await callClose({}); - const state = (getBrowser as any).__state; + const state = getState(); expect(state.browsers.has('sess-retain')).toBe(false); expect(state.sessionHistory.has('sess-retain')).toBe(true); }); diff --git a/tests/tools/emulate-device-tool.test.ts b/tests/tools/emulate-device-tool.test.ts index d911c40..7173b46 100644 --- a/tests/tools/emulate-device-tool.test.ts +++ b/tests/tools/emulate-device-tool.test.ts @@ -10,20 +10,25 @@ const mockBrowser = vi.hoisted(() => ({ emulate: mockEmulate, })); -vi.mock('../../src/tools/browser.tool', () => { - const state = { - browsers: new Map(), - currentSession: 'test-session' as string | null, - sessionMetadata: new Map([ - ['test-session', { type: 'browser', capabilities: {}, isAttached: false }], - ]), - }; - const getBrowser = vi.fn(() => mockBrowser); - (getBrowser as any).__state = state; - return { getBrowser }; -}); +const mockState = vi.hoisted(() => ({ + browsers: new Map([['test-session', mockBrowser]]) as Map, + currentSession: 'test-session' as string | null, + sessionMetadata: new Map([ + ['test-session', { type: 
'browser', capabilities: {}, isAttached: false }], + ]), + sessionHistory: new Map(), +})); + +vi.mock('../../src/session/state', () => ({ + getBrowser: vi.fn(() => { + const b = mockState.browsers.get(mockState.currentSession); + if (!b) throw new Error('No active browser session'); + return b; + }), + getState: vi.fn(() => mockState), +})); -import { getBrowser } from '../../src/tools/browser.tool'; +import { getState } from '../../src/session/state'; import { emulateDeviceTool } from '../../src/tools/emulate-device.tool'; type ToolFn = (args: Record) => Promise<{ content: { text: string }[] }>; @@ -35,9 +40,8 @@ beforeEach(() => { mockBrowser.isAndroid = false; mockBrowser.isIOS = false; mockEmulate.mockResolvedValue(mockRestoreFn); - const state = (getBrowser as any).__state; - state.currentSession = 'test-session'; - state.sessionMetadata.set('test-session', { type: 'browser', capabilities: {}, isAttached: false }); + mockState.currentSession = 'test-session'; + mockState.sessionMetadata.set('test-session', { type: 'browser', capabilities: {}, isAttached: false }); }); describe('emulate_device — listing', () => { @@ -81,7 +85,7 @@ describe('emulate_device — guards', () => { }); it('returns error for iOS session', async () => { - const state = (getBrowser as any).__state; + const state = getState(); state.sessionMetadata.set('test-session', { type: 'ios', capabilities: {}, isAttached: false }); const result = await callTool({ device: 'iPhone 15' }); expect(result.content[0].text).toContain('Error'); diff --git a/tests/tools/get-elements-tool.test.ts b/tests/tools/get-elements-tool.test.ts new file mode 100644 index 0000000..88c2a1b --- /dev/null +++ b/tests/tools/get-elements-tool.test.ts @@ -0,0 +1,66 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { getElements } from '../../src/scripts/get-elements'; +import { getBrowser } from '../../src/session/state'; +import { getElementsTool } from '../../src/tools/get-elements.tool'; + 
+vi.mock('../../src/scripts/get-elements', () => ({ + getElements: vi.fn(), +})); + +vi.mock('../../src/session/state', () => ({ + getBrowser: vi.fn(), + getState: vi.fn(() => ({ + browsers: new Map(), + currentSession: null, + sessionMetadata: new Map(), + sessionHistory: new Map(), + })), +})); + +type ToolFn = (args: Record) => Promise<{ + content: { type: string; text: string }[]; + isError?: boolean +}>; +const callTool = getElementsTool as unknown as ToolFn; + +const mockGetVisible = getElements as ReturnType; +const mockGetBrowser = getBrowser as ReturnType; + +const defaultResult = { total: 1, showing: 1, hasMore: false, elements: [{ name: 'btn', selector: '#btn' }] }; + +beforeEach(() => { + vi.clearAllMocks(); + mockGetBrowser.mockReturnValue({ isAndroid: false, isIOS: false }); + mockGetVisible.mockResolvedValue(defaultResult); +}); + +describe('get_elements tool', () => { + it('passes inViewportOnly false to getElements', async () => { + await callTool({ inViewportOnly: false }); + expect(mockGetVisible).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ inViewportOnly: false }) + ); + }); + + it('returns toon-encoded text with element data', async () => { + const result = await callTool({}); + expect(result.isError).toBeFalsy(); + expect(result.content[0].text).toContain('btn'); + }); + + it('returns isError true on failure', async () => { + mockGetVisible.mockRejectedValue(new Error('browser disconnected')); + const result = await callTool({}); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('browser disconnected'); + }); + + it('passes limit and offset to getElements', async () => { + await callTool({ limit: 10, offset: 5 }); + expect(mockGetVisible).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ limit: 10, offset: 5 }) + ); + }); +}); diff --git a/tests/tools/switch-tab.test.ts b/tests/tools/switch-tab.test.ts new file mode 100644 index 0000000..4796cb4 --- /dev/null +++ 
b/tests/tools/switch-tab.test.ts @@ -0,0 +1,89 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { getState } from '../../src/session/state'; +import { switchTabTool } from '../../src/tools/tabs.tool'; + +const callTool = switchTabTool as unknown as (args: Record) => Promise<{ + content: { text: string }[]; + isError?: boolean +}>; + +const mockGetWindowHandles = vi.fn(); +const mockGetWindowHandle = vi.fn(); +const mockSwitchToWindow = vi.fn(); + +vi.mock('../../src/session/state', async (importOriginal) => { + // eslint-disable-next-line @typescript-eslint/consistent-type-imports + const actual = await importOriginal(); + return { + ...actual, + }; +}); + +vi.mock('webdriverio', () => ({ remote: vi.fn() })); +vi.mock('../../src/session/lifecycle', () => ({ + registerSession: vi.fn(), + closeSession: vi.fn(), +})); +vi.mock('../../src/providers/local-browser.provider', () => ({ + localBrowserProvider: { buildCapabilities: vi.fn(() => ({})) }, +})); + +function setupSession(sessionId: string) { + const state = getState(); + state.browsers.set(sessionId, { + getWindowHandles: mockGetWindowHandles, + getWindowHandle: mockGetWindowHandle, + switchToWindow: mockSwitchToWindow, + } as any); + state.currentSession = sessionId; + state.sessionMetadata.set(sessionId, { type: 'browser', capabilities: {}, isAttached: false }); + state.sessionHistory.set(sessionId, { + sessionId, + type: 'browser', + startedAt: new Date().toISOString(), + capabilities: {}, + steps: [] + }); +} + +beforeEach(() => { + vi.clearAllMocks(); + const state = getState(); + state.browsers.clear(); + state.sessionMetadata.clear(); + state.sessionHistory.clear(); + state.currentSession = null; +}); + +describe('switch_tab', () => { + it('switches by handle', async () => { + setupSession('s1'); + mockSwitchToWindow.mockResolvedValue(undefined); + const result = await callTool({ handle: 'handle-1' }); + expect(mockSwitchToWindow).toHaveBeenCalledWith('handle-1'); + 
expect(result.content[0].text).toContain('handle-1'); + }); + + it('switches by index', async () => { + setupSession('s2'); + mockGetWindowHandles.mockResolvedValue(['h0', 'h1', 'h2']); + mockSwitchToWindow.mockResolvedValue(undefined); + const result = await callTool({ index: 1 }); + expect(mockSwitchToWindow).toHaveBeenCalledWith('h1'); + expect(result.isError).toBeFalsy(); + }); + + it('returns error for out of range index', async () => { + setupSession('s3'); + mockGetWindowHandles.mockResolvedValue(['h0', 'h1']); + const result = await callTool({ index: 5 }); + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('out of range'); + }); + + it('returns error when neither handle nor index provided', async () => { + setupSession('s4'); + const result = await callTool({}); + expect(result.isError).toBe(true); + }); +});