diff --git a/.gitignore b/.gitignore index 53ff28412..fce89463a 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,4 @@ DerivedData /.pr-learning /repros /.xcodebuildmcp +/out.nosync diff --git a/CHANGELOG.md b/CHANGELOG.md index f924cc6f7..d1c58fb5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,64 @@ ### Added - Added `nextSteps` hint lines to MCP `structuredContent` and CLI `--output json` envelopes so agents can consume follow-up actions without scraping text. CLI JSON renders shell command lines; MCP structured content renders MCP tool-call hints. Structured result schemas that include `nextSteps` now use schema version 2; existing version 1 schema files remain available for current validators. +- Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged. +- Added `batch` for executing multiple AXe UI automation steps in one simulator session. +- Added `wait_for_ui` for polling runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. +- Added structured element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions. +- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it. +- Added `drag` for element-ref based drag gestures, enabling agents to expand foreground sheets and drag real scroll/list regions without raw coordinate guesses. + +### Changed + +- Runtime snapshot guidance no longer advertises synthetic sheet swipe targets for foreground sheets. Agents should use real sheet grabber expansion and real descendant scroll/list targets with `drag` instead of inferred app/window-root sheet swipes. 
+ +### Fixed + +- Fixed `swipe` distance handling so distance is a normalized stroke fraction used for endpoint calculation, and improved sheet/list scroll guidance so real descendant scroll containers are preferred over application/window root fallbacks. +- Fixed compact runtime snapshots so top-level app and window refs are not advertised as swipe targets just because a generic descendant overflows their frame. +- Fixed `wait_for_ui` focus waits so elements that do not expose focus state return a typed recoverable error instead of timing out. +- Fixed invalid `touch` calls so structured output no longer reports a fake touch event when neither `down` nor `up` was requested. +- Fixed compact runtime snapshots so standalone `other` elements, such as keyboard suggestions, are not advertised as swipe targets unless they behave like scrollable containers. +- Fixed runtime snapshots so off-screen elements, and clipped elements whose activation point is offscreen, are not advertised as actionable targets. +- Fixed full-screen swipe gestures so app-level scroll refs avoid unsafe screen edges such as the status bar and notch area. +- Clarified runtime snapshot tips so agents know element refs are snapshot-specific and must come from the latest `snapshot_ui` or `wait_for_ui` output, and only show swipe guidance when the snapshot includes a scroll ref. +- Made `wait_for_ui` `textContains` matching case-insensitive so assertions survive platform text normalization such as keyboard auto-capitalization, treat duplicate exact text matches as successful presence assertions, narrow broad selectors by text before reporting ambiguity, reject `text` on non-`textContains` predicates instead of silently ignoring it, and keep recoverable-error candidates compact in structured output. +- Fixed `tap` on SwiftUI switch element refs by using a touch down/up activation instead of AXe's coordinate tap path. 
+- Fixed selector fallback for AXe duplicate-match diagnostics that include parenthesized match counts. +- Fixed semantic taps and text-field focusing so element refs with duplicate AXe selectors use their resolved snapshot coordinates immediately. +- Fixed bottom-clipped UI automation targets so taps, touches, and long presses use a visible activation point instead of the hidden center of the accessibility frame. +- Fixed app-level horizontal swipes so full-screen refs use a content-area y-coordinate instead of missing horizontal carousels by swiping near the hero area. +- Fixed CLI commands with `simulatorId`-only contracts so `simulatorName` session defaults are resolved to a simulator ID without adding conflicting simulator arguments to tools that already accept `simulatorName`, and fixed simulator lifecycle tools so name-only defaults resolve before simctl operations. +- Fixed `snapshot_ui` and `wait_for_ui` next steps so they use the resolved simulator ID instead of leaking `SIMULATOR_UUID` placeholders. +- Fixed the Weather example app so saved-location rows are not reused as search-result rows after editing locations. +- Fixed the Weather example app's current-location button so it selects the current saved location instead of appearing as a no-op UI automation target. +- Fixed `type_text` so AXe-unsupported international/accented characters fail before focusing the field, with a clear recoverable error instead of a generic typing failure. +- Fixed `snapshot_ui` next-step guidance so the suggested tap ref prefers useful tappable controls over text fields, sheet grabbers, close buttons, and clear-search buttons. +- Fixed compact runtime snapshot JSON so target ordering matches compact text output and prioritizes useful content targets before low-value sheet chrome. +- Fixed `wait_for_ui` success output so compact text and JSON include the matched elements that satisfied the wait predicate. 
+- Fixed `wait_for_ui textContains` so duplicate elements with the same matching visible text satisfy presence-style assertions instead of reporting ambiguity. +- Fixed CLI `--style minimal` so final text output suppresses generated next steps for daemon-routed tools as intended. +- Fixed `snapshot_ui` next-step guidance so snapshots with no tappable targets no longer suggest tapping the first non-actionable element. +- Fixed next-step rendering for tools shared across workflows so follow-up commands prefer the workflow that produced the result instead of drifting to another workflow alias. +- Fixed `snapshot_ui` next-step guidance so calculator-style utility and operator buttons no longer outrank more useful digit/content controls. +- Fixed `snapshot_ui` compact text, JSON, and next-step guidance so already-selected segmented controls no longer outrank unselected choices. +- Fixed compact runtime snapshots and next-step guidance so sheet grabbers remain visible as low-priority targets, allowing agents to expand or dismiss sheets without outranking useful content controls. +- Fixed compact wait-match rows so static assertion matches render with `none` instead of exposing low-level long-press/touch actions as if they were primary agent actions. +- Fixed compact runtime snapshot ordering and next-step guidance so destructive controls such as Remove/Delete are demoted behind safer content and navigation targets. +- Clarified simulator keyboard shortcut failures when Simulator.app is running without a visible device window. +- Fixed hardware button automation so successful button presses wait briefly for system UI transitions before returning, reducing stale immediate follow-up snapshots. +- Fixed runtime snapshots so modal sheet hosts remain swipeable after the currently visible sheet content fits inside the viewport. +- Fixed `wait_for_ui` validation so unknown JSON fields are rejected instead of silently broadening waits. 
+- Fixed CLI numeric array flags so comma-separated values such as `--key-codes 23,18,14` are parsed as numbers instead of failing validation. +- Fixed runtime snapshots so unlabeled internal custom-action nodes, such as SpringBoard icon subviews, are no longer advertised as likely tap targets. +- Fixed AXe bundling so downloaded artifacts must report the pinned AXe version, and dirty local AXe builds require an explicit opt-in. +- Fixed runtime snapshot tips so compact output names all target-ref action tools, including `long_press` and `touch`. +- Clarified key press and key sequence tool descriptions so agents know key codes are AXe HID key codes and should prefer `type_text` for text entry. +- Clarified `wait_for_ui` timeout recovery hints so agents know selector fields match exact values and should use `textContains` for partial visible text. +- Fixed UI action success next steps so agents are prompted to refresh runtime snapshots before reusing element refs after actions such as swipes. +- Fixed `snapshot_ui` next-step guidance so state-changing controls such as segmented units and switches remain available in targets without being promoted as generic tap or batch suggestions. +- Fixed `snapshot_ui` tap next-step priority so content-rich cards are suggested before navigation controls like Settings. +- Fixed successful UI action results so they include a fresh runtime snapshot and actionable next steps, reducing follow-up refresh calls after taps, typing, swipes, and batches. 
## [2.5.2] @@ -642,4 +700,3 @@ Please note that the UI automation features are an early preview and currently i - Initial release of XcodeBuildMCP - Basic support for building iOS and macOS applications - diff --git a/example_projects/.xcodebuildmcp/config.yaml b/example_projects/.xcodebuildmcp/config.yaml index 57308faa0..7b569a1d5 100644 --- a/example_projects/.xcodebuildmcp/config.yaml +++ b/example_projects/.xcodebuildmcp/config.yaml @@ -4,13 +4,11 @@ sessionDefaultsProfiles: workspacePath: ./iOS_Calculator/CalculatorApp.xcworkspace scheme: CalculatorApp simulatorName: iPhone 17 Pro - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 simulatorPlatform: iOS Simulator ios-test: projectPath: ./iOS/MCPTest.xcodeproj scheme: MCPTest simulatorName: iPhone 17 Pro - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 simulatorPlatform: iOS Simulator macos-test: projectPath: ./macOS/MCPTest.xcodeproj diff --git a/example_projects/Weather/.xcodebuildmcp/config.yaml b/example_projects/Weather/.xcodebuildmcp/config.yaml index 6663899fa..899ef7cf5 100644 --- a/example_projects/Weather/.xcodebuildmcp/config.yaml +++ b/example_projects/Weather/.xcodebuildmcp/config.yaml @@ -7,7 +7,7 @@ sentryDisabled: false sessionDefaults: projectPath: Weather.xcodeproj scheme: Weather - simulatorName: iPhone 17 Pro + simulatorName: iPhone 17 Pro Max setupPreferences: platforms: - iOS diff --git a/example_projects/Weather/README.md b/example_projects/Weather/README.md index 8becf8103..6e7aa9325 100644 --- a/example_projects/Weather/README.md +++ b/example_projects/Weather/README.md @@ -2,22 +2,14 @@ Atmos Weather is a native SwiftUI weather app prototype for iOS. 
-## Launch with mock weather data +## Launch -Build and run the app with XcodeBuildMCP first: +Build and run the app with XcodeBuildMCP: ```bash ../../build/cli.js simulator build-and-run ``` -Then relaunch the installed app with the mock API argument: - -```bash -../../build/cli.js simulator launch-app \ - --bundle-id com.sentry.weather.Weather \ - --args=--mock-weather-api -``` - ## JSON fixtures Fixture JSON files live in: @@ -98,4 +90,4 @@ Run the app test suite through XcodeBuildMCP: ../../build/cli.js simulator test ``` -UI tests inject `--mock-weather-api` themselves so they do not depend on the production API endpoint. \ No newline at end of file +The app uses bundled deterministic weather data so UI tests do not depend on the production API endpoint. \ No newline at end of file diff --git a/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift b/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift index 6217871d5..730549e5e 100644 --- a/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift +++ b/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift @@ -29,8 +29,10 @@ struct MockWeatherAPIClient: WeatherAPIClient, Sendable { guard !trimmed.isEmpty else { return [] } let needle = trimmed.localizedLowercase - return fixtures.searchPool.filter { location in - location.name.localizedLowercase.contains(needle) + var seenLocationIDs = Set() + return (fixtures.locations + fixtures.searchPool).filter { location in + guard seenLocationIDs.insert(location.id).inserted else { return false } + return location.name.localizedLowercase.contains(needle) || location.subtitle.localizedLowercase.contains(needle) || (location.country?.localizedLowercase.contains(needle) ?? 
false) } diff --git a/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift b/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift index 7b643f61e..765a9f094 100644 --- a/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift +++ b/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift @@ -103,7 +103,7 @@ struct LocationPickerView: View { } private var currentLocationButton: some View { - Button(action: {}) { + Button(action: selectCurrentLocation) { HStack(spacing: 12) { Image(systemName: "location.fill") .font(.system(size: 14)) @@ -145,6 +145,7 @@ struct LocationPickerView: View { onSelect: { select(location) }, onRemove: { remove(location) } ) + .id("saved-\(location.id)-\(isEditing)") } } else if isLoading { ForEach(0..<3, id: \.self) { _ in SearchSkeletonRow() } @@ -160,6 +161,7 @@ struct LocationPickerView: View { onPreview: { preview(location) }, onAdd: { add(location) } ) + .id("search-\(location.id)-\(isSaved(location))-\(justAddedID == location.id)") } } } @@ -229,6 +231,11 @@ struct LocationPickerView: View { justAddedID = location.id } + private func selectCurrentLocation() { + guard let currentLocation = savedLocations.first else { return } + select(currentLocation) + } + private func clearAddedIndicator() async { guard let id = justAddedID else { return } try? await Task.sleep(for: .milliseconds(1_400)) diff --git a/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift b/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift index a6412cfbb..1fd30bbd3 100644 --- a/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift +++ b/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift @@ -96,6 +96,7 @@ struct SearchLocationRow: View { .frame(maxWidth: .infinity, alignment: .leading) } .buttonStyle(.plain) + .accessibilityValue(saved || added ? 
"saved" : "not saved") VStack(alignment: .trailing, spacing: 3) { Text(WeatherUnitFormatter.temperatureString(location.temperatureC, units: units)) diff --git a/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift b/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift index a571db22e..e118c23b0 100644 --- a/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift +++ b/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift @@ -132,6 +132,7 @@ private struct SegmentRow: View { Button(optionLabel(option)) { selection = option } + .accessibilityValue(selection == option ? "selected" : "not selected") .font(.system(size: 13, weight: .medium)) .foregroundStyle(selection == option ? .black : .white) .padding(.horizontal, 14) diff --git a/example_projects/Weather/Weather/WeatherApp.swift b/example_projects/Weather/Weather/WeatherApp.swift index 3ec87c754..d1e5a4d80 100644 --- a/example_projects/Weather/Weather/WeatherApp.swift +++ b/example_projects/Weather/Weather/WeatherApp.swift @@ -10,15 +10,7 @@ import SwiftUI @main struct WeatherApp: App { - private let weatherService: WeatherService - - init() { - if CommandLine.arguments.contains("--mock-weather-api") { - weatherService = .mock - } else { - weatherService = .production - } - } + private let weatherService: WeatherService = .mock var body: some Scene { WindowGroup { diff --git a/example_projects/Weather/WeatherTests/WeatherTests.swift b/example_projects/Weather/WeatherTests/WeatherTests.swift index 1a8d8f9bb..c0a2ae8bb 100644 --- a/example_projects/Weather/WeatherTests/WeatherTests.swift +++ b/example_projects/Weather/WeatherTests/WeatherTests.swift @@ -47,6 +47,9 @@ struct WeatherTests { let byCountry = try await service.searchLocations(matching: "gb") #expect(byCountry.map(\.name).contains("London")) + + let savedLocationByName = try await service.searchLocations(matching: "tokyo") + #expect(savedLocationByName.contains { $0.name == "Tokyo" }) 
} @Test func emptySearchReturnsNoResults() async throws { diff --git a/example_projects/Weather/WeatherUITests/WeatherUITests.swift b/example_projects/Weather/WeatherUITests/WeatherUITests.swift index 75c98bb47..dcffc8058 100644 --- a/example_projects/Weather/WeatherUITests/WeatherUITests.swift +++ b/example_projects/Weather/WeatherUITests/WeatherUITests.swift @@ -69,7 +69,6 @@ final class WeatherUITests: XCTestCase { @MainActor private func launchApp() -> XCUIApplication { let app = XCUIApplication() - app.launchArguments.append("--mock-weather-api") app.launch() return app } diff --git a/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift b/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift index 9b7f410fc..75615e972 100644 --- a/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift +++ b/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift @@ -20,7 +20,6 @@ final class WeatherUITestsLaunchTests: XCTestCase { @MainActor func testLaunch() throws { let app = XCUIApplication() - app.launchArguments.append("--mock-weather-api") app.launch() // Insert steps here to perform after app launch but before taking a screenshot, diff --git a/example_projects/iOS/.xcodebuildmcp/config.yaml b/example_projects/iOS/.xcodebuildmcp/config.yaml index 568d5e4d2..ee106f9ab 100644 --- a/example_projects/iOS/.xcodebuildmcp/config.yaml +++ b/example_projects/iOS/.xcodebuildmcp/config.yaml @@ -3,7 +3,7 @@ enabledWorkflows: ['simulator', 'ui-automation', 'debugging', 'logging'] sessionDefaults: projectPath: ./MCPTest.xcodeproj scheme: MCPTest - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 + simulatorName: iPhone 17 Pro useLatestOS: true platform: iOS Simulator bundleId: io.sentry.MCPTest diff --git a/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml b/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml index b84c72162..44458a91a 100644 --- 
a/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml +++ b/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml @@ -11,12 +11,11 @@ sessionDefaults: workspacePath: CalculatorApp.xcworkspace scheme: CalculatorApp configuration: Debug - simulatorId: A2C64636-37E9-4B68-B872-E7F0A82A5670 simulatorPlatform: iOS Simulator useLatestOS: true arch: arm64 suppressWarnings: false - derivedDataPath: ./iOS_Calculator/.derivedData + derivedDataPath: ./.build/DerivedData preferXcodebuild: true bundleId: io.sentry.calculatorapp simulatorName: iPhone 17 Pro diff --git a/manifests/tools/batch.yaml b/manifests/tools/batch.yaml new file mode 100644 index 000000000..699eab740 --- /dev/null +++ b/manifests/tools/batch.yaml @@ -0,0 +1,17 @@ +id: batch +module: mcp/tools/ui-automation/batch +names: + mcp: batch + cli: batch +description: >- + UI automation batch for multiple same-screen elementRef taps, especially visible settings switches that can be toggled without intermediate assertions. The input key is steps, never commands, and each step is an object such as {"action":"tap","elementRef":"e1"}; do not pass raw command strings. Use refs from the latest snapshot_ui or wait_for_ui output, for example {"steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}. Omit preDelay/postDelay for switch elementRefs; switches execute as touch down/up steps and reject delays. +outputSchema: + schema: xcodebuildmcp.output.ui-action-result + version: '2' +routing: + stateful: true +annotations: + title: Batch UI Actions + readOnlyHint: true + destructiveHint: false + openWorldHint: false diff --git a/manifests/tools/drag.yaml b/manifests/tools/drag.yaml new file mode 100644 index 000000000..3ed9078dc --- /dev/null +++ b/manifests/tools/drag.yaml @@ -0,0 +1,17 @@ +id: drag +module: mcp/tools/ui-automation/drag +names: + mcp: drag + cli: drag +description: >- + Drag from a visible runtime elementRef in a direction, then return a refreshed runtime UI snapshot. 
Use this for exposed sheet grabbers or real scroll/list content refs when nextSteps suggests dragging; do not use raw screen coordinates. +outputSchema: + schema: xcodebuildmcp.output.ui-action-result + version: "2" +routing: + stateful: true +annotations: + title: Drag + readOnlyHint: true + destructiveHint: false + openWorldHint: false diff --git a/manifests/tools/key_press.yaml b/manifests/tools/key_press.yaml index 1d2d60a7f..56336773e 100644 --- a/manifests/tools/key_press.yaml +++ b/manifests/tools/key_press.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/key_press names: mcp: key_press cli: key-press -description: Press key by keycode. +description: Press one hardware key using an AXe HID key code. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "2" + version: '2' +routing: + stateful: true annotations: title: Key Press readOnlyHint: true diff --git a/manifests/tools/key_sequence.yaml b/manifests/tools/key_sequence.yaml index d313f71a0..8550b6396 100644 --- a/manifests/tools/key_sequence.yaml +++ b/manifests/tools/key_sequence.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/key_sequence names: mcp: key_sequence cli: key-sequence -description: Press a sequence of keys by their keycodes. +description: Press hardware keys using AXe HID key codes. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "2" + version: '2' +routing: + stateful: true annotations: title: Key Sequence readOnlyHint: true diff --git a/manifests/tools/long_press.yaml b/manifests/tools/long_press.yaml index 0e39ab876..7aee1112e 100644 --- a/manifests/tools/long_press.yaml +++ b/manifests/tools/long_press.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/long_press names: mcp: long_press cli: long-press -description: Long press at coords. +description: Long press a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Long Press readOnlyHint: true diff --git a/manifests/tools/snapshot_ui.yaml b/manifests/tools/snapshot_ui.yaml index 81d659216..c0746db8c 100644 --- a/manifests/tools/snapshot_ui.yaml +++ b/manifests/tools/snapshot_ui.yaml @@ -3,28 +3,12 @@ module: mcp/tools/ui-automation/snapshot_ui names: mcp: snapshot_ui cli: snapshot-ui -description: Print view hierarchy with precise view coordinates (x, y, width, height) for visible elements. +description: Capture a semantic rs/1 runtime UI snapshot with elementRef targets. Observe once, use tap for one target or batch for multiple same-screen targets, and refresh after navigation, scrolling, sheet changes, or obvious layout changes. 
outputSchema: schema: xcodebuildmcp.output.capture-result - version: "2" -nextSteps: - - label: Refresh after layout changes - toolId: snapshot_ui - params: - simulatorId: SIMULATOR_UUID - when: success - - label: Tap on element - toolId: tap - params: - simulatorId: SIMULATOR_UUID - x: 0 - y: 0 - when: success - - label: Take screenshot for verification - toolId: screenshot - params: - simulatorId: SIMULATOR_UUID - when: success + version: '2' +routing: + stateful: true annotations: title: Snapshot UI readOnlyHint: true diff --git a/manifests/tools/swipe.yaml b/manifests/tools/swipe.yaml index e365373ab..b916d65c4 100644 --- a/manifests/tools/swipe.yaml +++ b/manifests/tools/swipe.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/swipe names: mcp: swipe cli: swipe -description: Swipe between points. +description: Swipe within a scrollable UI element using a visible element reference from the current UI. Optional distance is a normalized stroke fraction greater than 0 and up to 1. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Swipe readOnlyHint: true diff --git a/manifests/tools/tap.yaml b/manifests/tools/tap.yaml index f2c3ba405..9587e704c 100644 --- a/manifests/tools/tap.yaml +++ b/manifests/tools/tap.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/tap names: mcp: tap cli: tap -description: Tap UI element by accessibility id/label (recommended) or coordinates as fallback. +description: Tap one elementRef from the latest snapshot_ui or wait_for_ui output. For multiple same-screen taps or visible switch toggles with no intermediate assertion, prefer batch. Other same-screen refs may remain usable after success; refresh after navigation, scrolling, sheet changes, or obvious layout changes. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "2" + version: '2' +routing: + stateful: true annotations: title: Tap readOnlyHint: true diff --git a/manifests/tools/touch.yaml b/manifests/tools/touch.yaml index 4faf99470..3849c5bc8 100644 --- a/manifests/tools/touch.yaml +++ b/manifests/tools/touch.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/touch names: mcp: touch cli: touch -description: Touch down/up at coords. +description: Send touch down/up events to a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Touch readOnlyHint: true diff --git a/manifests/tools/type_text.yaml b/manifests/tools/type_text.yaml index de6a08a9a..c2cd64ec7 100644 --- a/manifests/tools/type_text.yaml +++ b/manifests/tools/type_text.yaml @@ -3,10 +3,13 @@ module: mcp/tools/ui-automation/type_text names: mcp: type_text cli: type-text -description: Type text. +description: >- + Type text into a UI element by elementRef from a current rs/1 runtime snapshot, optionally replacing existing field contents. elementRef is required; do not call with only text. Example input: {"elementRef":"e8","text":"London","replaceExisting":true}. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Type Text readOnlyHint: true diff --git a/manifests/tools/wait_for_ui.yaml b/manifests/tools/wait_for_ui.yaml new file mode 100644 index 000000000..bac213089 --- /dev/null +++ b/manifests/tools/wait_for_ui.yaml @@ -0,0 +1,25 @@ +id: wait_for_ui +module: mcp/tools/ui-automation/wait_for_ui +names: + mcp: wait_for_ui + cli: wait-for-ui +description: Poll rs/1 runtime UI snapshots until a selector-based UI predicate, selector-free textContains/gone text predicate, or selector-free settled predicate is satisfied, then record the latest snapshot. 
Prefer this after navigation or layout changes. Select with elementRef, identifier, label, role, or value when a selector is needed. +outputSchema: + schema: xcodebuildmcp.output.capture-result + version: '2' +routing: + stateful: true +annotations: + title: Wait for UI + readOnlyHint: true + destructiveHint: false + openWorldHint: false +nextSteps: + - label: Capture a fresh runtime UI snapshot + toolId: snapshot_ui + priority: 1 + when: success + - label: Wait until the UI is settled + toolId: wait_for_ui + priority: 2 + when: success diff --git a/manifests/workflows/ui-automation.yaml b/manifests/workflows/ui-automation.yaml index c11e5dd72..0dcde9f50 100644 --- a/manifests/workflows/ui-automation.yaml +++ b/manifests/workflows/ui-automation.yaml @@ -3,14 +3,17 @@ title: UI Automation description: UI automation and accessibility testing tools for iOS simulators. Perform gestures, interactions, screenshots, and UI analysis for automated testing workflows. targetPlatforms: [iOS] tools: + - snapshot_ui + - wait_for_ui + - batch - tap - touch - long_press - swipe + - drag - gesture - button - key_press - key_sequence - type_text - screenshot - - snapshot_ui diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json index 7ff1cd8b4..8d25e8028 100644 --- a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -13,132 +13,243 @@ "type": "object", "additionalProperties": false, "properties": { - "x": { - "type": "number" - }, - "y": { - "type": "number" - }, - "width": { - "type": "number" + "x": { "type": "number" }, + "y": { "type": "number" }, + "width": { "type": "number" }, + "height": { "type": "number" } + }, + "required": ["x", "y", "width", "height"] + }, + "uiHierarchyNode": { + "type": "object" + }, + "uiHierarchyCapture": { + "type": "object", + 
"additionalProperties": false, + "properties": { + "type": { "const": "ui-hierarchy" }, + "uiHierarchy": { + "type": "array", + "items": { "$ref": "#/$defs/uiHierarchyNode" } + } + }, + "required": ["type", "uiHierarchy"] + }, + "runtimeActionName": { + "enum": ["tap", "typeText", "longPress", "touch", "swipeWithin"] + }, + "runtimeElementRole": { + "enum": [ + "application", + "button", + "cell", + "image", + "keyboard-key", + "list", + "menu", + "other", + "scroll-view", + "slider", + "switch", + "tab", + "text", + "text-field", + "window" + ] + }, + "runtimeElementState": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { "type": "boolean" }, + "focused": { "type": "boolean" }, + "selected": { "type": "boolean" }, + "visible": { "type": "boolean" } + } + }, + "runtimeElement": { + "type": "object", + "additionalProperties": false, + "properties": { + "ref": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "role": { "$ref": "#/$defs/runtimeElementRole" }, + "label": { "type": "string" }, + "value": { "type": "string" }, + "identifier": { "type": "string" }, + "frame": { "$ref": "#/$defs/frame" }, + "state": { "$ref": "#/$defs/runtimeElementState" }, + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionName" } + } + }, + "required": ["ref", "frame", "actions"] + }, + "runtimeActionHint": { + "type": "object", + "additionalProperties": false, + "properties": { + "action": { "$ref": "#/$defs/runtimeActionName" }, + "elementRef": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "label": { "type": "string" } + }, + "required": ["action", "elementRef"] + }, + "runtimeSnapshot": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot" }, + "protocol": { "const": "rs/1" }, + "simulatorId": { "type": "string" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "capturedAtMs": { "type": "integer", 
"minimum": 0 }, + "expiresAtMs": { "type": "integer", "minimum": 0 }, + "elements": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeElement" } }, - "height": { - "type": "number" + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionHint" } } }, "required": [ - "x", - "y", - "width", - "height" + "type", + "protocol", + "simulatorId", + "screenHash", + "seq", + "capturedAtMs", + "expiresAtMs", + "elements", + "actions" ] }, - "accessibilityNode": { + "compactRuntimeSnapshot": { "type": "object", - "additionalProperties": true, + "additionalProperties": false, "properties": { - "frame": { - "$ref": "#/$defs/frame" - }, - "type": { - "type": "string" + "type": { "const": "runtime-snapshot" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "count": { "type": "integer", "minimum": 0 }, + "targets": { + "type": "array", + "items": { "type": "string" } }, - "role": { - "type": "string" + "scroll": { + "type": "array", + "items": { "type": "string" } }, - "children": { + "text": { "type": "array", - "items": { - "$ref": "#/$defs/accessibilityNode" - } + "items": { "type": "string" } }, - "enabled": { - "type": "boolean" + "evidence": { + "description": "Non-actionable semantic evidence rows in role|label|value|identifier format. 
These rows intentionally omit element refs.", + "type": "array", + "items": { "type": "string" } }, - "custom_actions": { + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] + }, + "runtimeSnapshotUnchanged": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot-unchanged" }, + "protocol": { "const": "rs/1" }, + "simulatorId": { "type": "string" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "protocol", "simulatorId", "screenHash", "seq"] + }, + "compactRuntimeSnapshotUnchanged": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot-unchanged" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "unchanged": { "const": true }, + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "unchanged", "udid"] + }, + "videoRecordingCapture": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "video-recording" }, + "state": { "enum": ["started", "stopped"] }, + "fps": { "type": "integer", "minimum": 1 }, + "outputFile": { "type": "string" }, + "sessionId": { "type": "string" } + }, + "required": ["type", "state"] + }, + "waitPredicate": { + "enum": ["exists", "gone", "enabled", "focused", "textContains", "settled"] + }, + "waitMatch": { + "type": "object", + "additionalProperties": false, + "properties": { + "predicate": { "$ref": "#/$defs/waitPredicate" }, + "matches": { "type": "array", "items": { - "type": "string" + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] } - }, - "AXFrame": { - "type": "string" - }, - "AXUniqueId": { - "type": [ - "string", - "null" - ] - }, - "role_description": { - "type": [ - "string", - "null" - ] - }, - 
"AXLabel": { - "type": [ - "string", - "null" - ] - }, - "content_required": { - "type": "boolean" - }, - "title": { - "type": [ - "string", - "null" - ] - }, - "help": { - "type": [ - "string", - "null" - ] - }, - "AXValue": { - "type": [ - "string", - "null" + } + }, + "required": ["predicate", "matches"] + }, + "recoverableUiError": { + "type": "object", + "additionalProperties": false, + "properties": { + "code": { + "enum": [ + "SNAPSHOT_MISSING", + "SNAPSHOT_EXPIRED", + "SNAPSHOT_PARSE_FAILED", + "SNAPSHOT_CAPTURE_FAILED", + "ELEMENT_REF_NOT_FOUND", + "TARGET_NOT_FOUND", + "TARGET_AMBIGUOUS", + "TARGET_NOT_ACTIONABLE", + "WAIT_TIMEOUT", + "UI_STATE_CHANGED", + "ACTION_FAILED" ] }, - "subrole": { - "type": [ - "string", - "null" - ] + "message": { "type": "string" }, + "recoveryHint": { "type": "string" }, + "elementRef": { "type": "string" }, + "candidates": { + "type": "array", + "items": { + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] + } }, - "pid": { - "type": "number" - } + "snapshotAgeMs": { "type": "integer", "minimum": 0 }, + "timeoutMs": { "type": "integer", "minimum": 0 } }, - "required": [ - "frame", - "type", - "role", - "children", - "enabled", - "custom_actions" - ] + "required": ["code", "message", "recoveryHint"] } }, "properties": { - "schema": { - "const": "xcodebuildmcp.output.capture-result" - }, - "schemaVersion": { - "const": "2" - }, - "didError": { - "type": "boolean" - }, - "error": { - "type": [ - "string", - "null" - ] - }, + "schema": { "const": "xcodebuildmcp.output.capture-result" }, + "schemaVersion": { "const": "2" }, + "didError": { "type": "boolean" }, + "error": { "type": ["string", "null"] }, "data": { "type": "object", "additionalProperties": false, @@ -150,16 +261,10 @@ "type": "object", "additionalProperties": false, "properties": { - "simulatorId": { - "type": "string" - }, - "screenshotPath": { - "type": "string" - } + "simulatorId": { "type": "string" }, + "screenshotPath": { "type": "string" 
} }, - "required": [ - "simulatorId" - ] + "required": ["simulatorId"] }, "capture": { "oneOf": [ @@ -167,92 +272,31 @@ "type": "object", "additionalProperties": false, "properties": { - "format": { - "type": "string" - }, - "width": { - "type": "integer", - "minimum": 0 - }, - "height": { - "type": "integer", - "minimum": 0 - } - }, - "required": [ - "format", - "width", - "height" - ] - }, - { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "const": "ui-hierarchy" - }, - "uiHierarchy": { - "type": "array", - "items": { - "$ref": "#/$defs/accessibilityNode" - } - } + "format": { "type": "string" }, + "width": { "type": "integer", "minimum": 0 }, + "height": { "type": "integer", "minimum": 0 } }, - "required": [ - "type", - "uiHierarchy" - ] + "required": ["format", "width", "height"] }, - { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "const": "video-recording" - }, - "state": { - "enum": [ - "started", - "stopped" - ] - }, - "fps": { - "type": "integer", - "minimum": 1 - }, - "outputFile": { - "type": "string" - }, - "sessionId": { - "type": "string" - } - }, - "required": [ - "type", - "state" - ] - } + { "$ref": "#/$defs/uiHierarchyCapture" }, + { "$ref": "#/$defs/runtimeSnapshot" }, + { "$ref": "#/$defs/compactRuntimeSnapshot" }, + { "$ref": "#/$defs/videoRecordingCapture" }, + { "$ref": "#/$defs/runtimeSnapshotUnchanged" }, + { "$ref": "#/$defs/compactRuntimeSnapshotUnchanged" } ] }, "diagnostics": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/basicDiagnostics" - } + }, + "uiError": { "$ref": "#/$defs/recoverableUiError" }, + "waitMatch": { "$ref": "#/$defs/waitMatch" } }, - "required": [ - "summary", - "artifacts" - ] + "required": ["summary", "artifacts"] }, "nextSteps": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/nextSteps" } }, - "required": [ - "schema", - "schemaVersion", - 
"didError", - "error", - "data" - ] + "required": ["schema", "schemaVersion", "didError", "error", "data"] } diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json index 78625e55c..1a6ad06f5 100644 --- a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -8,22 +8,149 @@ "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/errorConsistency" } ], - "properties": { - "schema": { - "const": "xcodebuildmcp.output.ui-action-result" - }, - "schemaVersion": { - "const": "2" + "$defs": { + "frame": { + "type": "object", + "additionalProperties": false, + "properties": { + "x": { "type": "number" }, + "y": { "type": "number" }, + "width": { "type": "number" }, + "height": { "type": "number" } + }, + "required": ["x", "y", "width", "height"] }, - "didError": { - "type": "boolean" + "runtimeActionName": { + "enum": ["tap", "typeText", "longPress", "touch", "swipeWithin"] }, - "error": { - "type": [ - "string", - "null" + "runtimeElementRole": { + "enum": [ + "application", + "button", + "cell", + "image", + "keyboard-key", + "list", + "menu", + "other", + "scroll-view", + "slider", + "switch", + "tab", + "text", + "text-field", + "window" ] }, + "runtimeElementState": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { "type": "boolean" }, + "focused": { "type": "boolean" }, + "selected": { "type": "boolean" }, + "visible": { "type": "boolean" } + } + }, + "runtimeElement": { + "type": "object", + "additionalProperties": false, + "properties": { + "ref": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "role": { "$ref": "#/$defs/runtimeElementRole" }, + "label": { "type": "string" }, + "value": { "type": "string" }, + "identifier": { "type": "string" }, + "frame": { 
"$ref": "#/$defs/frame" }, + "state": { "$ref": "#/$defs/runtimeElementState" }, + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionName" } + } + }, + "required": ["ref", "frame", "actions"] + }, + "compactRuntimeSnapshot": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "count": { "type": "integer", "minimum": 0 }, + "targets": { + "type": "array", + "items": { "type": "string" } + }, + "scroll": { + "type": "array", + "items": { "type": "string" } + }, + "text": { + "type": "array", + "items": { "type": "string" } + }, + "evidence": { + "description": "Non-actionable semantic evidence rows in role|label|value|identifier format. These rows intentionally omit element refs.", + "type": "array", + "items": { "type": "string" } + }, + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] + }, + "point": { + "type": "object", + "additionalProperties": false, + "properties": { + "x": { "type": "number" }, + "y": { "type": "number" } + }, + "required": ["x", "y"] + }, + "direction": { + "enum": ["up", "down", "left", "right"] + }, + "recoverableUiError": { + "type": "object", + "additionalProperties": false, + "properties": { + "code": { + "enum": [ + "SNAPSHOT_MISSING", + "SNAPSHOT_EXPIRED", + "SNAPSHOT_PARSE_FAILED", + "SNAPSHOT_CAPTURE_FAILED", + "ELEMENT_REF_NOT_FOUND", + "TARGET_NOT_FOUND", + "TARGET_AMBIGUOUS", + "TARGET_NOT_ACTIONABLE", + "WAIT_TIMEOUT", + "UI_STATE_CHANGED", + "ACTION_FAILED" + ] + }, + "message": { "type": "string" }, + "recoveryHint": { "type": "string" }, + "elementRef": { "type": "string" }, + "candidates": { + "type": "array", + "items": { + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] + } + }, + "snapshotAgeMs": { "type": 
"integer", "minimum": 0 }, + "timeoutMs": { "type": "integer", "minimum": 0 } + }, + "required": ["code", "message", "recoveryHint"] + } + }, + "properties": { + "schema": { "const": "xcodebuildmcp.output.ui-action-result" }, + "schemaVersion": { "const": "2" }, + "didError": { "type": "boolean" }, + "error": { "type": ["string", "null"] }, "data": { "type": "object", "additionalProperties": false, @@ -37,174 +164,189 @@ "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "tap" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - }, - "id": { - "type": "string" - }, - "label": { - "type": "string" - } + "type": { "const": "tap" }, + "elementRef": { "type": "string" }, + "x": { "type": "number" }, + "y": { "type": "number" } }, - "required": [ - "type" - ] + "required": ["type", "elementRef"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "swipe" - }, - "from": { - "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/point" - }, - "to": { - "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/point" - }, - "durationSeconds": { - "type": "number", - "minimum": 0 - } + "type": { "const": "tap" }, + "x": { "type": "number" }, + "y": { "type": "number" } }, - "required": [ - "type" - ] + "required": ["type", "x", "y"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "touch" - }, - "event": { - "type": "string" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - } + "type": { "const": "swipe" }, + "withinElementRef": { "type": "string" }, + "direction": { "$ref": "#/$defs/direction" }, + "from": { "$ref": "#/$defs/point" }, + "to": { "$ref": "#/$defs/point" }, + "durationSeconds": { "type": "number", "minimum": 0 } }, - "required": [ - "type" - ] + "required": ["type", "withinElementRef", "direction"] }, { "type": "object", 
"additionalProperties": false, "properties": { - "type": { - "const": "long-press" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - }, - "durationMs": { - "type": "integer", - "minimum": 0 - } + "type": { "const": "swipe" }, + "from": { "$ref": "#/$defs/point" }, + "to": { "$ref": "#/$defs/point" }, + "durationSeconds": { "type": "number", "minimum": 0 } }, - "required": [ - "type", - "x", - "y", - "durationMs" - ] + "required": ["type", "from", "to"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "button" - }, - "button": { - "type": "string" - } + "type": { "const": "swipe" } }, - "required": [ - "type", - "button" - ] + "required": ["type"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "gesture" - }, - "gesture": { - "type": "string" - } + "type": { "const": "drag" }, + "elementRef": { "type": "string" }, + "direction": { "$ref": "#/$defs/direction" }, + "from": { "$ref": "#/$defs/point" }, + "to": { "$ref": "#/$defs/point" }, + "durationSeconds": { "type": "number", "minimum": 0 }, + "steps": { "type": "integer", "minimum": 1 } }, - "required": [ - "type", - "gesture" - ] + "required": ["type", "elementRef", "direction"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "type-text" - } + "type": { "const": "touch" }, + "elementRef": { "type": "string" }, + "event": { "type": "string" }, + "x": { "type": "number" }, + "y": { "type": "number" } }, - "required": [ - "type" - ] + "required": ["type", "elementRef"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "key-press" - }, - "keyCode": { - "type": "integer", - "minimum": 0 - } + "type": { "const": "touch" }, + "event": { "type": "string" }, + "x": { "type": "number" }, + "y": { "type": "number" } + }, + "required": ["type", "x", "y"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + 
"type": { "const": "touch" } + }, + "required": ["type"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "long-press" }, + "elementRef": { "type": "string" }, + "durationMs": { "type": "integer", "minimum": 0 }, + "x": { "type": "number" }, + "y": { "type": "number" } + }, + "required": ["type", "elementRef", "durationMs"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "long-press" }, + "x": { "type": "number" }, + "y": { "type": "number" }, + "durationMs": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "x", "y", "durationMs"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "button" }, + "button": { "type": "string" } + }, + "required": ["type", "button"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "gesture" }, + "gesture": { "type": "string" } + }, + "required": ["type", "gesture"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "type-text" }, + "elementRef": { "type": "string" }, + "textLength": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "elementRef"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "type-text" }, + "textLength": { "type": "integer", "minimum": 0 } }, - "required": [ - "type", - "keyCode" - ] + "required": ["type"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "key-sequence" - }, + "type": { "const": "key-press" }, + "keyCode": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "keyCode"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "key-sequence" }, "keyCodes": { "type": "array", - "items": { - "type": "integer", - "minimum": 0 - } + "items": { "type": "integer", "minimum": 0 } 
} }, - "required": [ - "type", - "keyCodes" - ] + "required": ["type", "keyCodes"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "batch" }, + "stepCount": { "type": "integer", "minimum": 1 } + }, + "required": ["type", "stepCount"] } ] }, @@ -212,33 +354,21 @@ "type": "object", "additionalProperties": false, "properties": { - "simulatorId": { - "type": "string" - } + "simulatorId": { "type": "string" } }, - "required": [ - "simulatorId" - ] + "required": ["simulatorId"] }, "diagnostics": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/basicDiagnostics" - } + }, + "capture": { "$ref": "#/$defs/compactRuntimeSnapshot" }, + "uiError": { "$ref": "#/$defs/recoverableUiError" } }, - "required": [ - "summary", - "action", - "artifacts" - ] + "required": ["summary", "action", "artifacts"] }, "nextSteps": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/nextSteps" } }, - "required": [ - "schema", - "schemaVersion", - "didError", - "error", - "data" - ] + "required": ["schema", "schemaVersion", "didError", "error", "data"] } diff --git a/scripts/bundle-axe.sh b/scripts/bundle-axe.sh index eb15c664b..c692c32a8 100755 --- a/scripts/bundle-axe.sh +++ b/scripts/bundle-axe.sh @@ -94,10 +94,17 @@ else echo "đŸ“Ĩ Downloading latest AXe release from GitHub..." 
- AXE_RELEASE_BASE_URL="https://github.com/cameroncooke/AXe/releases/download/v${PINNED_AXE_VERSION}" - AXE_HOMEBREW_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-homebrew-v${PINNED_AXE_VERSION}.tar.gz" - AXE_UNIVERSAL_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-v${PINNED_AXE_VERSION}-universal.tar.gz" - AXE_LEGACY_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-v${PINNED_AXE_VERSION}.tar.gz" + if [[ "$PINNED_AXE_VERSION" == staging-* ]]; then + AXE_RELEASE_TAG="$PINNED_AXE_VERSION" + AXE_ASSET_VERSION="$PINNED_AXE_VERSION" + else + AXE_RELEASE_TAG="v${PINNED_AXE_VERSION}" + AXE_ASSET_VERSION="v${PINNED_AXE_VERSION}" + fi + AXE_RELEASE_BASE_URL="https://github.com/cameroncooke/AXe/releases/download/${AXE_RELEASE_TAG}" + AXE_HOMEBREW_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-homebrew-${AXE_ASSET_VERSION}.tar.gz" + AXE_UNIVERSAL_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-${AXE_ASSET_VERSION}-universal.tar.gz" + AXE_LEGACY_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-${AXE_ASSET_VERSION}.tar.gz" # Create temp directory mkdir -p "$AXE_TEMP_DIR" @@ -258,7 +265,8 @@ if [ "$OS_NAME" = "Darwin" ]; then ad_hoc_sign_bundled_axe_assets fi - if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ]; then + if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ] || [ "$AXE_ARCHIVE_FLAVOR" = "local-signed" ]; then + ad_hoc_sign_bundled_axe_assets echo "â„šī¸ ${AXE_ARCHIVE_FLAVOR} AXe archive detected; using ad-hoc signatures for local runtime compatibility" else echo "🔏 Verifying AXe signatures..." 
@@ -284,7 +292,7 @@ if [ "$OS_NAME" = "Darwin" ]; then done < <(find "$BUNDLED_DIR/Frameworks" -name "*.framework" -type d) fi - if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ]; then + if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ] || [ "$AXE_ARCHIVE_FLAVOR" = "local-signed" ]; then echo "â„šī¸ Skipping Gatekeeper assessment for ${AXE_ARCHIVE_FLAVOR} AXe archive" else echo "đŸ›Ąī¸ Assessing AXe with Gatekeeper..." @@ -316,6 +324,27 @@ else echo "âš ī¸ Skipping AXe binary verification on non-macOS (detected $OS_NAME)" AXE_VERSION="unknown (verification skipped)" fi +validate_axe_version_metadata() { + if [ "$AXE_VERSION" = "unknown (verification skipped)" ]; then + return + fi + + if [[ "$AXE_VERSION" == *dirty* ]] && [ "${AXE_ALLOW_DIRTY_LOCAL:-0}" != "1" ]; then + echo "❌ Bundled AXe reports a dirty version: $AXE_VERSION" + echo " Rebuild AXe from a clean checkout or set AXE_ALLOW_DIRTY_LOCAL=1 for explicit local testing." 
+ exit 1 + fi + + if [ "$USE_LOCAL_AXE" = false ]; then + if [ "$AXE_VERSION" != "$PINNED_AXE_VERSION" ] && [ "$AXE_VERSION" != "v$PINNED_AXE_VERSION" ]; then + echo "❌ Bundled AXe version '$AXE_VERSION' does not match pinned version '$PINNED_AXE_VERSION'" + exit 1 + fi + fi +} + +validate_axe_version_metadata + echo "📋 AXe version: $AXE_VERSION" # Clean up temp directory if it was used diff --git a/src/cli/__tests__/register-tool-commands.test.ts b/src/cli/__tests__/register-tool-commands.test.ts index bd604ddd7..1b78378ef 100644 --- a/src/cli/__tests__/register-tool-commands.test.ts +++ b/src/cli/__tests__/register-tool-commands.test.ts @@ -6,6 +6,7 @@ import type { ToolHandlerContext } from '../../rendering/types.ts'; import { DefaultToolInvoker } from '../../runtime/tool-invoker.ts'; import type { ResolvedRuntimeConfig } from '../../utils/config-store.ts'; import { registerToolCommands } from '../register-tool-commands.ts'; +import * as simulatorResolver from '../../utils/simulator-resolver.ts'; function createTool(overrides: Partial = {}): ToolDefinition { return { @@ -260,6 +261,90 @@ describe('registerToolCommands', () => { stdoutWrite.mockRestore(); }); + it('resolves configured simulatorName for CLI tools that require simulatorId', async () => { + const resolveSimulatorNameToId = vi + .spyOn(simulatorResolver, 'resolveSimulatorNameToId') + .mockResolvedValue({ + success: true, + simulatorId: 'SIM-RESOLVED', + simulatorName: 'iPhone 17 Pro', + }); + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + simulatorId: z.string().describe('Simulator ID'), + }, + mcpSchema: { + simulatorId: z.string().describe('Simulator ID'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: { + simulatorName: 'iPhone 17 Pro', + }, + 
sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect(app.parseAsync(['simulator', 'run-tool'])).resolves.toBeDefined(); + + expect(resolveSimulatorNameToId).toHaveBeenCalledWith(expect.any(Function), 'iPhone 17 Pro'); + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + simulatorId: 'SIM-RESOLVED', + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + + it('does not synthesize simulatorId for tools that already accept simulatorName', async () => { + const resolveSimulatorNameToId = vi.spyOn(simulatorResolver, 'resolveSimulatorNameToId'); + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + simulatorId: z.string().optional().describe('Simulator ID'), + simulatorName: z.string().optional().describe('Simulator name'), + }, + mcpSchema: { + simulatorId: z.string().optional().describe('Simulator ID'), + simulatorName: z.string().optional().describe('Simulator name'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: { + simulatorName: 'iPhone 17 Pro', + }, + sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect(app.parseAsync(['simulator', 'run-tool'])).resolves.toBeDefined(); + + expect(resolveSimulatorNameToId).not.toHaveBeenCalled(); + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + simulatorName: 'iPhone 17 Pro', + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + it('keeps the normal missing-argument error when no hydrated default exists', async () => { const consoleError = vi.spyOn(console, 'error').mockImplementation(() => {}); @@ -458,6 +543,97 @@ describe('registerToolCommands', () => { stdoutWrite.mockRestore(); }); + it('parses comma-separated numeric array args', async 
() => { + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + workspacePath: z.string().describe('Workspace path'), + keyCodes: z.array(z.number()).describe('Key codes'), + }, + mcpSchema: { + workspacePath: z.string().describe('Workspace path'), + keyCodes: z.array(z.number()).describe('Key codes'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: undefined, + sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect( + app.parseAsync([ + 'simulator', + 'run-tool', + '--workspace-path', + 'App.xcworkspace', + '--key-codes', + '23,18,14', + ]), + ).resolves.toBeDefined(); + + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + workspacePath: 'App.xcworkspace', + keyCodes: [23, 18, 14], + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + + it('honors --style minimal by hiding next steps', async () => { + vi.spyOn(DefaultToolInvoker.prototype, 'invokeDirect').mockImplementation( + async (_tool, _args, opts) => { + opts.renderSession?.setStructuredOutput?.({ + schema: 'xcodebuildmcp.output.app-path', + schemaVersion: '1', + result: { + kind: 'app-path', + didError: false, + error: null, + artifacts: { appPath: '/tmp/MyApp.app' }, + }, + }); + opts.renderSession?.setNextSteps?.( + [ + { + label: 'Run again', + tool: 'run_tool', + workflow: 'simulator', + cliTool: 'run-tool', + }, + ], + 'cli', + ); + }, + ); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool(); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--style', 'minimal']), + ).resolves.toBeDefined(); + 
+ const output = stdoutChunks.join(''); + expect(output).toContain('Get App Path'); + expect(output).not.toContain('Next steps:'); + expect(output).not.toContain('Run again'); + }); + it('applies --file-path-render-style to text output without forwarding it to tool args', async () => { vi.spyOn(DefaultToolInvoker.prototype, 'invokeDirect').mockImplementation( async (tool, args, opts) => { @@ -683,6 +859,376 @@ describe('registerToolCommands', () => { ); }); + it('writes compact rs/1 capture JSON for runtime snapshots by default', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'San Francisco', + value: 'selected', + identifier: 'weather.locationButton', + frame: { x: 12, y: 81.33, width: 178, height: 33.33 }, + actions: ['tap', 'longPress', 'touch'], + }, + { + ref: 'e3', + role: 'button', + label: 'Sheet Grabber', + value: 'Half screen', + frame: { x: 150, y: 10, width: 80, height: 20 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'San Francisco' }, + { action: 'tap', elementRef: 'e3', label: 'Sheet 
Grabber' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + expect(stdoutChunks.join('')).toBe( + `${JSON.stringify( + { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + rs: '1', + screenHash: 'screen-hash', + seq: 1, + count: 3, + targets: ['e2|tap|button|San Francisco|selected|weather.locationButton'], + scroll: ['e1|swipe|application|Weather||'], + udid: 'SIMULATOR-1', + }, + }, + }, + null, + 2, + )}\n`, + ); + }); + + it('orders destructive controls after useful targets in compact JSON', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Remove', + identifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e2', + role: 'button', + label: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 140, width: 300, height: 80 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Remove' }, + { action: 'tap', elementRef: 'e2', label: 
'Portland, 1:24 PM ¡ Light Rain' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { + data: { capture: { targets: string[] } }; + }; + expect(output.data.capture.targets).toEqual([ + 'e2|tap|button|Portland, 1:24 PM ¡ Light Rain||', + 'e1|tap|button|Remove||trash', + ]); + }); + + it('orders unselected segmented controls before already-selected controls in compact JSON', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e9', + role: 'button', + label: '°F', + value: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: '°C', + value: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e9', label: '°F' }, + { action: 'tap', elementRef: 'e10', label: '°C' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = 
JSON.parse(stdoutChunks.join('')) as { + data: { capture: { targets: string[] } }; + }; + expect(output.data.capture.targets).toEqual([ + 'e10|tap|button|°C|not selected|', + 'e9|tap|button|°F|selected|', + ]); + }); + + it('writes compact wait matches with no primary action for static text', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'textContains', + matches: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'longPress', elementRef: 'e11', label: 'No matches' }, + { action: 'touch', elementRef: 'e11', label: 'No matches' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { + data: { waitMatch: { matches: string[] } }; + }; + expect(output.data.waitMatch.matches).toEqual(['e11|none|text|No matches||']); + }); + + it('writes the full runtime snapshot envelope for verbose JSON 
output', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + ], + actions: [{ action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json', '--verbose']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { schema: string; data: unknown }; + expect(output.schema).toBe('xcodebuildmcp.output.capture-result'); + expect(output.data).toEqual( + expect.objectContaining({ + capture: expect.objectContaining({ + type: 'runtime-snapshot', + elements: [expect.objectContaining({ ref: 'e1', actions: ['swipeWithin'] })], + }), + }), + ); + }); + it('writes one NDJSON line per domain fragment for jsonl output and omits the final envelope', async () => { mockInvokeDirectThroughHandler(); const stdoutChunks: string[] = []; diff --git a/src/cli/__tests__/schema-to-yargs.test.ts b/src/cli/__tests__/schema-to-yargs.test.ts index 014868d80..371191415 100644 --- a/src/cli/__tests__/schema-to-yargs.test.ts +++ b/src/cli/__tests__/schema-to-yargs.test.ts 
@@ -25,4 +25,18 @@ describe('schemaToYargsOptions', () => { expect(options.get('workspace-path')?.demandOption).toBe(false); }); + + it('coerces comma-separated numeric array flags', () => { + const options = schemaToYargsOptions({ + keyCodes: z.array(z.number()), + }); + + const coerce = options.get('key-codes')?.coerce; + + expect(typeof coerce).toBe('function'); + expect(coerce?.('23,18,14')).toEqual([23, 18, 14]); + expect(coerce?.('23, 18, 14')).toEqual([23, 18, 14]); + expect(coerce?.(['23', '18,14'])).toEqual([23, 18, 14]); + expect(coerce?.('23,')).toEqual([23, Number.NaN]); + }); }); diff --git a/src/cli/register-tool-commands.ts b/src/cli/register-tool-commands.ts index da4f66c7d..62aeaf273 100644 --- a/src/cli/register-tool-commands.ts +++ b/src/cli/register-tool-commands.ts @@ -17,6 +17,7 @@ import { getCliSessionDefaultsForTool, isKnownCliSessionDefaultsProfile, mergeCliSessionDefaults, + resolveCliSessionDefaults, } from './session-defaults.ts'; import { createRenderSession } from '../rendering/render.ts'; import { toStructuredEnvelope } from '../utils/structured-output-envelope.ts'; @@ -26,6 +27,8 @@ import { STRUCTURED_ERROR_SCHEMA_VERSION, } from '../utils/structured-error.ts'; import { toCliJsonlEvent } from './jsonl-event.ts'; +import { resolveSimulatorNameToId } from '../utils/simulator-resolver.ts'; +import { getDefaultCommandExecutor } from '../utils/execution/index.ts'; export interface RegisterToolCommandsOptions { workspaceRoot: string; @@ -96,9 +99,10 @@ function createBufferedHandlerContext( function writeJsonOutput( handlerContext: ToolHandlerContext, session: ReturnType, - outputStyle: OutputStyle, + options: { outputStyle: OutputStyle; verbose?: boolean }, ): boolean { const { structuredOutput } = handlerContext; + const suppressedTargetRefs = structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs; const envelope = structuredOutput ? 
toStructuredEnvelope( structuredOutput.result, @@ -106,7 +110,12 @@ function writeJsonOutput( structuredOutput.schemaVersion, { nextSteps: session.getNextSteps?.(), - outputStyle, + nextStepRuntime: session.getNextStepsRuntime?.(), + outputStyle: options.outputStyle, + runtimeSnapshot: options.verbose ? 'full' : 'compact', + ...(suppressedTargetRefs + ? { runtimeSnapshotSuppressedTargetRefs: suppressedTargetRefs } + : {}), }, ) : toStructuredEnvelope( @@ -117,7 +126,7 @@ function writeJsonOutput( }).result, STRUCTURED_ERROR_SCHEMA, STRUCTURED_ERROR_SCHEMA_VERSION, - { outputStyle }, + { outputStyle: options.outputStyle }, ); process.stdout.write(JSON.stringify(envelope, null, 2) + '\n'); @@ -251,12 +260,18 @@ function registerToolSubcommand( describe: 'Output format', }); + subYargs.option('verbose', { + type: 'boolean', + default: false, + describe: 'Render verbose output data when supported', + }); + // Group options for cleaner help display if (toolArgNames.length > 0) { subYargs.group(toolArgNames, 'Tool Arguments:'); } subYargs.group(['profile'], 'Session Defaults:'); - subYargs.group(['json', 'output'], 'Output Options:'); + subYargs.group(['json', 'output', 'verbose'], 'Output Options:'); // Add note about unsupported keys if any if (unsupportedKeys.length > 0) { @@ -288,6 +303,7 @@ function registerToolSubcommand( const socketPath = argv.socket as string; const logLevel = argv['log-level'] as string | undefined; const filePathRenderStyle = argv.filePathRenderStyle as FilePathRenderStyle | undefined; + const verboseOutput = argv.verbose === true; if ( profileOverride && @@ -322,6 +338,7 @@ function registerToolSubcommand( 'logLevel', 'file-path-render-style', 'filePathRenderStyle', + 'verbose', '_', '$0', ]); @@ -335,6 +352,10 @@ function registerToolSubcommand( // Merge: flag args first, then JSON overrides const explicitArgs = { ...toolParams, ...jsonArgs }; + const rawDefaults = resolveCliSessionDefaults({ + runtimeConfig: opts.runtimeConfig, + 
profileOverride, + }); const args = mergeCliSessionDefaults({ defaults: getCliSessionDefaultsForTool({ tool, @@ -344,6 +365,24 @@ function registerToolSubcommand( explicitArgs, }); + if ( + args.simulatorId === undefined && + tool.cliSchema.simulatorId !== undefined && + tool.cliSchema.simulatorName === undefined && + typeof rawDefaults.simulatorName === 'string' + ) { + const resolvedSimulator = await resolveSimulatorNameToId( + getDefaultCommandExecutor(), + rawDefaults.simulatorName, + ); + if (!resolvedSimulator.success) { + console.error(`Error: ${resolvedSimulator.error}`); + process.exitCode = 1; + return; + } + args.simulatorId = resolvedSimulator.simulatorId; + } + const missingRequiredFlags = requiredFlagNames.filter((flagName) => { const camelKey = convertArgvToToolParams({ [flagName]: true }); const [toolKey] = Object.keys(camelKey); @@ -373,6 +412,7 @@ function registerToolSubcommand( runtime: 'cli', outputStyle, filePathRenderStyle, + includeNextSteps: outputStyle !== 'minimal', }); const writeJsonlFragment = outputFormat === 'jsonl' @@ -406,7 +446,7 @@ function registerToolSubcommand( } if (outputFormat === 'json') { - if (writeJsonOutput(handlerContext, session, outputStyle)) { + if (writeJsonOutput(handlerContext, session, { outputStyle, verbose: verboseOutput })) { process.exitCode = 1; } return; diff --git a/src/cli/schema-to-yargs.ts b/src/cli/schema-to-yargs.ts index 175068097..e68fe8cbf 100644 --- a/src/cli/schema-to-yargs.ts +++ b/src/cli/schema-to-yargs.ts @@ -7,6 +7,16 @@ export interface YargsOptionConfig extends Options { type: 'string' | 'number' | 'boolean' | 'array'; } +function coerceNumberArray(value: unknown): number[] { + const values = Array.isArray(value) ? value : [value]; + return values.flatMap((entry) => + String(entry) + .split(',') + .map((item) => item.trim()) + .map((item) => (item === '' ? 
Number.NaN : Number(item))), + ); +} + export interface ZodToYargsOptionOptions { hasHydratedDefault?: boolean; } @@ -195,9 +205,17 @@ export function zodToYargsOption( const element = getArrayElement(unwrapped); if (element) { const elemTypeName = getZodTypeName(unwrap(element)); - if (elemTypeName === 'string' || elemTypeName === 'number') { + if (elemTypeName === 'string') { return { type: 'array', describe: description, demandOption: false }; } + if (elemTypeName === 'number') { + return { + type: 'array', + describe: description, + demandOption: false, + coerce: coerceNumberArray, + }; + } } // Complex array types - use --json fallback return null; diff --git a/src/core/__tests__/structured-output-schema.test.ts b/src/core/__tests__/structured-output-schema.test.ts index 006f38257..54bc6e97c 100644 --- a/src/core/__tests__/structured-output-schema.test.ts +++ b/src/core/__tests__/structured-output-schema.test.ts @@ -383,6 +383,125 @@ describe('structured output schema bundling', () => { ).toBe(true); }); + it('accepts ui automation v2 runtime snapshots and semantic action errors', () => { + const ajv = new Ajv2020({ allErrors: true, strict: true, validateSchema: true }); + const captureValidate = ajv.compile( + getMcpOutputSchema({ schema: 'xcodebuildmcp.output.capture-result', version: '2' }), + ); + const actionValidate = ajv.compile( + getMcpOutputSchema({ schema: 'xcodebuildmcp.output.ui-action-result', version: '2' }), + ); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIM-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 100, height: 40 }, + state: { enabled: true, 
selected: true, visible: true }, + actions: ['tap'], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Continue' }], + }, + }, + }), + ).toBe(true); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIM-1', + screenHash: 'screen-hash', + seq: 2, + }, + }, + }), + ).toBe(true); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: 'screen-hash', + seq: 2, + unchanged: true, + udid: 'SIM-1', + }, + }, + }), + ).toBe(true); + + expect( + actionValidate({ + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + didError: true, + error: 'Element ref was not found in the current snapshot.', + data: { + summary: { status: 'FAILED' }, + action: { type: 'tap', elementRef: 'e404' }, + artifacts: { simulatorId: 'SIM-1' }, + uiError: { + code: 'ELEMENT_REF_NOT_FOUND', + message: 'Element ref was not found in the current snapshot.', + recoveryHint: 'Run snapshot_ui again and retry with a current elementRef.', + elementRef: 'e404', + snapshotAgeMs: 1_000, + }, + }, + }), + ).toBe(true); + + expect( + actionValidate({ + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + action: { type: 'batch', stepCount: 2 }, + artifacts: { simulatorId: 'SIM-1' }, + }, + }), + ).toBe(true); + }); + it('accepts xcode bridge call-result artifacts', () => { const schema = getMcpOutputSchema({ schema: 'xcodebuildmcp.output.xcode-bridge-call-result', diff --git 
a/src/daemon.ts b/src/daemon.ts index e397e991f..efcdfea06 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -223,7 +223,7 @@ async function main(): Promise { const xcodeIdeWorkflowEnabled = daemonWorkflows.includes('xcode-ide'); const axeBinary = resolveAxeBinary(); const axeAvailable = axeBinary !== null; - const axeSource: 'env' | 'bundled' | 'path' | 'unavailable' = + const axeSource: 'env' | 'source' | 'bundled' | 'path' | 'unavailable' = axeBinary?.source ?? 'unavailable'; const xcodemakeAvailable = isXcodemakeBinaryAvailable(); const xcodemakeEnabled = isXcodemakeEnabled(); diff --git a/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts b/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts index b2325eed9..f9c677b82 100644 --- a/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts +++ b/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts @@ -183,6 +183,8 @@ describe('sendKeyboardShortcut', () => { expect(result.success).toBe(false); if (!result.success) { expect(result.error).toContain('iPhone 15 Pro'); + expect(result.error).toContain('without a device window'); + expect(result.error).toContain('retry the keyboard shortcut'); } expect(calls).toHaveLength(3); }); diff --git a/src/mcp/tools/simulator-management/_keyboard_shortcut.ts b/src/mcp/tools/simulator-management/_keyboard_shortcut.ts index a4ea377e3..1eb5c67ae 100644 --- a/src/mcp/tools/simulator-management/_keyboard_shortcut.ts +++ b/src/mcp/tools/simulator-management/_keyboard_shortcut.ts @@ -123,7 +123,7 @@ export async function sendKeyboardShortcut( if (focusResult.output.trim() === 'NO_WINDOW') { return { success: false, - error: `No Simulator window found for "${device.name}". Is the simulator window visible?`, + error: `No visible Simulator window found for "${device.name}". 
Simulator.app may be running without a device window; open the simulator device window manually, then retry the keyboard shortcut.`, }; } diff --git a/src/mcp/tools/simulator/__tests__/boot_sim.test.ts b/src/mcp/tools/simulator/__tests__/boot_sim.test.ts index 5e40e5af5..e7064d5db 100644 --- a/src/mcp/tools/simulator/__tests__/boot_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/boot_sim.test.ts @@ -8,6 +8,12 @@ import { sessionStore } from '../../../../utils/session-store.ts'; import { schema, handler, boot_simLogic } from '../boot_sim.ts'; import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + describe('boot_sim tool', () => { beforeEach(() => { sessionStore.clear(); @@ -105,6 +111,43 @@ describe('boot_sim tool', () => { expect(result.isError).toBe(true); }); + it('should resolve simulatorName before booting', async () => { + const calls: Array<{ + command: string[]; + description?: string; + allowStderr?: boolean; + }> = []; + const mockExecutor = async ( + command: string[], + description?: string, + allowStderr?: boolean, + ) => { + calls.push({ command, description, allowStderr }); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ + success: true, + output: 'Simulator booted successfully', + }); + }; + + const result = await runLogic(() => + boot_simLogic({ simulatorName: 'iPhone 17' }, mockExecutor), + ); + + expect(result.isError).toBeFalsy(); + expect(result.nextStepParams).toEqual({ + open_sim: {}, + install_app_sim: { simulatorId: 'resolved-uuid', appPath: 'PATH_TO_YOUR_APP' }, + launch_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'YOUR_APP_BUNDLE_ID' }, + }); + expect(calls.map((call) => call.command)).toEqual([ + ['xcrun', 'simctl', 'list', 
'devices', 'available', '-j'], + ['xcrun', 'simctl', 'boot', 'resolved-uuid'], + ]); + }); + it('should verify command generation with mock executor', async () => { const calls: Array<{ command: string[]; diff --git a/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts index 21d892997..dcbc11b2b 100644 --- a/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts @@ -11,6 +11,12 @@ import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { schema, handler, install_app_simLogic } from '../install_app_sim.ts'; import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + describe('install_app_sim tool', () => { beforeEach(() => { sessionStore.clear(); @@ -100,6 +106,53 @@ describe('install_app_sim tool', () => { ]); }); + it('should resolve simulatorName before installing', async () => { + const executorCalls: Array> = []; + const mockExecutor: CommandExecutor = (...args) => { + executorCalls.push(args); + const command = args[0]; + if (command.includes('list')) { + return Promise.resolve( + createMockCommandResponse({ success: true, output: availableSimulatorsJson }), + ); + } + if (command[0] === 'defaults') { + return Promise.resolve( + createMockCommandResponse({ success: true, output: 'io.sentry.myapp' }), + ); + } + return Promise.resolve( + createMockCommandResponse({ success: true, output: 'App installed' }), + ); + }; + + const mockFileSystem = createMockFileSystemExecutor({ + existsSync: () => true, + }); + + const result = await runLogic(() => + install_app_simLogic( + { + simulatorName: 'iPhone 17', + appPath: '/path/to/app.app', + }, + mockExecutor, + mockFileSystem, + ), + ); + + expect(result.isError).toBeFalsy(); + 
expect(result.nextStepParams).toEqual({ + open_sim: {}, + launch_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'io.sentry.myapp' }, + }); + expect(executorCalls.map((call) => call[0])).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'install', 'resolved-uuid', '/path/to/app.app'], + ['defaults', 'read', '/path/to/app.app/Info', 'CFBundleIdentifier'], + ]); + }); + it('should generate command with different simulator identifier', async () => { const executorCalls: Array> = []; const mockExecutor: CommandExecutor = (...args) => { diff --git a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts index d3408553d..a7633257f 100644 --- a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts @@ -1,11 +1,17 @@ import { describe, it, expect, beforeEach } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor } from '../../../../test-utils/mock-executors.ts'; +import { createMockCommandResponse } from '../../../../test-utils/mock-executors.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; import { schema, handler, launch_app_simLogic, type SimulatorLauncher } from '../launch_app_sim.ts'; import type { LaunchWithLoggingResult } from '../../../../utils/simulator-steps.ts'; import { runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + function createMockLauncher(overrides?: Partial): SimulatorLauncher { return async (_uuid, _bundleId, _executor, _opts?) 
=> ({ success: true, @@ -143,6 +149,44 @@ describe('launch_app_sim tool', () => { expect(capturedEnv).toEqual({ STAGING_ENABLED: '1' }); }); + it('should resolve simulatorName before checking install and launching', async () => { + const executorCalls: string[][] = []; + const installCheckExecutor = async (command: string[]) => { + executorCalls.push(command); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ success: true, output: '/path/to/app/container' }); + }; + let launchedUuid: string | undefined; + const trackingLauncher: SimulatorLauncher = async (uuid, _bundleId, _executor, _opts?) => { + launchedUuid = uuid; + return { success: true, processId: 12345, logFilePath: '/tmp/test.log' }; + }; + + const result = await runLogic(() => + launch_app_simLogic( + { + simulatorName: 'iPhone 17', + bundleId: 'io.sentry.testapp', + }, + installCheckExecutor, + trackingLauncher, + ), + ); + + expect(result.isError).toBeFalsy(); + expect(launchedUuid).toBe('resolved-uuid'); + expect(executorCalls).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'get_app_container', 'resolved-uuid', 'io.sentry.testapp', 'app'], + ]); + expect(result.nextStepParams).toEqual({ + open_sim: {}, + stop_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'io.sentry.testapp' }, + }); + }); + it('should display friendly name when simulatorName is provided alongside resolved simulatorId', async () => { const installCheckExecutor = async () => ({ success: true, diff --git a/src/mcp/tools/simulator/__tests__/screenshot.test.ts b/src/mcp/tools/simulator/__tests__/screenshot.test.ts index 4432513e0..cdb390bb2 100644 --- a/src/mcp/tools/simulator/__tests__/screenshot.test.ts +++ b/src/mcp/tools/simulator/__tests__/screenshot.test.ts @@ -13,6 +13,15 @@ import { schema, handler, screenshotLogic } from '../../ui-automation/screenshot import { allText, 
runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; describe('screenshot plugin', () => { + const bootedDeviceListJson = JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { udid: 'test-uuid', name: 'iPhone 15 Pro', state: 'Booted' }, + { udid: 'another-uuid', name: 'iPhone 15', state: 'Booted' }, + ], + }, + }); + beforeEach(() => { sessionStore.clear(); }); @@ -89,7 +98,11 @@ describe('screenshot plugin', () => { expect(capturedCommands).toHaveLength(5); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); + + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -98,10 +111,6 @@ describe('screenshot plugin', () => { '/tmp/screenshot_mock-uuid-123.png', ]); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); - expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -168,7 +177,11 @@ describe('screenshot plugin', () => { expect(capturedCommands).toHaveLength(5); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); + + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -177,10 +190,6 @@ describe('screenshot plugin', () => { '/tmp/screenshot_different-uuid-456.png', ]); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); - expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -234,21 +243,21 @@ describe('screenshot plugin', () => { ), ); - // Should execute all commands in sequence: screenshot, list devices, orientation detection, optimization, dimensions + // Should execute all 
commands in sequence: list devices, screenshot, orientation detection, optimization, dimensions expect(capturedCommands).toHaveLength(5); - const firstCommand = capturedCommands[0]; - expect(firstCommand).toHaveLength(6); - expect(firstCommand[0]).toBe('xcrun'); - expect(firstCommand[1]).toBe('simctl'); - expect(firstCommand[2]).toBe('io'); - expect(firstCommand[3]).toBe('test-uuid'); - expect(firstCommand[4]).toBe('screenshot'); - expect(firstCommand[5]).toMatch(/\/.*\/screenshot_.*\.png/); + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); + const screenshotCommand = capturedCommands[1]; + expect(screenshotCommand).toHaveLength(6); + expect(screenshotCommand[0]).toBe('xcrun'); + expect(screenshotCommand[1]).toBe('simctl'); + expect(screenshotCommand[2]).toBe('io'); + expect(screenshotCommand[3]).toBe('test-uuid'); + expect(screenshotCommand[4]).toBe('screenshot'); + expect(screenshotCommand[5]).toMatch(/\/.*\/screenshot_.*\.png/); expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -267,7 +276,9 @@ describe('screenshot plugin', () => { const mockImageBuffer = Buffer.from('fake-image-data'); const mockExecutor = createCommandMatchingMockExecutor({ - 'xcrun simctl': { success: true, output: 'Screenshot saved' }, + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, sips: { success: true, output: 'Image optimized' }, }); @@ -320,11 +331,21 @@ describe('screenshot plugin', () => { }); it('should handle command failure', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Command failed', - }); + const 
mockExecutor: CommandExecutor = async (command) => { + const cmdStr = command.join(' '); + if (cmdStr.includes('simctl list devices')) { + return { + success: true, + output: bootedDeviceListJson, + error: undefined, + process: mockProcess, + }; + } + if (cmdStr.includes('simctl io')) { + return { success: false, output: '', error: 'Command failed', process: mockProcess }; + } + return { success: true, output: '', error: undefined, process: mockProcess }; + }; const mockPathDeps = { tmpdir: () => '/tmp', @@ -354,10 +375,11 @@ describe('screenshot plugin', () => { }); it('should handle file read failure', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, + const mockExecutor = createCommandMatchingMockExecutor({ + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, + sips: { success: true, output: 'Image optimized' }, }); const mockFileSystemExecutor = createMockFileSystemExecutor({ @@ -446,18 +468,18 @@ describe('screenshot plugin', () => { expect(capturedArgs).toHaveLength(5); - expect(capturedArgs[0]).toEqual([ + expect(capturedArgs[0][0][0]).toBe('xcrun'); + expect(capturedArgs[0][0][1]).toBe('simctl'); + expect(capturedArgs[0][0][2]).toBe('list'); + expect(capturedArgs[0][1]).toBe('[Screenshot]: list devices'); + expect(capturedArgs[0][2]).toBe(false); + + expect(capturedArgs[1]).toEqual([ ['xcrun', 'simctl', 'io', 'test-uuid', 'screenshot', '/tmp/screenshot_mock-uuid-123.png'], '[Screenshot]: screenshot', false, ]); - expect(capturedArgs[1][0][0]).toBe('xcrun'); - expect(capturedArgs[1][0][1]).toBe('simctl'); - expect(capturedArgs[1][0][2]).toBe('list'); - expect(capturedArgs[1][1]).toBe('[Screenshot]: list devices'); - expect(capturedArgs[1][2]).toBe(false); - expect(capturedArgs[2][0][0]).toBe('swift'); expect(capturedArgs[2][0][1]).toBe('-e'); 
expect(capturedArgs[2][1]).toBe('[Screenshot]: detect orientation'); @@ -578,10 +600,11 @@ describe('screenshot plugin', () => { }); it('should handle file read error with fileSystemExecutor', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, + const mockExecutor = createCommandMatchingMockExecutor({ + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, + sips: { success: true, output: 'Image optimized' }, }); const mockFileSystemExecutor = createMockFileSystemExecutor({ diff --git a/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts index 8dc13ef9a..6673d4a8f 100644 --- a/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts @@ -22,6 +22,12 @@ import * as path from 'node:path'; import type { ChildProcess } from 'node:child_process'; import { setRuntimeInstanceForTests } from '../../../../utils/runtime-instance.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + function createTrackedChild(options?: { pid?: number; killImplementation?: (signal?: NodeJS.Signals | number) => boolean; @@ -191,6 +197,33 @@ describe('stop_app_sim tool', () => { expect(text).not.toContain('Tracked OSLog sessions cleaned up'); }); + it('should resolve simulatorName before stopping', async () => { + const calls: string[][] = []; + const mockExecutor: CommandExecutor = async (command) => { + calls.push(command); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ success: true, output: '' }); + }; + + const result = await runLogic(() => + stop_app_simLogic( + 
{ + simulatorName: 'iPhone 17', + bundleId: 'io.sentry.App', + }, + mockExecutor, + ), + ); + + expect(result.isError).toBeFalsy(); + expect(calls).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'terminate', 'resolved-uuid', 'io.sentry.App'], + ]); + }); + it('should display friendly name when simulatorName is provided alongside resolved simulatorId', async () => { const mockExecutor = createMockExecutor({ success: true, output: '' }); diff --git a/src/mcp/tools/simulator/boot_sim.ts b/src/mcp/tools/simulator/boot_sim.ts index dce3d3f34..d0cf77767 100644 --- a/src/mcp/tools/simulator/boot_sim.ts +++ b/src/mcp/tools/simulator/boot_sim.ts @@ -11,6 +11,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { createBasicDiagnostics } from '../../../utils/diagnostics.ts'; @@ -30,11 +31,12 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), }); type BootSimParams = z.infer; +type ResolvedBootSimParams = BootSimParams & { simulatorId: string }; type BootSimResult = SimulatorActionResultDomainResult; const publicSchemaObject = z.strictObject( @@ -45,7 +47,7 @@ const publicSchemaObject = z.strictObject( ); function createBootSimResult(params: { - simulatorId: string; + simulatorId?: string; didError: boolean; error?: string; diagnosticMessage?: string; @@ -63,9 +65,13 @@ function createBootSimResult(params: { ...(params.diagnosticMessage ? { diagnostics: createBasicDiagnostics({ errors: [params.diagnosticMessage] }) } : {}), - artifacts: { - simulatorId: params.simulatorId, - }, + ...(params.simulatorId + ? 
{ + artifacts: { + simulatorId: params.simulatorId, + }, + } + : {}), }; } @@ -79,7 +85,7 @@ function setStructuredOutput(ctx: ToolHandlerContext, result: BootSimResult): vo export function createBootSimExecutor( executor: CommandExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { try { const result = await executor( @@ -118,11 +124,28 @@ export async function boot_simLogic( params: BootSimParams, executor: CommandExecutor, ): Promise { - log('info', `Starting xcrun simctl boot request for simulator ${params.simulatorId}`); - const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = createBootSimResult({ + didError: true, + error: 'Boot simulator operation failed.', + diagnosticMessage: `Failed to resolve simulator: ${simulatorResult.error ?? 'No simulator UUID returned'}`, + }); + setStructuredOutput(ctx, result); + log('error', `Error during boot simulator operation: ${result.error ?? 
'Unknown error'}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedBootSimParams = { ...params, simulatorId: simulatorResult.uuid }; + log('info', `Starting xcrun simctl boot request for simulator ${resolvedParams.simulatorId}`); + const executeBootSim = createBootSimExecutor(executor); - const result = await executeBootSim(params); + const result = await executeBootSim(resolvedParams); setStructuredOutput(ctx, result); if (result.didError) { @@ -132,8 +155,8 @@ export async function boot_simLogic( ctx.nextStepParams = { open_sim: {}, - install_app_sim: { simulatorId: params.simulatorId, appPath: 'PATH_TO_YOUR_APP' }, - launch_app_sim: { simulatorId: params.simulatorId, bundleId: 'YOUR_APP_BUNDLE_ID' }, + install_app_sim: { simulatorId: resolvedParams.simulatorId, appPath: 'PATH_TO_YOUR_APP' }, + launch_app_sim: { simulatorId: resolvedParams.simulatorId, bundleId: 'YOUR_APP_BUNDLE_ID' }, }; } diff --git a/src/mcp/tools/simulator/install_app_sim.ts b/src/mcp/tools/simulator/install_app_sim.ts index 9faa8ba6a..029696d21 100644 --- a/src/mcp/tools/simulator/install_app_sim.ts +++ b/src/mcp/tools/simulator/install_app_sim.ts @@ -11,6 +11,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { installAppOnSimulator } from '../../../utils/simulator-steps.ts'; import { @@ -36,12 +37,13 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), appPath: z.string(), }); type InstallAppSimParams = z.infer; +type ResolvedInstallAppSimParams = InstallAppSimParams & { simulatorId: string }; const publicSchemaObject = z.strictObject( baseSchemaObject.omit({ @@ -56,8 +58,27 @@ export async 
function install_app_simLogic( fileSystem?: FileSystemExecutor, ): Promise { const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildInstallFailure( + { appPath: params.appPath }, + `Failed to resolve simulator: ${simulatorResult.error ?? 'No simulator UUID returned'}`, + ); + setInstallResultStructuredOutput(ctx, result); + log('error', `Error during install app in simulator operation: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedInstallAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; const executeInstallAppSim = createInstallAppSimExecutor(executor, fileSystem); - const result = await executeInstallAppSim(params); + const result = await executeInstallAppSim(resolvedParams); setInstallResultStructuredOutput(ctx, result); @@ -73,7 +94,7 @@ export async function install_app_simLogic( ctx.nextStepParams = { open_sim: {}, launch_app_sim: { - simulatorId: params.simulatorId, + simulatorId: resolvedParams.simulatorId, bundleId: bundleId || 'YOUR_APP_BUNDLE_ID', }, }; @@ -103,7 +124,7 @@ async function extractBundleId( export function createInstallAppSimExecutor( executor: CommandExecutor, fileSystem?: FileSystemExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { const artifacts = { simulatorId: params.simulatorId, appPath: params.appPath }; diff --git a/src/mcp/tools/simulator/launch_app_sim.ts b/src/mcp/tools/simulator/launch_app_sim.ts index 065958b4b..0299bc75b 100644 --- a/src/mcp/tools/simulator/launch_app_sim.ts +++ b/src/mcp/tools/simulator/launch_app_sim.ts @@ -14,6 +14,7 @@ import { launchSimulatorAppWithLogging, type LaunchWithLoggingResult, } from '../../../utils/simulator-steps.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { 
toErrorMessage } from '../../../utils/errors.ts'; import { buildLaunchFailure, @@ -49,7 +50,7 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), bundleId: z.string(), launchArgs: z.array(z.string()).optional(), @@ -57,6 +58,7 @@ const internalSchemaObject = z.object({ }); export type LaunchAppSimParams = z.infer; +type ResolvedLaunchAppSimParams = LaunchAppSimParams & { simulatorId: string }; type LaunchAppSimResult = LaunchResultDomainResult; export type SimulatorLauncher = typeof launchSimulatorAppWithLogging; @@ -67,8 +69,27 @@ export async function launch_app_simLogic( launcher: SimulatorLauncher = launchSimulatorAppWithLogging, ): Promise { const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildLaunchFailure( + { bundleId: params.bundleId }, + `Failed to resolve simulator: ${simulatorResult.error ?? 
'No simulator UUID returned'}`, + ); + setLaunchResultStructuredOutput(ctx, result); + log('error', `Error during launch app in simulator operation: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedLaunchAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; const executeLaunchAppSim = createLaunchAppSimExecutor(executor, launcher); - const result = await executeLaunchAppSim(params); + const result = await executeLaunchAppSim(resolvedParams); setLaunchResultStructuredOutput(ctx, result); @@ -82,12 +103,12 @@ export async function launch_app_simLogic( ctx.nextStepParams = { open_sim: {}, - stop_app_sim: { simulatorId: params.simulatorId, bundleId: params.bundleId }, + stop_app_sim: { simulatorId: resolvedParams.simulatorId, bundleId: params.bundleId }, }; } function buildSuccessArtifacts( - params: LaunchAppSimParams, + params: ResolvedLaunchAppSimParams, launchResult: LaunchWithLoggingResult, ): LaunchResultArtifacts { return { @@ -102,7 +123,7 @@ function buildSuccessArtifacts( export function createLaunchAppSimExecutor( executor: CommandExecutor, launcher: SimulatorLauncher = launchSimulatorAppWithLogging, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { log('info', `Starting xcrun simctl launch request for simulator ${params.simulatorId}`); diff --git a/src/mcp/tools/simulator/stop_app_sim.ts b/src/mcp/tools/simulator/stop_app_sim.ts index 15c57c43c..0f2f06bf7 100644 --- a/src/mcp/tools/simulator/stop_app_sim.ts +++ b/src/mcp/tools/simulator/stop_app_sim.ts @@ -10,6 +10,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { stopSimulatorLaunchOsLogSessionsForApp } from '../../../utils/log-capture/index.ts'; import { @@ -35,17 +36,18 
@@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), bundleId: z.string(), }); export type StopAppSimParams = z.infer; +type ResolvedStopAppSimParams = StopAppSimParams & { simulatorId: string }; type StopAppSimResult = StopResultDomainResult; export function createStopAppSimExecutor( executor: CommandExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { const simulatorId = params.simulatorId; const artifacts = { simulatorId, bundleId: params.bundleId }; @@ -92,13 +94,32 @@ export async function stop_app_simLogic( params: StopAppSimParams, executor: CommandExecutor, ): Promise { - const simulatorId = params.simulatorId; + const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildStopFailure( + { bundleId: params.bundleId }, + `Failed to resolve simulator: ${simulatorResult.error ?? 
'No simulator UUID returned'}`, + ); + setStopResultStructuredOutput(ctx, result); + log('error', `Error stopping app in simulator: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedStopAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; + const simulatorId = resolvedParams.simulatorId; log('info', `Stopping app ${params.bundleId} in simulator ${simulatorId}`); - const ctx = getHandlerContext(); const executeStopAppSim = createStopAppSimExecutor(executor); - const result = await executeStopAppSim(params); + const result = await executeStopAppSim(resolvedParams); setStopResultStructuredOutput(ctx, result); if (result.didError) { diff --git a/src/mcp/tools/ui-automation/__tests__/batch.test.ts b/src/mcp/tools/ui-automation/__tests__/batch.test.ts new file mode 100644 index 000000000..d7148b80f --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/batch.test.ts @@ -0,0 +1,385 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import * as z from 'zod'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { batchLogic, createBatchExecutor, handler, schema } from '../batch.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +function actionCommands(calls: Array<{ command: string[] }>): string[][] { + return calls.map((call) => 
call.command).filter((command) => command[1] !== 'describe-ui'); +} + +async function runBatch( + params: Parameters[0], + executor = createTrackingExecutor().executor, + axeHelpers = createMockAxeHelpers(), +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => batchLogic(params, executor, axeHelpers)); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} + +describe('Batch UI Automation Tool', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes structured tap steps and rejects raw AXe strings', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('steps'); + expect(schema).toHaveProperty('axCache'); + expect(schema).not.toHaveProperty('tapStyle'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ steps: [{ action: 'tap', elementRef: 'e1' }] }).success).toBe( + true, + ); + expect( + schemaObject.safeParse({ + steps: [ + { action: 'tap', elementRef: 'e1', preDelay: 0.25, postDelay: 0.5 }, + { action: 'tap', elementRef: 'e2' }, + ], + axCache: 'perBatch', + waitTimeout: 2, + pollInterval: 0.25, + }).success, + ).toBe(true); + expect(schemaObject.safeParse({ steps: ['tap --id login'] }).success).toBe(false); + expect(schemaObject.safeParse({ steps: [] }).success).toBe(false); + expect(schemaObject.safeParse({ steps: [{ action: 'tap', elementRef: '' }] }).success).toBe( + false, + ); + expect( + schemaObject.safeParse({ steps: [{ action: 'swipe', elementRef: 'e1' }] }).success, + ).toBe(false); + expect( + schemaObject.safeParse({ steps: [{ action: 'tap', elementRef: 'e1' }], pollInterval: 0 }) + .success, + ).toBe(false); + }); + }); + + describe('Command Generation', () => { + it('pre-resolves element refs into AXe coordinate batch steps', async () => { + recordSnapshot([ + createNode({ frame: { x: 
10, y: 20, width: 100, height: 40 } }), + createNode({ frame: { x: 200, y: 300, width: 80, height: 60 }, AXLabel: 'Next' }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { + simulatorId, + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2', preDelay: 0.25, postDelay: 0.5 }, + ], + }, + executor, + ); + + expect(result).toMatchObject({ + didError: false, + action: { type: 'batch', stepCount: 2 }, + }); + expect(actionCommands(calls)).toEqual([ + [ + '/mocked/axe/path', + 'batch', + '--step', + 'tap -x 60 -y 40', + '--step', + 'tap -x 240 -y 330 --pre-delay 0.25 --post-delay 0.5', + '--udid', + simulatorId, + ], + ]); + }); + + it('uses touch down/up batch steps for switch refs', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + AXLabel: 'Reduce transparency', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + await runBatch({ simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, executor); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'batch', + '--step', + 'touch -x 307 -y 903 --down', + '--step', + 'touch -x 307 -y 903 --up', + '--udid', + simulatorId, + ]); + }); + + it('rejects delays for switch refs before AXe execution', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + AXLabel: 'Reduce transparency', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1', postDelay: 0.5 }] }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: + 'Remove preDelay/postDelay from switch steps, or wait between separate batch calls.', + }); 
+ expect(calls).toEqual([]); + }); + + it('passes supported AXe batch options through unchanged', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + + await runBatch( + { + simulatorId, + steps: [{ action: 'tap', elementRef: 'e1' }], + axCache: 'perStep', + waitTimeout: 3, + pollInterval: 0.5, + }, + executor, + ); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'batch', + '--step', + 'tap -x 60 -y 40', + '--ax-cache', + 'perStep', + '--wait-timeout', + '3', + '--poll-interval', + '0.5', + '--udid', + simulatorId, + ]); + }); + }); + + describe('Runtime snapshot invalidation', () => { + it('preserves the cached runtime snapshot after a successful safe same-screen batch', async () => { + recordSnapshot([ + createNode({ type: 'Switch', role: 'AXSwitch', AXValue: '0' }), + createNode({ type: 'Switch', role: 'AXSwitch', AXValue: 'off' }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { + simulatorId, + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toBeUndefined(); + expect(calls.some((call) => call.command[1] === 'describe-ui')).toBe(false); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('records a fresh runtime snapshot after a successful arbitrary batch', async () => { + recordSnapshot([createNode()]); + + const result = await runBatch({ simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }); + + expect(result.didError).toBe(false); + expect(result.capture).toMatchObject({ type: 'runtime-snapshot', simulatorId }); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('does not preserve snapshots for inactive non-switch elements', async () => { + recordSnapshot([createNode({ AXValue: 'not selected' })]); + const { calls, executor } = createTrackingExecutor(); + + const 
result = await runBatch( + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toMatchObject({ type: 'runtime-snapshot', simulatorId }); + expect(calls.some((call) => call.command[1] === 'describe-ui')).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('pre-resolves all refs and fails before execution if any ref is invalid', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { + simulatorId, + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e404' }, + ], + }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('clears the cached runtime snapshot when AXe runs and reports batch failure', async () => { + recordSnapshot([createNode()]); + + const result = await runBatch( + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, + createFailingExecutor('step failed'), + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.diagnostics?.errors?.[0]?.message).toBe('step failed'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + + it('preserves the cached runtime snapshot when AXe is unavailable before execution', async () => { + recordSnapshot([createNode({ type: 'Switch', role: 'AXSwitch', AXValue: '0' })]); + const { executor } = createTrackingExecutor(); + + const result = await runBatch( + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, + executor, + createMockAxeHelpers({ getAxePathReturn: null }), + ); + + expect(result.didError).toBe(true); + 
expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('preserves the cached runtime snapshot when the debugger guard blocks before AXe runs', async () => { + recordSnapshot([createNode({ type: 'Switch', role: 'AXSwitch', AXValue: '0' })]); + const { calls, executor } = createTrackingExecutor(); + const debuggerManager = new DebuggerManager(); + vi.spyOn(debuggerManager, 'findSessionForSimulator').mockReturnValue({ + id: 'debug-session-1', + backend: 'dap', + simulatorId, + pid: 1234, + createdAt: 0, + lastUsedAt: 0, + }); + vi.spyOn(debuggerManager, 'getExecutionState').mockResolvedValue({ + status: 'stopped', + reason: 'breakpoint', + }); + const executeBatch = createBatchExecutor(executor, createMockAxeHelpers(), debuggerManager); + + const result = await executeBatch({ + simulatorId, + steps: [{ action: 'tap', elementRef: 'e1' }], + }); + + expect(result.didError).toBe(true); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + }); + + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { steps: [{ action: 'tap', elementRef: 'e1' }] }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + + it('ignores unrelated project session defaults before strict validation', async () => { + sessionStore.setDefaults({ + simulatorId, + projectPath: '/tmp/App.xcodeproj', + scheme: 'App', + simulatorName: 'iPhone 17 Pro', + simulatorPlatform: 'iOS Simulator', + }); + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + ( + handler as unknown as ( + args: Record, + executor: CommandExecutor, + ) => Promise + )({ steps: [{ action: 'tap', elementRef: 'e1' }] }, executor), + ); + + 
expect(ctx.structuredOutput?.result.didError).toBe(false); + expect(calls[0]?.command.slice(1)).toEqual([ + 'batch', + '--step', + 'tap -x 60 -y 40', + '--udid', + simulatorId, + ]); + }); + + it('rejects removed legacy top-level fields', async () => { + sessionStore.setDefaults({ simulatorId }); + + const result = await callHandler(handler, { + steps: [{ action: 'tap', elementRef: 'e1' }], + tapStyle: 'physical', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Parameter validation failed'); + expect(result.content[0].text).toContain('Unrecognized key'); + expect(result.content[0].text).toContain('tapStyle'); + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/button.test.ts b/src/mcp/tools/ui-automation/__tests__/button.test.ts index d83851720..c946a148e 100644 --- a/src/mcp/tools/ui-automation/__tests__/button.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/button.test.ts @@ -1,11 +1,11 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import * as z from 'zod'; import { createMockExecutor, createNoopExecutor, createMockCommandResponse, } from '../../../../test-utils/mock-executors.ts'; -import { schema, handler, buttonLogic } from '../button.ts'; +import { schema, handler, buttonLogic, createButtonExecutor } from '../button.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; @@ -23,6 +23,8 @@ describe('Button Plugin', () => { expect(schemaObj.safeParse({ buttonType: 'home', duration: 2.5 }).success).toBe(true); expect(schemaObj.safeParse({ buttonType: 'invalid-button' }).success).toBe(false); expect(schemaObj.safeParse({ buttonType: 'home', duration: -1 }).success).toBe(false); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 0 
}).success).toBe(true); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 10.1 }).success).toBe(true); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', @@ -60,6 +62,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -97,6 +101,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -135,6 +141,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -171,6 +179,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -184,6 +194,46 @@ describe('Button Plugin', () => { }); }); + describe('Executor Behavior', () => { + it('waits briefly after successful button presses so system UI transitions can settle', async () => { + vi.useFakeTimers(); + try { + const mockExecutor = createMockExecutor({ + success: true, + output: 'button press completed', + error: undefined, + process: { pid: 12345 }, + }); + + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + const executeButton = createButtonExecutor(mockExecutor, mockAxeHelpers, undefined, 500); + let settled = false; + const resultPromise = executeButton({ + simulatorId: '12345678-1234-4234-8234-123456789012', + buttonType: 'home', + }).then((result) => { + settled = true; + return result; + }); + + await vi.advanceTimersByTimeAsync(499); + expect(settled).toBe(false); + + await vi.advanceTimersByTimeAsync(1); + const result = await resultPromise; + + expect(settled).toBe(true); + expect(result.didError).toBe(false); + } finally { + vi.useRealTimers(); + } + }); + }); + describe('Handler Behavior (Complete Literal Returns)', () => { it('should surface session default requirement when simulatorId is missing', async () => { const result = await callHandler(handler, { buttonType: 'home' }); @@ -259,6 +309,8 @@ 
describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -288,6 +340,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -309,6 +363,8 @@ describe('Button Plugin', () => { }, createNoopExecutor(), mockAxeHelpers, + undefined, + 0, ), ); @@ -337,6 +393,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -364,6 +422,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -391,6 +451,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -418,6 +480,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); diff --git a/src/mcp/tools/ui-automation/__tests__/drag.test.ts b/src/mcp/tools/ui-automation/__tests__/drag.test.ts new file mode 100644 index 000000000..876cd81bd --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/drag.test.ts @@ -0,0 +1,238 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import * as z from 'zod'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { dragLogic, handler, schema } from '../drag.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runDrag( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => dragLogic(params, executor, createMockAxeHelpers())); + 
expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} + +describe('Drag Tool', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes elementRef and direction without raw coordinate fields', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('direction'); + expect(schema).not.toHaveProperty('startX'); + expect(schema).not.toHaveProperty('startY'); + expect(schema).not.toHaveProperty('endX'); + expect(schema).not.toHaveProperty('endY'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ elementRef: 'e1', direction: 'up' }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', direction: 'diagonal' }).success).toBe( + false, + ); + expect(schemaObject.safeParse({ direction: 'up' }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(false); + expect( + schemaObject.safeParse({ + elementRef: 'e1', + direction: 'down', + duration: 1.5, + distance: 0.5, + steps: 80, + preDelay: 0.5, + postDelay: 0.25, + }).success, + ).toBe(true); + expect( + schemaObject.safeParse({ elementRef: 'e1', direction: 'down', duration: 0 }).success, + ).toBe(false); + expect( + schemaObject.safeParse({ elementRef: 'e1', direction: 'down', distance: 0 }).success, + ).toBe(false); + expect( + schemaObject.safeParse({ elementRef: 'e1', direction: 'down', steps: 0 }).success, + ).toBe(false); + }); + }); + + describe('Command Generation', () => { + it('derives a viewport-relative upward drag from a sheet grabber', async () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Half 
screen', + frame: { x: 182, y: 446, width: 76, height: 24 }, + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag( + { simulatorId, elementRef: 'e2', direction: 'up', distance: 0.35 }, + executor, + ); + + expect(result).toMatchObject({ + didError: false, + action: { + type: 'drag', + elementRef: 'e2', + direction: 'up', + from: { x: 220, y: 458 }, + to: { x: 220, y: 123 }, + }, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'drag', + '--start-x', + '220', + '--start-y', + '458', + '--end-x', + '220', + '--end-y', + '123', + '--udid', + simulatorId, + ]); + }); + + it('uses within-element scroll points for scrollable drag targets', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 20, y: 255, width: 400, height: 637 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag( + { + simulatorId, + elementRef: 'e1', + direction: 'up', + distance: 0.7, + duration: 0.8, + steps: 80, + postDelay: 0.5, + }, + executor, + ); + + expect(result.action).toMatchObject({ + type: 'drag', + elementRef: 'e1', + direction: 'up', + from: { x: 220, y: 729 }, + to: { x: 220, y: 418 }, + durationSeconds: 0.8, + steps: 80, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'drag', + '--start-x', + '220', + '--start-y', + '729', + '--end-x', + '220', + '--end-y', + '418', + '--duration', + '0.8', + '--steps', + '80', + '--post-delay', + '0.5', + '--udid', + simulatorId, + ]); + }); + }); + + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag({ simulatorId, elementRef: 'e1', direction: 'up' }, executor); + + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); + }); + + it('returns 
ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag({ simulatorId, elementRef: 'e404', direction: 'up' }, executor); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); + }); + }); + + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', direction: 'up' }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [createNode()], + }), + ]); + + const result = await runDrag( + { simulatorId, elementRef: 'e2', direction: 'up' }, + createFailingExecutor('drag failed'), + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e2', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts index a010bcd32..2eedc5d7a 100644 --- a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts @@ -37,7 +37,12 @@ describe('Gesture Plugin', () => { ).toBe(true); expect(schemaObj.safeParse({ preset: 'invalid-preset' }).success).toBe(false); expect(schemaObj.safeParse({ preset: 'scroll-up', screenWidth: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ 
preset: 'scroll-up', screenWidth: 2001 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', screenHeight: 3001 }).success).toBe(false); expect(schemaObj.safeParse({ preset: 'scroll-up', duration: -1 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', duration: 0 }).success).toBe(true); + expect(schemaObj.safeParse({ preset: 'scroll-up', delta: 0 }).success).toBe(true); + expect(schemaObj.safeParse({ preset: 'scroll-up', delta: 201 }).success).toBe(false); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts index b9d8be0ec..ebdd2909e 100644 --- a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts @@ -36,6 +36,8 @@ describe('Key Press Tool', () => { expect(schemaObj.safeParse({ keyCode: 'invalid' }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: -1 }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: 256 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCode: 40, duration: 0 }).success).toBe(true); + expect(schemaObj.safeParse({ keyCode: 40, duration: 10.1 }).success).toBe(true); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts b/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts index 9e71d84a6..576469e1a 100644 --- a/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts @@ -29,6 +29,10 @@ describe('Key Sequence Tool', () => { expect(schemaObj.safeParse({ keyCodes: [-1] }).success).toBe(false); expect(schemaObj.safeParse({ keyCodes: [256] }).success).toBe(false); expect(schemaObj.safeParse({ keyCodes: [40], delay: -0.1 }).success).toBe(false); + 
expect(schemaObj.safeParse({ keyCodes: [40], delay: 5.1 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCodes: Array.from({ length: 101 }, () => 40) }).success).toBe( + false, + ); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts index d6be95a7e..727ef2500 100644 --- a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts @@ -1,457 +1,201 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, long_pressLogic } from '../long_press.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runLongPress( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => long_pressLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} describe('Long Press Plugin', () 
=> { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); - describe('Export Field Validation (Literal)', () => { - it('should have handler function', () => { + describe('Schema Validation', () => { + it('exposes elementRef and duration without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('duration'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: 1500, - }).success, - ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 100.5, - y: 200, - duration: 1500, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200.5, - duration: 1500, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: 0, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: -100, - }).success, - ).toBe(false); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); - }); - }); - - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await callHandler(handler, { x: 100, y: 200, duration: 1500 }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); - - it('should surface validation errors once simulator default exists', async () => { - 
sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { x: 100, y: 200, duration: 0 }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('duration: Duration of the long press in milliseconds'); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 1500 }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 0 }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 10_001 }).success).toBe(false); + expect(schemaObject.safeParse({ duration: 1500 }).success).toBe(false); }); }); describe('Command Generation', () => { - it('should generate correct axe command for basic long press', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('long presses the referenced element center and converts milliseconds to AXe seconds', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - trackingExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1500 }, + executor, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(result).toMatchObject({ + didError: false, + action: { type: 'long-press', elementRef: 'e1', durationMs: 1500, x: 60, y: 40 }, + capture: { type: 'runtime-snapshot', 
simulatorId }, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '100', + '60', '-y', - '200', + '40', '--down', '--up', '--delay', '1.5', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for long press with different coordinates', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 50, - y: 75, - duration: 2000, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'touch', - '-x', - '50', - '-y', - '75', - '--down', - '--up', - '--delay', - '2', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('uses the switch activation point for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), ]); - }); - - it('should generate correct axe command for short duration long press', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 300, - y: 400, - duration: 500, - }, - trackingExecutor, - 
mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'touch', - '-x', - '300', - '-y', - '400', - '--down', - '--up', - '--delay', - '0.5', - '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); - - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 250, - duration: 3000, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', + expect(result.action).toMatchObject({ + type: 'long-press', + elementRef: 'e1', + durationMs: 1000, + x: 307, + y: 903, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '150', + '307', '-y', - '250', + '903', '--down', '--up', '--delay', - '3', + '1', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should return success for valid long press execution', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'long press completed', - error: '', - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const result = await 
runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Long press at (100, 200) for 1500ms simulated successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, - process: mockProcess, - }); - - const mockAxeHelpers = { - getAxePath: () => null, // Mock axe not found - getBundledAxeEnvironment: () => ({}), - }; + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - process: mockProcess, - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + 
recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e404', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate long press at (100, 200).'); - expect(text).toContain('axe command failed'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle SystemError from command execution', async () => { - const mockExecutor = () => { - throw new Error('ENOENT: no such file or directory'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ role: 'AXApplication', type: 'Application' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle unexpected Error objects', async () => { - const mockExecutor = () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; 
- - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', duration: 1500 }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected string errors', async () => { - const mockExecutor = () => { - throw 'String error'; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode()]); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1500 }, + createFailingExecutor('long press failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts index 43244a351..065301319 100644 --- a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts +++ 
b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts @@ -1,9 +1,10 @@ import { describe, expect, it } from 'vitest'; import { + createCommandMatchingMockExecutor, createMockExecutor, createMockFileSystemExecutor, } from '../../../../test-utils/mock-executors.ts'; -import { runToolLogic } from '../../../../test-utils/test-helpers.ts'; +import { createMockToolHandlerContext, runToolLogic } from '../../../../test-utils/test-helpers.ts'; import { buttonLogic } from '../button.ts'; import { gestureLogic } from '../gesture.ts'; import { key_pressLogic } from '../key_press.ts'; @@ -15,8 +16,12 @@ import { swipeLogic } from '../swipe.ts'; import { tapLogic } from '../tap.ts'; import { touchLogic } from '../touch.ts'; import { type_textLogic } from '../type_text.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; +import { createNode, recordSnapshot } from './ui-action-test-helpers.ts'; const simulatorId = '12345678-1234-4234-8234-123456789012'; +const runtimeSnapshotOutput = + '{"elements":[{"type":"Button","role":"AXButton","frame":{"x":100,"y":200,"width":50,"height":30},"enabled":true,"children":[],"custom_actions":[]}]}'; function createMockAxeHelpers() { return { @@ -36,6 +41,8 @@ describe('ui automation non-streaming tools', () => { { simulatorId, buttonType: 'home' }, createMockExecutor({ success: true }), axeHelpers, + undefined, + 0, ), expectedText: "Hardware button 'home' pressed successfully.", }, @@ -71,59 +78,75 @@ describe('ui automation non-streaming tools', () => { }, { name: 'long_press', - run: () => - long_pressLogic( - { simulatorId, x: 100, y: 200, duration: 1500 }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return long_pressLogic( + { simulatorId, elementRef: 'e1', duration: 1500 }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Long press at (100, 200) for 1500ms simulated successfully.', + ); + }, + expectedText: 
'Long press on elementRef e1 for 1500ms simulated successfully.', }, { name: 'swipe', - run: () => - swipeLogic( - { simulatorId, x1: 10, y1: 20, x2: 30, y2: 40 }, - createMockExecutor({ success: true }), + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + return swipeLogic( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + createMockExecutor({ success: true, output: runtimeSnapshotOutput }), axeHelpers, - ), - expectedText: 'Swipe from (10, 20) to (30, 40) simulated successfully.', + ); + }, + expectedText: 'Swipe up within elementRef e1 simulated successfully.', }, { name: 'tap', - run: () => - tapLogic( - { simulatorId, x: 100, y: 200 }, - createMockExecutor({ success: true }), + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return tapLogic( + { simulatorId, elementRef: 'e1' }, + createMockExecutor({ success: true, output: runtimeSnapshotOutput }), axeHelpers, - ), - expectedText: 'Tap at (100, 200) simulated successfully.', + ); + }, + expectedText: 'Tap on elementRef e1 simulated successfully.', }, { name: 'touch', - run: () => - touchLogic( - { simulatorId, x: 100, y: 200, down: true }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return touchLogic( + { simulatorId, elementRef: 'e1', down: true }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Touch event (touch down) at (100, 200) executed successfully.', + ); + }, + expectedText: 'Touch event (touch down) on elementRef e1 executed successfully.', }, { name: 'type_text', - run: () => - type_textLogic( - { simulatorId, text: 'Hello' }, - createMockExecutor({ success: true }), + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + return type_textLogic( + { simulatorId, elementRef: 'e1', text: 'Hello' }, + 
createMockExecutor({ success: true, output: runtimeSnapshotOutput }), axeHelpers, - ), - expectedText: 'Text typing simulated successfully.', + ); + }, + expectedText: 'Text typed into elementRef e1 (5 characters) successfully.', }, ]; for (const testCase of cases) { const { result } = await runToolLogic(testCase.run); expect(result.events, `${testCase.name} should not emit progress events`).toEqual([]); + expect(result.isError()).toBe(false); expect(result.text()).toContain(testCase.expectedText); } }); @@ -132,7 +155,19 @@ describe('ui automation non-streaming tools', () => { const { result } = await runToolLogic(() => screenshotLogic( { simulatorId, returnFormat: 'path' }, - createMockExecutor({ success: true, output: 'Screenshot saved' }), + createCommandMatchingMockExecutor({ + 'xcrun simctl list devices -j': { + output: JSON.stringify({ + devices: { + 'iOS 26.0': [{ udid: simulatorId, name: 'iPhone 17', state: 'Booted' }], + }, + }), + }, + 'xcrun simctl io': { output: 'Screenshot saved' }, + 'swift -e': { output: '368,800' }, + 'sips -Z': { output: 'optimized' }, + 'sips -g pixelWidth': { output: 'pixelWidth: 368\npixelHeight: 800' }, + }), createMockFileSystemExecutor(), { tmpdir: () => '/tmp', join: (...paths) => paths.join('/') }, { v4: () => 'test-uuid' }, @@ -143,8 +178,9 @@ describe('ui automation non-streaming tools', () => { expect(result.text()).toContain('Screenshot captured'); }); - it('returns snapshot_ui text from structured output without progress events', async () => { - const { result } = await runToolLogic(() => + it('returns snapshot_ui structured output without emitting progress events', async () => { + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic( { simulatorId, @@ -159,8 +195,17 @@ describe('ui automation non-streaming tools', () => { ); expect(result.events).toEqual([]); - expect(result.text()).toContain('Accessibility hierarchy retrieved successfully.'); - 
expect(result.text()).toContain('Accessibility Hierarchy'); - expect(result.text()).toContain('"type" : "Button"'); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + elements: [expect.objectContaining({ ref: 'e1' })], + }), + ); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts new file mode 100644 index 000000000..6980f93a1 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts @@ -0,0 +1,732 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import type { RuntimeSnapshotV1 } from '../../../../types/ui-snapshot.ts'; +import { createRuntimeSnapshotNextSteps } from '../shared/runtime-next-steps.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { createNode, recordSnapshot, simulatorId } from './ui-action-test-helpers.ts'; + +function currentRuntimeSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!.payload; +} + +function createScrollView(overrides: Partial = {}): AccessibilityNode { + return createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 390, height: 844 }, + AXIdentifier: 'scroll-view', + ...overrides, + }); +} + +function nestNode(node: AccessibilityNode, depth: number): AccessibilityNode { + let current = node; + for (let index = 0; index < depth; index += 1) { + current = createNode({ + type: 'Group', + role: 'AXGroup', + AXIdentifier: `container.${index}`, + frame: current.frame, + children: [current], + }); + } + return 
current; +} + +describe('runtime snapshot next steps', () => { + beforeEach(() => { + __resetRuntimeSnapshotStoreForTests(); + }); + + it('prefers tap and scroll examples from the active foreground container', () => { + recordSnapshot([ + createScrollView({ + AXIdentifier: 'weather.backgroundList', + children: [ + createNode({ + AXLabel: 'Background, Details', + AXIdentifier: 'weather.backgroundCard', + frame: { x: 20, y: 120, width: 350, height: 80 }, + }), + ], + }), + createScrollView({ + AXIdentifier: 'weather.settingsSheet', + frame: { x: 0, y: 420, width: 390, height: 424 }, + children: [ + createNode({ AXLabel: 'Close', frame: { x: 310, y: 430, width: 60, height: 40 } }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 480, width: 350, height: 40 }, + }), + createNode({ + AXLabel: 'London, England', + AXIdentifier: 'weather.locationCard', + frame: { x: 20, y: 540, width: 350, height: 80 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const foregroundScrollRef = snapshot.elements.find( + (element) => element.identifier === 'weather.settingsSheet', + )?.ref; + const foregroundCardRef = snapshot.elements.find( + (element) => element.identifier === 'weather.locationCard', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: foregroundCardRef }, + }); + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: foregroundScrollRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + + it('prioritizes real scrolling over low-information chrome taps', () => { + const snapshot: RuntimeSnapshotV1 = { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId, + screenHash: 'scrollable-main', + seq: 1, + 
capturedAtMs: 0, + expiresAtMs: 1, + actions: [], + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'Location', + identifier: 'example.locationButton', + frame: { x: 20, y: 70, width: 120, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 'Settings', + identifier: 'example.settingsButton', + frame: { x: 320, y: 70, width: 44, height: 44 }, + actions: ['tap'], + }, + ], + }; + const scrollRef = 'e1'; + const locationRef = 'e2'; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: scrollRef, + direction: 'up', + distance: 0.5, + }, + }); + expect(steps).toContainEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: locationRef }, + }); + }); + + it('prefers an identified sheet list over background scroll views in flattened sheets', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 110, width: 390, height: 210 }, + children: [ + createNode({ AXLabel: 'Now', frame: { x: 20, y: 130, width: 80, height: 40 } }), + ], + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 'example.locationsSheet', + frame: { x: 0, y: 360, width: 390, height: 484 }, + children: [ + createNode({ AXLabel: 'Close', frame: { x: 320, y: 370, width: 44, height: 44 } }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'London', + frame: { x: 20, y: 430, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, 
United Kingdom', + AXValue: 'saved', + frame: { x: 20, y: 500, width: 350, height: 88 }, + }), + ], + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const sheetListRef = snapshot.elements.find( + (element) => element.identifier === 'example.locationsSheet', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: sheetListRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + + it('prefers a foreground sheet list over application root sheet scrolling', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 157, y: 300, width: 76, height: 8 }, + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 'example.sheetList', + frame: { x: 0, y: 320, width: 390, height: 524 }, + children: [ + createNode({ AXLabel: 'Close', frame: { x: 320, y: 340, width: 44, height: 44 } }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 390, width: 300, height: 44 }, + }), + ], + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const rootRef = snapshot.elements.find((element) => element.role === 'application')?.ref; + const listRef = snapshot.elements.find( + (element) => element.identifier === 'example.sheetList', + )?.ref; + + expect(rootRef).toBeDefined(); + expect(listRef).toBeDefined(); + expect(snapshot.elements.find((element) => element.ref === rootRef)?.actions).not.toContain( + 'swipeWithin', + ); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + 
expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: listRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + + it('does not suggest synthetic sheet scrolling when no real sheet scroll target exists', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.backgroundScroll', + frame: { x: 0, y: 80, width: 402, height: 260 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Locations', + AXIdentifier: 'example.locationsSheet', + frame: { x: 148, y: 104, width: 106, height: 32 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Edit', + AXIdentifier: 'example.locationsSheet', + frame: { x: 24, y: 96, width: 60, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: undefined, + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 362, height: 44 }, + }), + createNode({ + AXLabel: 'Use current location', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 218, width: 362, height: 54 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'MY LOCATIONS ¡ 7', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 292, width: 160, height: 20 }, + }), + createNode({ + AXLabel: 'San Francisco, 1:24 PM ¡ Cloudy', + frame: { x: 20, y: 326, width: 362, height: 72 }, + }), + createNode({ 
+ AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 415, width: 362, height: 72 }, + }), + createNode({ + AXLabel: 'Aspen, 2:24 PM ¡ Light Snow', + frame: { x: 20, y: 504, width: 362, height: 72 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const rootRef = snapshot.elements.find((element) => element.role === 'application')?.ref; + + expect(rootRef).toBeDefined(); + expect( + snapshot.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + expect(snapshot.elements.find((element) => element.ref === rootRef)?.actions).not.toContain( + 'swipeWithin', + ); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps.some((step) => step.tool === 'swipe')).toBe(false); + }); + + it('suggests expanding a collapsed foreground sheet via its real grabber', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Half screen', + frame: { x: 182, y: 446, width: 76, height: 24 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.sheet', + frame: { x: 374, y: 478, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search', + AXIdentifier: 'example.sheet', + frame: { x: 20, y: 518, width: 400, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Use current location', + AXIdentifier: 'example.sheet', + frame: { x: 20, y: 580, width: 400, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'First visible row', + frame: { x: 20, y: 650, width: 400, height: 72 }, + }), + ], + }), + ]); + + const snapshot = 
currentRuntimeSnapshot(); + const grabberRef = snapshot.elements.find((element) => element.label === 'Sheet Grabber')?.ref; + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Expand foreground sheet', + tool: 'drag', + params: { + simulatorId, + elementRef: grabberRef, + direction: 'up', + distance: 0.35, + duration: 0.8, + steps: 80, + postDelay: 0.8, + }, + }); + expect(steps.some((step) => step.tool === 'swipe')).toBe(false); + expect(steps.some((step) => step.tool === 'batch')).toBe(false); + }); + + it('prefers composite dragging real foreground sheet scroll content after expansion', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Expanded', + frame: { x: 182, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 255, width: 400, height: 637 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Edit', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 374, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Use current location', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 240, width: 400, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'San Francisco, 1:24 PM ¡ Mostly Sunny', + frame: { x: 20, y: 326, width: 400, height: 72 }, + }), + ], + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const 
sheetScrollRef = snapshot.elements.find( + (element) => element.identifier === 'example.locationsSheet', + )?.ref; + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Drag visible sheet content', + tool: 'drag', + params: { + simulatorId, + elementRef: sheetScrollRef, + direction: 'up', + distance: 0.7, + duration: 0.8, + steps: 80, + postDelay: 0.5, + }, + }); + }); + + it('prefers a vertical list over a small horizontal scroll view for upward scroll guidance', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.horizontalScroller', + frame: { x: 20, y: 100, width: 350, height: 120 }, + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 'example.verticalList', + frame: { x: 0, y: 240, width: 390, height: 520 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const verticalListRef = snapshot.elements.find( + (element) => element.identifier === 'example.verticalList', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: verticalListRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + + it('keeps unselected tabs available as screen-changing tap suggestions', () => { + recordSnapshot([ + createNode({ + type: 'Tab', + role: 'AXTab', + AXLabel: 'Current', + AXValue: 'selected', + AXSelected: true, + }), + createNode({ + type: 'Tab', + role: 'AXTab', + AXLabel: 'Search', + AXValue: '0', + AXSelected: false, + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const 
searchTabRef = snapshot.elements.find((element) => element.label === 'Search')?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: searchTabRef }, + }); + }); + + it('promotes visible switches as a batch next step', () => { + recordSnapshot([ + createScrollView({ + AXIdentifier: 'settings.sheet', + children: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Atmospheric animations', + AXValue: '1', + }), + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Severe weather alerts', + AXValue: '1', + }), + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const switchRefs = snapshot.elements + .filter((element) => element.role === 'switch') + .map((element) => element.ref); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Batch visible switch toggles', + tool: 'batch', + params: { + simulatorId, + steps: switchRefs.slice(0, 2).map((elementRef) => ({ + action: 'tap', + elementRef, + })), + }, + }); + expect(steps.find((step) => step.tool === 'tap')).toBeUndefined(); + }); + + it('uses hierarchy depth only as a foreground-root tie breaker', () => { + recordSnapshot([ + nestNode( + createScrollView({ + AXIdentifier: 'deep.stateControls', + frame: { x: 0, y: 0, width: 390, height: 80 }, + children: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Nested switch', + AXValue: '0', + }), + ], + }), + 40, + ), + createScrollView({ + AXIdentifier: 'shallow.searchPanel', + frame: { x: 0, y: 100, width: 390, height: 500 }, + children: [ + createNode({ + type: 'TextField', + role: 
'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 130, width: 350, height: 40 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const shallowSearchRef = snapshot.elements.find( + (element) => element.identifier === 'shallow.searchPanel', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: shallowSearchRef, + direction: 'up', + distance: 0.5, + }, + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts new file mode 100644 index 000000000..cb0561d48 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -0,0 +1,1115 @@ +import { describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import { + createRuntimeSnapshotRecord, + extractAccessibilityHierarchy, + getPrimaryRuntimeElement, + parseRuntimeSnapshotResponse, + getRuntimeElementActivationPoint, + getRuntimeElementDirectionalDragPoints, + getRuntimeElementSwipePoints, + RuntimeSnapshotParseError, +} from '../shared/runtime-snapshot.ts'; + +const simulatorId = '12345678-1234-4234-8234-123456789012'; + +function createNode(overrides: Partial = {}): AccessibilityNode { + return { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + ...overrides, + }; +} + +describe('runtime snapshot normalization', () => { + it('flattens AX hierarchy into RuntimeSnapshotV1 public elements', () => { + const child = createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Email', + AXValue: 'user@example.com', + AXUniqueId: 'email-field', + AXSelected: true, + frame: { x: 20, y: 80, 
width: 220, height: 44 }, + }); + const root = createNode({ + type: 'Window', + role: 'AXWindow', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [child], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + }), + ); + expect(snapshot.payload.elements.map((element) => element.ref)).toEqual(['e1', 'e2']); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + ref: 'e2', + role: 'text-field', + label: 'Email', + value: 'user@example.com', + identifier: 'email-field', + frame: { x: 20, y: 80, width: 220, height: 44 }, + state: { enabled: true, selected: true, visible: true }, + actions: expect.arrayContaining(['tap', 'typeText', 'longPress', 'touch']), + }), + ); + expect(snapshot.payload.screenHash).toMatch(/^[a-z0-9]+$/); + expect(snapshot.payload.seq).toBe(0); + expect(snapshot.payload.actions).toContainEqual({ + action: 'typeText', + elementRef: 'e2', + label: 'Email', + }); + expect(snapshot.elements[1]?.rawNode).toBe(child); + expect('rawNode' in snapshot.payload.elements[1]!).toBe(false); + expect(snapshot.elementsByRef.get('e2')?.rawNode).toBe(child); + }); + + it('reads AXIdentifier as a stable runtime element identifier', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXIdentifier: 'weather.detailsButton' })], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ identifier: 'weather.detailsButton' }), + ); + }); + + it('classifies context menu items as menu controls instead of text', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'MenuItem', + role: 'AXMenuItem', + role_description: 'context menu item', + }), + ], + nowMs: 
1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'menu', + actions: expect.arrayContaining(['longPress', 'touch']), + }), + ); + expect(snapshot.payload.elements[0]?.actions).not.toContain('tap'); + }); + + it('derives deterministic screen hashes from normalized UI content', () => { + const uiHierarchy = [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Cancel' })]; + + const first = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy, nowMs: 1_000 }); + const second = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy, nowMs: 2_000 }); + const changed = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Done' })], + nowMs: 1_000, + }); + + expect(first.payload.screenHash).toBe(second.payload.screenHash); + expect(first.payload.screenHash).not.toBe(changed.payload.screenHash); + }); + + it('parses AXe describe-ui response envelopes', () => { + const responseText = JSON.stringify({ + elements: [createNode({ AXLabel: 'Continue' })], + }); + + const hierarchy = extractAccessibilityHierarchy(responseText); + + expect(hierarchy).toHaveLength(1); + expect(hierarchy[0]?.AXLabel).toBe('Continue'); + }); + + it('throws typed parse errors for malformed describe-ui responses', () => { + expect(() => extractAccessibilityHierarchy('not json')).toThrow(RuntimeSnapshotParseError); + expect(() => extractAccessibilityHierarchy(JSON.stringify({ value: [] }))).toThrow( + RuntimeSnapshotParseError, + ); + expect(() => extractAccessibilityHierarchy(JSON.stringify({}))).toThrow( + RuntimeSnapshotParseError, + ); + }); + + it('allows empty describe-ui arrays only when the caller opts in', () => { + expect(extractAccessibilityHierarchy(JSON.stringify([]))).toEqual([]); + expect(extractAccessibilityHierarchy(JSON.stringify({ elements: [] }))).toEqual([]); + expect(() => parseRuntimeSnapshotResponse({ simulatorId, responseText: '[]' })).toThrow( + 
RuntimeSnapshotParseError, + ); + + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId, + responseText: '{"elements": []}', + allowEmpty: true, + }); + + expect(snapshot.payload.elements).toEqual([]); + expect(snapshot.payload.actions).toEqual([]); + }); + + it('selects the primary element for semantic next steps', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXLabel: 'Continue' })], + nowMs: 1_000, + }); + + expect(getPrimaryRuntimeElement(snapshot.payload, 'tap')?.label).toBe('Continue'); + expect(getPrimaryRuntimeElement(snapshot.payload, 'typeText')).toBe( + snapshot.payload.elements[0], + ); + }); + + it('infers swipeWithin on top-level application roots with semantic vertical overflow', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Details available below', + frame: { x: 40, y: 920, width: 220, height: 24 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'application', + label: 'Example', + actions: ['swipeWithin'], + }), + ); + expect(snapshot.payload.actions).toContainEqual({ + action: 'swipeWithin', + elementRef: 'e1', + label: 'Example', + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toEqual({ + ok: true, + from: { x: 195, y: 717 }, + to: { x: 195, y: 127 }, + }); + }); + + it('infers swipeWithin on top-level windows with semantic vertical overflow', () => { + const root = createNode({ + type: 'Window', + role: 'AXWindow', + AXLabel: 'Example', + frame: { x: 0, 
y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'More content below', + frame: { x: 140, y: 920, width: 160, height: 24 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'window', + label: 'Example', + actions: ['swipeWithin'], + }), + ); + }); + + it('does not infer swipeWithin when descendants fit inside the container', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Visible label', + frame: { x: 20, y: 200, width: 120, height: 20 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).toEqual([]); + }); + + it('does not infer root viewport swipeWithin from anonymous geometry-only overflow', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXIdentifier: undefined, + frame: { x: 20, y: 920, width: 240, height: 80 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).toEqual([]); + }); + + it('does not infer root viewport swipeWithin when a better descendant scroll target exists', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + 
AXIdentifier: 'app.contentPanel', + frame: { x: 0, y: 100, width: 390, height: 600 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Additional details below', + frame: { x: 40, y: 920, width: 220, height: 24 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).not.toContain('swipeWithin'); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'scroll-view', + identifier: 'app.contentPanel', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('does not synthesize a foreground sheet scroll region without a real scroll descendant', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Expanded', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + frame: { x: 36, y: 603, width: 330, height: 28 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'application', + label: 'Example', + actions: [], + }), + ); + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + expect(snapshot.payload.actions.some((action) => action.action === 'swipeWithin')).toBe(false); + }); + + it('does not synthesize a locations sheet scroll region over tappable rows', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 402, 
height: 874 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.backgroundScroll', + frame: { x: 0, y: 80, width: 402, height: 260 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Locations', + AXIdentifier: 'example.locationsSheet', + frame: { x: 148, y: 104, width: 106, height: 32 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 362, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Use current location', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 218, width: 362, height: 54 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'MY LOCATIONS ¡ 7', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 292, width: 160, height: 20 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'San Francisco, 1:24 PM ¡ Cloudy', + frame: { x: 20, y: 326, width: 362, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 415, width: 362, height: 72 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Body note', + frame: { x: 20, y: 600, width: 362, height: 44 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + 
).toBeUndefined(); + expect(snapshot.payload.elements[0]?.actions).not.toContain('swipeWithin'); + expect(snapshot.payload.elements.find((element) => element.role === 'scroll-view')).toEqual( + expect.objectContaining({ + identifier: 'example.backgroundScroll', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('does not advertise synthetic scrolling for live-shaped locations sheets', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 182, y: 360, width: 76, height: 25 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Locations', + AXIdentifier: 'example.locationsSheet', + frame: { x: 168, y: 408, width: 106, height: 32 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 374, y: 400, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 450, width: 400, height: 44 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'MY LOCATIONS ¡ 8', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 566, width: 160, height: 20 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'MY LOCATION, San Francisco, 1:24 PM ¡ Mostly Sunny', + frame: { x: 20, y: 596, width: 400, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 686, width: 400, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Aspen, 2:24 PM ¡ Light Snow', + frame: { x: 20, y: 776, width: 400, height: 72 }, + }), + ], + }); + + const snapshot 
= createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + expect(snapshot.payload.elements[0]?.actions).not.toContain('swipeWithin'); + expect(snapshot.payload.actions.some((action) => action.action === 'swipeWithin')).toBe(false); + }); + + it('does not synthesize sheet host swipe frames when the grabber is near the bottom', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Expanded', + frame: { x: 157, y: 620, width: 76, height: 5 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).toEqual([]); + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + }); + + it('removes actions from elements outside the viewport', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + frame: { x: 40, y: 890, width: 300, height: 30 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'switch', + label: 'Reduce transparency', + value: '0', + state: expect.objectContaining({ visible: false }), + actions: [], + }), + ); + }); + + it('does not re-add swipeWithin to offscreen containers', () => { + const root = createNode({ 
+ type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Offscreen panel', + frame: { x: 0, y: 900, width: 300, height: 200 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflowing child', + frame: { x: 10, y: 1160, width: 100, height: 20 }, + }), + ], + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Offscreen panel', + state: expect.objectContaining({ visible: false }), + actions: [], + }), + ); + expect(snapshot.payload.actions).not.toContainEqual({ + action: 'swipeWithin', + elementRef: 'e2', + label: 'Offscreen panel', + }); + }); + + it('removes point-based actions from clipped elements with offscreen activation points', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Lisbon', + frame: { x: 20, y: 839.33, width: 362, height: 89 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'button', + label: 'Lisbon', + state: expect.objectContaining({ visible: true }), + actions: [], + }), + ); + }); + + it('uses an upper activation point for bottom-clipped visible targets', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + frame: { x: 324.87, y: 786.62, width: 49.93, height: 85.46 }, + }), + ], + }); + + const snapshot = 
createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]?.actions).toContain('tap'); + expect(getRuntimeElementActivationPoint(snapshot.elements[1]!)).toEqual({ x: 350, y: 795 }); + }); + + it('does not mark unlabeled custom-action internals as tap targets', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXUniqueId: undefined, + identifier: undefined, + frame: { x: 30, y: 450, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + createNode({ + type: 'Other', + role: 'AXGroup', + AXUniqueId: 'label-view', + frame: { x: 30, y: 500, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + createNode({ + type: 'Other', + role: 'AXGroup', + AXUniqueId: 'named-custom-target', + frame: { x: 30, y: 550, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['tap']), + }), + ); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'other', + identifier: 'label-view', + actions: expect.not.arrayContaining(['tap']), + }), + ); + expect(snapshot.payload.elements[2]).toEqual( + expect.objectContaining({ + role: 'other', + identifier: 'named-custom-target', + actions: expect.arrayContaining(['tap']), + }), + ); + }); + + it('does not mark standalone other elements as swipeable', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Suggested', + frame: { x: 30, y: 450, width: 80, height: 32 }, + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Suggested', + actions: 
expect.not.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('does not infer swipeWithin on small other wrappers with overflowing descendants', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + frame: { x: 0, y: 0, width: 80, height: 80 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + frame: { x: 10, y: 100, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('infers swipeWithin on other containers with overflowing descendants', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Scrollable panel', + frame: { x: 0, y: 0, width: 200, height: 200 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + frame: { x: 10, y: 260, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Scrollable panel', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('classifies generic containers with scroll-view identifiers as scroll views', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXIdentifier: 'app.mainScrollView', + AXLabel: undefined, + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Visible child', + frame: { x: 20, y: 120, width: 120, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ 
+ role: 'scroll-view', + identifier: 'app.mainScrollView', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('keeps an unlabeled other swipe target as fallback when no better scroll ref exists', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXUniqueId: undefined, + frame: { x: 0, y: 0, width: 200, height: 200 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + frame: { x: 10, y: 260, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('removes unlabeled other swipe targets when better scroll refs exist', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXUniqueId: undefined, + frame: { x: 0, y: 0, width: 300, height: 300 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Generic overflow', + frame: { x: 10, y: 360, width: 120, height: 20 }, + }), + ], + }), + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'weather.locationsSheet', + frame: { x: 0, y: 400, width: 390, height: 300 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['swipeWithin']), + }), + ); + expect(snapshot.payload.elements[3]).toEqual( + expect.objectContaining({ + role: 'scroll-view', + identifier: 'weather.locationsSheet', + actions: 
expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('derives trailing activation points for wide switch rows', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementActivationPoint(snapshot.elements[0]!)).toEqual({ x: 307, y: 903 }); + }); + + it('uses normalized distance to shorten swipe strokes within safe endpoints', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up', 0.5)).toEqual({ + ok: true, + from: { x: 100, y: 270 }, + to: { x: 100, y: 130 }, + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up', 0.8)).toEqual({ + ok: true, + from: { x: 100, y: 312 }, + to: { x: 100, y: 88 }, + }); + }); + + it('uses viewport-relative directional drag points for small chrome targets', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 182, y: 446, width: 76, height: 24 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect( + getRuntimeElementDirectionalDragPoints( + snapshot.elements[1]!, + 'up', + 0.35, + snapshot.elements[0]!.publicElement.frame, + ), + ).toEqual({ + ok: true, + from: { x: 220, y: 458 }, + to: { x: 220, y: 123 }, + }); + }); + + it('keeps full-screen swipe points away from unsafe viewport edges', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 
'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'down')).toEqual({ + ok: true, + from: { x: 201, y: 131 }, + to: { x: 201, y: 743 }, + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'left')).toEqual({ + ok: true, + from: { x: 342, y: 524 }, + to: { x: 60, y: 524 }, + }); + }); + + it('rejects unsafe swipe point derivation', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 1, height: 1 }, + }), + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 2, height: 100 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toMatchObject({ + ok: false, + message: expect.stringContaining('too small'), + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[1]!, 'right')).toMatchObject({ + ok: false, + message: expect.stringContaining('non-degenerate'), + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts b/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts index c7e62669f..c3017dcc7 100644 --- a/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts @@ -1,7 +1,6 @@ import { describe, it, expect, beforeEach } from 'vitest'; import * as z from 'zod'; import { - createMockExecutor, createMockFileSystemExecutor, mockProcess, } from '../../../../test-utils/mock-executors.ts'; @@ -16,6 +15,43 @@ import { } from '../screenshot.ts'; import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +function isDeviceListCommand(command: string[]): boolean { + return command.join(' ') === 'xcrun simctl list devices -j'; +} + +function bootedDeviceListJson(simulatorId: string): string { 
+ return JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { + udid: simulatorId, + name: 'iPhone 15 Pro', + state: 'Booted', + }, + ], + }, + }); +} + +function createBootedScreenshotMockExecutor(simulatorId: string) { + return async (command: string[]) => { + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson(simulatorId), + error: undefined, + process: mockProcess, + }; + } + return { + success: true, + output: 'Screenshot saved', + error: undefined, + process: mockProcess, + }; + }; +} + describe('Screenshot Plugin', () => { beforeEach(() => { sessionStore.clear(); @@ -68,6 +104,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('12345678-1234-4234-8234-123456789012'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -93,8 +137,7 @@ describe('Screenshot Plugin', () => { ), ); - // Should capture the screenshot command first - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -108,6 +151,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -133,7 +184,7 @@ describe('Screenshot Plugin', () => { ), ); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -147,6 +198,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; 
const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('98765432-1098-7654-3210-987654321098'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -175,7 +234,7 @@ describe('Screenshot Plugin', () => { ), ); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -189,6 +248,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('12345678-1234-4234-8234-123456789012'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -215,24 +282,22 @@ describe('Screenshot Plugin', () => { ); // Verify the command structure but not the exact UUID since it's generated - expect(capturedCommands[0].slice(0, 5)).toEqual([ + expect(capturedCommands[1].slice(0, 5)).toEqual([ 'xcrun', 'simctl', 'io', '12345678-1234-4234-8234-123456789012', 'screenshot', ]); - expect(capturedCommands[0][5]).toMatch(/^\/tmp\/screenshot_[a-f0-9-]+\.png$/); + expect(capturedCommands[1][5]).toMatch(/^\/tmp\/screenshot_[a-f0-9-]+\.png$/); }); }); describe('Handler Behavior (Complete Literal Returns)', () => { it('should handle file reading errors', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Screenshot saved', - error: undefined, - }); + const mockExecutor = createBootedScreenshotMockExecutor( + '12345678-1234-4234-8234-123456789012', + ); const mockFileSystemExecutor = createMockFileSystemExecutor({ readFile: async () => { @@ -260,11 +325,9 @@ describe('Screenshot Plugin', () => { it('should handle file cleanup errors gracefully', async () => { const mockImageBuffer 
= Buffer.from('fake-image-data', 'utf8'); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Screenshot saved', - error: undefined, - }); + const mockExecutor = createBootedScreenshotMockExecutor( + '12345678-1234-4234-8234-123456789012', + ); const mockFileSystemExecutor = createMockFileSystemExecutor({ readFile: async () => mockImageBuffer.toString('utf8'), @@ -366,6 +429,45 @@ describe('Screenshot Plugin', () => { ).toBe(true); }); + it('fails before screenshot capture when the simulator is shutdown', async () => { + const capturedCommands: string[][] = []; + const mockExecutor = async (command: string[]) => { + capturedCommands.push(command); + return { + success: true, + output: JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { + udid: '12345678-1234-4234-8234-123456789012', + name: 'iPhone 15 Pro', + state: 'Shutdown', + }, + ], + }, + }), + error: undefined, + process: mockProcess, + }; + }; + + const result = await runLogic(() => + screenshotLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + }, + mockExecutor, + createMockFileSystemExecutor(), + ), + ); + + expect(result.isError).toBe(true); + const text = allText(result); + expect(text).toContain('Failed to capture screenshot.'); + expect(text).toContain('is Shutdown'); + expect(capturedCommands).toEqual([['xcrun', 'simctl', 'list', 'devices', '-j']]); + }); + it('should handle SystemError from command execution', async () => { const mockExecutor = async () => { throw new SystemError('System error occurred'); @@ -614,20 +716,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if 
(idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -689,20 +791,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -756,20 +858,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -819,20 +921,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; diff --git a/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts b/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts 
new file mode 100644 index 000000000..b7795f53f --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts @@ -0,0 +1,177 @@ +import { describe, expect, it } from 'vitest'; +import { mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { + createSemanticTapBatchSteps, + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, + isRecoverableAxeSelectorError, +} from '../shared/semantic-tap.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, + simulatorId, +} from './ui-action-test-helpers.ts'; + +function createElements(nodes = [createNode()]) { + return createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: 1_000 }).elements; +} + +describe('semantic tap helpers', () => { + it('recognizes recoverable AXe selector failures', () => { + expect( + isRecoverableAxeSelectorError( + new Error('Multiple (2) accessibility elements matched selector'), + ), + ).toBe(true); + expect( + isRecoverableAxeSelectorError({ + axeOutput: 'No accessibility element matched --label Continue', + }), + ).toBe(true); + expect(isRecoverableAxeSelectorError(new Error('Simulator is not booted'))).toBe(false); + }); + + it('uses a unique semantic selector before coordinates', () => { + const [element] = createElements([ + createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }), + ]); + + const command = createSemanticTapCommand(element!, 'e1', ['--duration', '0.1'], [element!]); + + expect(command.selectorArgs).toEqual([ + 'tap', + '--id', + 'continue.button', + '--element-type', + 'Button', + '--duration', + '0.1', + ]); + expect(command.primaryArgs).toBe(command.selectorArgs); + expect(command.usedSelector).toBe(true); + }); + + it('falls back to coordinates when semantic selectors are duplicated', () => { + const elements = 
createElements([ + createNode({ AXUniqueId: 'duplicate.button', AXLabel: 'Duplicate' }), + createNode({ + AXUniqueId: 'duplicate.button', + AXLabel: 'Duplicate', + frame: { x: 20, y: 80, width: 100, height: 40 }, + }), + ]); + + const command = createSemanticTapCommand(elements[0]!, 'e1', [], elements); + + expect(command.selectorArgs).toBeNull(); + expect(command.primaryArgs).toEqual(['tap', '-x', '60', '-y', '40']); + expect(command.usedSelector).toBe(false); + }); + + it('represents switch taps as down/up touch batch steps', () => { + const [element] = createElements([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Alerts', + frame: { x: 10, y: 20, width: 200, height: 40 }, + }), + ]); + + const command = createSemanticTapCommand(element!, 'e1'); + + expect(command.selectorArgs).toBeNull(); + expect(command.coordinateArgs).toEqual(['touch', '-x', '158', '-y', '40', '--down', '--up']); + expect(createSemanticTapBatchSteps(command)).toEqual([ + 'touch -x 158 -y 40 --down', + 'touch -x 158 -y 40 --up', + ]); + }); + + it('uses the executed command name for switch touch commands', async () => { + const [element] = createElements([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Alerts', + frame: { x: 10, y: 20, width: 200, height: 40 }, + }), + ]); + const command = createSemanticTapCommand(element!, 'e1'); + const { calls, executor } = createSequencedExecutor([{ success: true, output: 'ok' }]); + + await executeSemanticTapWithAmbiguityFallback({ + command, + simulatorId, + executor, + axeHelpers: createMockAxeHelpers(), + }); + + expect(calls[0]).toEqual( + expect.objectContaining({ + command: [ + '/mocked/axe/path', + 'touch', + '-x', + '158', + '-y', + '40', + '--down', + '--up', + '--udid', + simulatorId, + ], + logPrefix: '[AXe]: touch', + }), + ); + }); + + it('retries recoverable selector failures with coordinates', async () => { + const [element] = createElements([ + createNode({ AXUniqueId: 'continue.button', AXLabel: 
'Continue' }), + ]); + const command = createSemanticTapCommand(element!, 'e1', [], [element!]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Multiple (2) accessibility elements matched selector' }, + { success: true, output: 'ok' }, + ]); + + await executeSemanticTapWithAmbiguityFallback({ + command, + simulatorId, + executor, + axeHelpers: createMockAxeHelpers(), + }); + + expect(calls.map((call) => call.command.slice(1, -2))).toEqual([ + ['tap', '--id', 'continue.button', '--element-type', 'Button'], + ['tap', '-x', '60', '-y', '40'], + ]); + expect(calls.map((call) => call.logPrefix)).toEqual(['[AXe]: tap', '[AXe]: tap']); + }); + + it('does not retry unrecoverable selector failures', async () => { + const [element] = createElements([ + createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }), + ]); + const command = createSemanticTapCommand(element!, 'e1', [], [element!]); + const calls: string[][] = []; + const executor: CommandExecutor = async (commandArgs) => { + calls.push(commandArgs); + return { success: false, output: '', error: 'Simulator is not booted', process: mockProcess }; + }; + + await expect( + executeSemanticTapWithAmbiguityFallback({ + command, + simulatorId, + executor, + axeHelpers: createMockAxeHelpers(), + }), + ).rejects.toThrow("axe command 'tap' failed."); + expect(calls).toHaveLength(1); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts new file mode 100644 index 000000000..c03ed6219 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts @@ -0,0 +1,142 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + clearRuntimeSnapshot, + 
getRuntimeSnapshot, + getRuntimeSnapshotLookup, + getSnapshotUiWarning, + recordRuntimeSnapshot, + resolveElementRef, +} from '../shared/snapshot-ui-state.ts'; + +const simulatorId = '12345678-1234-4234-8234-123456789012'; + +const node: AccessibilityNode = { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + AXLabel: 'Continue', +}; + +describe('runtime snapshot store', () => { + beforeEach(() => { + __resetRuntimeSnapshotStoreForTests(); + }); + + it('stores runtime snapshots by simulator id', () => { + const nowMs = Date.now(); + const snapshot = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs }); + + recordRuntimeSnapshot(snapshot); + + expect(getRuntimeSnapshot(simulatorId, nowMs + 1_000)).toBe(snapshot); + expect(getRuntimeSnapshotLookup(simulatorId, nowMs + 1_000)).toEqual({ + status: 'available', + snapshot, + snapshotAgeMs: 1_000, + }); + expect(getSnapshotUiWarning(simulatorId)).toBeNull(); + }); + + it('assigns monotonic snapshot sequences when recording snapshots', () => { + const first = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs: 1_000 }); + const second = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs: 2_000 }); + + recordRuntimeSnapshot(first); + clearRuntimeSnapshot(simulatorId); + recordRuntimeSnapshot(second); + + expect(first.seq).toBe(1); + expect(first.payload.seq).toBe(1); + expect(second.seq).toBe(2); + expect(second.payload.seq).toBe(2); + expect(getRuntimeSnapshot(simulatorId, 2_000)).toBe(second); + }); + + it('expires stale snapshots and clears them from the store', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + expect(getRuntimeSnapshotLookup(simulatorId, 62_000)).toEqual({ + status: 'expired', + snapshot: null, + snapshotAgeMs: 61_000, + }); + 
expect(getRuntimeSnapshot(simulatorId, 62_000)).toBeNull(); + }); + + it('clears snapshots explicitly', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + clearRuntimeSnapshot(simulatorId); + + expect(getRuntimeSnapshotLookup(simulatorId)).toEqual({ status: 'missing', snapshot: null }); + }); + + it('resolves actionable element refs', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + expect(resolveElementRef(simulatorId, 'e1', 'tap', 2_000)).toEqual({ + ok: true, + snapshot, + element: snapshot.elements[0], + snapshotAgeMs: 1_000, + }); + }); + + it('returns typed recoverable errors for missing, expired, not-found, and not-actionable refs', () => { + expect(resolveElementRef(simulatorId, 'e1', 'tap', 1_000)).toEqual({ + ok: false, + error: expect.objectContaining({ code: 'SNAPSHOT_MISSING' }), + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + expect(resolveElementRef(simulatorId, 'e1', 'tap', 62_000)).toEqual({ + ok: false, + error: expect.objectContaining({ code: 'SNAPSHOT_EXPIRED', snapshotAgeMs: 61_000 }), + }); + + recordRuntimeSnapshot(snapshot); + expect(resolveElementRef(simulatorId, 'e404', 'tap', 2_000)).toEqual({ + ok: false, + error: expect.objectContaining({ + code: 'ELEMENT_REF_NOT_FOUND', + elementRef: 'e404', + snapshotAgeMs: 1_000, + }), + }); + + expect(resolveElementRef(simulatorId, 'e1', 'typeText', 2_000)).toEqual({ + ok: false, + error: expect.objectContaining({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + snapshotAgeMs: 1_000, + }), + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index 42a816db3..ff17cf752 100644 --- 
a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -2,9 +2,40 @@ import { describe, it, expect } from 'vitest'; import * as z from 'zod'; import { createMockExecutor, createNoopExecutor } from '../../../../test-utils/mock-executors.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import type { DebuggerBackend } from '../../../../utils/debugger/backends/DebuggerBackend.ts'; +import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; import { schema, handler, snapshot_uiLogic } from '../snapshot_ui.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +import { + allText, + callHandler, + createMockToolHandlerContext, + runLogic, +} from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; + +async function createStoppedDebuggerManager(simulatorId: string): Promise<DebuggerManager> { + const backend: DebuggerBackend = { + kind: 'lldb-cli', + attach: async () => {}, + detach: async () => {}, + runCommand: async () => '', + resume: async () => {}, + addBreakpoint: async (spec) => ({ id: 1, spec, rawOutput: '' }), + removeBreakpoint: async () => '', + getStack: async () => '', + getVariables: async () => '', + getExecutionState: async () => ({ status: 'stopped', reason: 'breakpoint' }), + dispose: async () => {}, + }; + const manager = new DebuggerManager({ backendFactory: async () => backend }); + const session = await manager.createSession({ simulatorId, pid: 12345 }); + manager.setCurrentSession(session.id); + return manager; +} describe('Snapshot UI Plugin', () => { describe('Export Field Validation (Literal)', () => { @@ -16,6 +47,7 @@ describe('Snapshot UI Plugin', () => { const schemaObject = z.object(schema);
expect(schemaObject.safeParse({}).success).toBe(true); + expect(schemaObject.safeParse({ sinceScreenHash: 'screen-hash' }).success).toBe(true); const withSimId = schemaObject.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', @@ -69,7 +101,9 @@ describe('Snapshot UI Plugin', () => { return mockExecutor(...args); }; - const result = await runLogic(() => + __resetRuntimeSnapshotStoreForTests(); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic( { simulatorId: '12345678-1234-4234-8234-123456789012', @@ -86,18 +120,1289 @@ describe('Snapshot UI Plugin', () => { { env: {} }, ]); - expect(result.isError).toBeFalsy(); - const text = allText(result); - expect(text).toContain('Accessibility hierarchy retrieved successfully.'); - expect(text).toContain('Accessibility Hierarchy'); - expect(text).toContain('"type" : "Button"'); - expect(text).toContain('"width" : 50'); - expect(text).toContain('Use frame coordinates for tap/swipe'); - expect(result.nextStepParams).toEqual({ - snapshot_ui: { simulatorId: '12345678-1234-4234-8234-123456789012' }, - tap: { simulatorId: '12345678-1234-4234-8234-123456789012', x: 0, y: 0 }, - screenshot: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + expect(result.isError()).toBe(false); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + expect(ctx.structuredOutput?.result.kind).toBe('capture-result'); + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash: expect.any(String), + seq: 1, + elements: [ + expect.objectContaining({ + ref: 'e1', + role: 'button', + frame: { x: 100, y: 200, width: 50, height: 30 }, + state: { enabled: true, visible: true }, + actions: expect.arrayContaining(['tap']), + }), + ], + }), + ); + expect( + capture && 'type' in capture && capture.type === 'runtime-snapshot' ? capture.actions : [], + ).toContainEqual({ action: 'tap', elementRef: 'e1' }); + expect( + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 'rawNode' in capture.elements[0]! + : true, + ).toBe(false); + const storedSnapshot = getRuntimeSnapshot('12345678-1234-4234-8234-123456789012'); + expect(storedSnapshot?.payload).toBe(capture); + const elementRef = + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 
capture.elements[0]?.ref + : undefined; + expect(ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', + }, + }, + { + label: 'Tap an elementRef', + tool: 'tap', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef, + }, + }, + ]); + }); + + it('should return unchanged capture when sinceScreenHash matches the current screen hash', async () => { + const uiHierarchy = + '{"elements": [{"type": "Button", "frame": {"x": 100, "y": 200, "width": 50, "height": 30}}]}'; + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const first = createMockToolHandlerContext(); + await first.run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + const firstCapture = + first.ctx.structuredOutput?.result.kind === 'capture-result' + ? first.ctx.structuredOutput.result.capture + : undefined; + const screenHash = + firstCapture && 'screenHash' in firstCapture ? firstCapture.screenHash : undefined; + expect(screenHash).toEqual(expect.any(String)); + + const second = createMockToolHandlerContext(); + await second.run(() => + snapshot_uiLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + sinceScreenHash: screenHash, + }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + second.ctx.structuredOutput?.result.kind === 'capture-result' + ? 
second.ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual({ + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash, + seq: 2, + }); + expect(getRuntimeSnapshot('12345678-1234-4234-8234-123456789012')?.seq).toBe(2); + expect(second.ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', + }, + }, + { + label: 'Tap an elementRef', + tool: 'tap', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e1', + }, + }, + ]); + }); + + it('should return full runtime snapshot when sinceScreenHash differs from the current screen hash', async () => { + const uiHierarchy = + '{"elements": [{"type": "Button", "frame": {"x": 100, "y": 200, "width": 50, "height": 30}}]}'; + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + sinceScreenHash: 'different-screen-hash', + }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash: expect.any(String), + seq: 1, + elements: [expect.objectContaining({ ref: 'e1' })], + }), + ); + }); + + it('should omit tap next-step guidance when no tap targets exist', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Loading content...', + frame: { x: 20, y: 100, width: 200, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', + }, + }, + { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + ]); + }); + + it('should include scroll guidance for generic containers with scroll-view identifiers', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Other', + role: 'AXGroup', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + { + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Visible content', + frame: { x: 20, y: 
160, width: 140, height: 24 }, + }, + ], + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + AXIdentifier: 'app.settingsButton', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + const scrollElement = + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? capture.elements[0] + : undefined; + expect(scrollElement).toEqual( + expect.objectContaining({ + role: 'scroll-view', + identifier: 'app.mainScrollView', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + withinElementRef: 'e1', + direction: 'up', + distance: 0.5, + }); + }); + + it('should include root viewport scroll guidance for semantic vertical overflow', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + { + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Additional details below', + frame: { x: 40, y: 920, width: 220, height: 24 }, + }, + ], + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: 
true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + const rootElement = + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? capture.elements[0] + : undefined; + expect(rootElement).toEqual( + expect.objectContaining({ + role: 'application', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + withinElementRef: 'e1', + direction: 'up', + distance: 0.5, + }); + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'swipe', + 'tap', + ]); + expect(ctx.nextSteps?.some((step) => step.tool === 'screenshot')).toBe(false); + }); + + it('should include scroll guidance before screenshots when scrollable content is present', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 120, width: 390, height: 600 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Open Details', + frame: { x: 20, y: 180, width: 200, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const 
{ ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + withinElementRef: 'e1', + direction: 'up', + distance: 0.5, + }); + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'swipe', + 'tap', + ]); + }); + + it('should prioritize scroll guidance over screen-changing tap guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 120, width: 390, height: 600 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + AXIdentifier: 'app.settingsButton', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'swipe', + 'tap', + ]); + }); + + it('should prefer foreground container guidance over background controls', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 0, width: 
390, height: 844 }, + children: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Background item, older screen content', + frame: { x: 20, y: 100, width: 300, height: 80 }, + }, + ], + }, + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.foregroundPanel', + frame: { x: 0, y: 320, width: 390, height: 524 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 320, y: 340, width: 44, height: 44 }, + }, + { + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 390, width: 300, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Foreground result, current panel content', + frame: { x: 20, y: 450, width: 320, height: 80 }, + }, + ], + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e7', }); + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + withinElementRef: 'e4', + direction: 'up', + distance: 0.5, + }); + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'tap', + 'swipe', + ]); + }); + + it('should keep state-changing controls out of generic tap guidance while promoting switch batches', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce Motion', + AXValue: '0', 
+ frame: { x: 20, y: 40, width: 300, height: 44 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce Transparency', + AXValue: '0', + frame: { x: 20, y: 100, width: 300, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toEqual({ + label: 'Batch visible switch toggles', + tool: 'batch', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + }); + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + const targets = + capture && 'type' in capture && capture.type === 'runtime-snapshot' ? 
capture.actions : []; + expect(targets).toContainEqual(expect.objectContaining({ action: 'tap', elementRef: 'e1' })); + expect(targets).toContainEqual(expect.objectContaining({ action: 'tap', elementRef: 'e2' })); + }); + + it('should promote visible switches into batch while keeping generic tap on content', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + frame: { x: 20, y: 40, width: 100, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '°F', + AXValue: 'selected', + frame: { x: 20, y: 100, width: 100, height: 44 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Already Enabled', + AXValue: '1', + AXUniqueId: 'settings.enabledRowSwitch', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + AXUniqueId: 'app.contentRow', + frame: { x: 20, y: 210, width: 300, height: 80 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Use Celsius', + AXValue: '0', + AXUniqueId: 'settings.useCelsiusRowSwitch', + frame: { x: 20, y: 310, width: 300, height: 44 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Severe Weather Alerts', + AXValue: '0', + AXUniqueId: 'settings.alertsRowSwitch', + frame: { x: 20, y: 370, width: 300, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toEqual({ + label: 'Batch visible switch toggles', + 
tool: 'batch', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + steps: [ + { action: 'tap', elementRef: 'e3' }, + { action: 'tap', elementRef: 'e5' }, + ], + }, + }); + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should keep single tap guidance without batch when only one safe batch target exists', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + frame: { x: 20, y: 40, width: 100, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland', + frame: { x: 20, y: 100, width: 100, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should prefer a non-text-field tap target in next steps', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 40, width: 200, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Submit', + frame: { x: 20, y: 100, width: 100, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + 
process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should prefer a useful digit over calculator utility controls for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'C', + frame: { x: 20, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Âą', + frame: { x: 100, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '%', + frame: { x: 180, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '7', + frame: { x: 20, y: 120, width: 70, height: 70 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should not promote segmented choices as generic tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + 
type: 'Button', + role: 'AXButton', + AXLabel: '°F', + AXValue: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '°C', + AXValue: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); + }); + + it('should skip low-value controls for tap next-step guidance when another tap target exists', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 150, y: 10, width: 80, height: 20 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 300, y: 40, width: 60, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Clear search', + frame: { x: 30, y: 90, width: 120, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Berlin, Germany', + frame: { x: 20, y: 150, width: 320, height: 80 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + 
mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should not prefer destructive controls for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + AXIdentifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 140, width: 300, height: 80 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should not suggest the sheet grabber as a tap next step', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 150, y: 10, width: 80, height: 20 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 300, y: 40, width: 60, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = 
createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should prefer content-rich cards over navigation and state-changing controls for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland', + AXIdentifier: 'app.navigationButton', + frame: { x: 20, y: 40, width: 160, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + AXIdentifier: 'app.settingsButton', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'PRECIP., 78%, Next 24 hours', + AXIdentifier: 'app.summaryCard', + frame: { x: 20, y: 260, width: 340, height: 140 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Severe Weather Alerts', + AXValue: '0', + frame: { x: 20, y: 440, width: 300, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e3', + }); + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); + }); + + it('should preserve runtime snapshot store when AXe output cannot be 
parsed', async () => { + __resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + const previousSnapshot = getRuntimeSnapshot(simulatorId); + expect(previousSnapshot).not.toBeNull(); + + const invalidJsonExecutor = createMockExecutor({ + success: true, + output: 'not json', + error: undefined, + process: { pid: 12345 }, + }); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic({ simulatorId }, invalidJsonExecutor, mockAxeHelpers)); + + expect(result.isError()).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.uiError + : undefined, + ).toEqual( + expect.objectContaining({ + code: 'SNAPSHOT_PARSE_FAILED', + recoveryHint: 'Run snapshot_ui again after the app is fully launched and responsive.', + }), + ); + }); + + it('should accept empty AXe payloads and replace a prior runtime snapshot', async () => { + __resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + expect(getRuntimeSnapshot(simulatorId)?.payload.elements).toHaveLength(1); + + for (const output of ['[]', '{"elements": []}']) { + const emptyExecutor = createMockExecutor({ + success: true, + output, + error: undefined, + process: { pid: 12345 }, + }); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic({ simulatorId }, emptyExecutor, mockAxeHelpers)); + + expect(result.isError()).toBe(false); + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + elements: [], + actions: [], + }), + ); + expect(getRuntimeSnapshot(simulatorId)?.payload).toBe(capture); + } + }); + + it('should preserve runtime snapshot store when AXe returns a non-array payload', async () => { + __resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + const previousSnapshot = getRuntimeSnapshot(simulatorId); + + const invalidExecutor = createMockExecutor({ + success: true, + output: '{}', + error: undefined, + process: { pid: 12345 }, + }); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic({ simulatorId }, invalidExecutor, mockAxeHelpers)); + + expect(result.isError()).toBe(true); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.uiError?.code + : undefined, + ).toBe('SNAPSHOT_PARSE_FAILED'); + expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); + }); + + it('should preserve runtime snapshot store when the debugger guard blocks before AXe runs', async () => { + __resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + const previousSnapshot = getRuntimeSnapshot(simulatorId); + const stoppedDebugger = await createStoppedDebuggerManager(simulatorId); + const guardedExecutor: CommandExecutor = async () => { + throw new Error('AXe should not run when debugger guard blocks'); + }; + + try { + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic({ simulatorId }, guardedExecutor, mockAxeHelpers, stoppedDebugger), + ); + + expect(result.isError()).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.uiError + : undefined, + ).toEqual( + expect.objectContaining({ + code: 'ACTION_FAILED', + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }), + ); + } finally { + await stoppedDebugger.disposeAll(); + } }); it('should handle DependencyError when axe is not available', async () => { diff --git a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts index 5fe3fb62c..37cbfc143 100644 --- a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts @@ -1,515 +1,398 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; -import { SystemError } from '../../../../utils/errors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; - -import { schema, handler, type AxeHelpers, swipeLogic, type SwipeParams } from '../swipe.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; - -function createMockAxeHelpers(): AxeHelpers { - return { - getAxePath: () => '/mocked/axe/path', - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; -} - -function createMockAxeHelpersWithNullPath(): AxeHelpers { - return { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { schema, handler, swipeLogic } from 
'../swipe.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runSwipe( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => swipeLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Swipe Tool', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes withinElementRef and direction without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('withinElementRef'); + expect(schema).toHaveProperty('direction'); + expect(schema).not.toHaveProperty('x1'); + expect(schema).not.toHaveProperty('y1'); + expect(schema).not.toHaveProperty('x2'); + expect(schema).not.toHaveProperty('y2'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - + expect(schemaObject.safeParse({ withinElementRef: 'e1', direction: 'up' }).success).toBe( + true, + ); + expect( + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'diagonal' }).success, + ).toBe(false); + expect(schemaObject.safeParse({ direction: 'up' }).success).toBe(false); + expect(schemaObject.safeParse({ withinElementRef: 'e1' }).success).toBe(false); expect( schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, + withinElementRef: 'e1', + direction: 'down', + duration: 1.5, + distance: 0.5, + preDelay: 0.5, + postDelay: 0.25, }).success, ).toBe(true); - expect( - schemaObject.safeParse({ - x1: 100.5, - y1: 200, - x2: 300, - y2: 400, - }).success, + schemaObject.safeParse({ withinElementRef: 'e1', 
direction: 'down', duration: 0 }).success, ).toBe(false); - expect( - schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: -1, - }).success, + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', distance: 0 }).success, ).toBe(false); - expect( - schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: 1.5, - delta: 10, - preDelay: 0.5, - postDelay: 0.2, - }).success, - ).toBe(true); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', distance: 1.1 }) + .success, + ).toBe(false); + expect( + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', preDelay: 10.1 }) + .success, + ).toBe(false); }); }); describe('Command Generation', () => { - it('should generate correct axe command for basic swipe', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - trackingExecutor, - mockAxeHelpers, - ), + it('derives safe upward swipe points within the referenced element', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(capturedCommand).toEqual([ + 
expect(result).toMatchObject({ + didError: false, + action: { + type: 'swipe', + withinElementRef: 'e1', + direction: 'up', + from: { x: 100, y: 340 }, + to: { x: 100, y: 60 }, + }, + }); + expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', '100', '--start-y', - '200', + '340', '--end-x', - '300', + '100', '--end-y', - '400', + '60', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for swipe with duration', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 50, - y1: 75, - x2: 250, - y2: 350, - duration: 1.5, - }, - trackingExecutor, - mockAxeHelpers, - ), + it('preserves optional AXe swipe flags without forwarding distance as AXe delta', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runSwipe( + { + simulatorId, + withinElementRef: 'e1', + direction: 'right', + duration: 2, + distance: 0.5, + preDelay: 0.5, + postDelay: 0.25, + }, + executor, ); - expect(capturedCommand).toEqual([ + expect(result.action).toMatchObject({ + type: 'swipe', + withinElementRef: 'e1', + direction: 'right', + from: { x: 65, y: 200 }, + to: { x: 135, y: 200 }, + durationSeconds: 2, + }); + expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', - '50', + '65', '--start-y', - '75', + '200', '--end-x', - '250', + '135', '--end-y', - '350', + '200', '--duration', - '1.5', + '2', + '--pre-delay', + '0.5', + '--post-delay', + '0.25', '--udid', - 
'12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for swipe with all optional parameters', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 0, - y1: 0, - x2: 500, - y2: 800, - duration: 2.0, - delta: 10, - preDelay: 0.5, - postDelay: 0.3, - }, - trackingExecutor, - mockAxeHelpers, - ), + it('uses distance as a normalized stroke fraction for endpoint calculation', async () => { + const { calls, executor } = createTrackingExecutor(); + + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up', distance: 0.5 }, + executor, + ); + + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up', distance: 0.8 }, + executor, ); - expect(capturedCommand).toEqual([ + expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', - '0', + '100', '--start-y', - '0', + '270', '--end-x', - '500', + '100', '--end-y', - '800', - '--duration', - '2', - '--delta', - '10', - '--pre-delay', - '0.5', - '--post-delay', - '0.3', + '130', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); - }); - - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe 
completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - swipeLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - x1: 150, - y1: 250, - x2: 400, - y2: 600, - delta: 5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', + expect(calls[2]?.command).toEqual([ + '/mocked/axe/path', 'swipe', '--start-x', - '150', + '100', '--start-y', - '250', + '312', '--end-x', - '400', + '100', '--end-y', - '600', - '--delta', - '5', + '88', '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + simulatorId, ]); }); }); - describe('Handler Behavior', () => { - it('should return error for missing simulatorId via handler', async () => { - const result = await callHandler(handler, { x1: 100, y1: 200, x2: 300, y2: 400 }); - - expect(result.isError).toBe(true); - expect(result.content[0].type).toBe('text'); - expect(allText(result)).toContain('Missing required session defaults'); - expect(allText(result)).toContain('simulatorId is required'); - expect(allText(result)).toContain('session-set-defaults'); - }); + describe('Resolution failures', () => { + it('returns TARGET_NOT_ACTIONABLE without calling AXe when the frame is too small', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 1, height: 1 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - it('should return validation error for missing x1 once simulator default exists', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, + ); - const result = await callHandler(handler, { - y1: 200, - x2: 300, - y2: 400, + 
expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); - - expect(result.isError).toBe(true); - expect(result.content[0].type).toBe('text'); - expect(allText(result)).toContain('Parameter validation failed'); - expect(allText(result)).toContain('x1: Invalid input: expected number, received undefined'); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); - it('should return success for valid swipe execution', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', - }); - - const mockAxeHelpers = createMockAxeHelpers(); + it('returns TARGET_NOT_ACTIONABLE without calling AXe when derived swipe points are degenerate', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 2, height: 100 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'right' }, + executor, ); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Swipe from (100, 200) to (300, 400) simulated successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(calls).toEqual([]); }); + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = 
createTrackingExecutor(); - it('should return success for swipe with duration', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', - }); + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, + ); - const mockAxeHelpers = createMockAxeHelpers(); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); + }); - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: 1.5, - }, - mockExecutor, - mockAxeHelpers, - ), + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot( + [createNode({ type: 'ScrollView', role: 'AXScrollArea' })], + Date.now() - 61_000, ); + const { calls, executor } = createTrackingExecutor(); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Swipe from (100, 200) to (300, 400) duration=1.5s simulated successfully.', + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', - }); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); + }); - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), + const 
result = await runSwipe( + { simulatorId, withinElementRef: 'e404', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - }); - - const mockAxeHelpers = createMockAxeHelpers(); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ type: 'Button', role: 'AXButton' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate swipe.'); - expect(text).toContain('axe command failed'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle SystemError from command execution', async () => { - // Override the executor to throw SystemError for this test - const systemErrorExecutor = async () => { - throw new SystemError('System error occurred'); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - systemErrorExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + 
it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { withinElementRef: 'e1', direction: 'up' }); expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: System error occurred'); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected Error objects', async () => { - // Override the executor to throw an unexpected Error for this test - const unexpectedErrorExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = createMockAxeHelpers(); + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - unexpectedErrorExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + createFailingExecutor('swipe failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: Unexpected error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); - it('should handle unexpected string errors', async () => { - // Override the executor to throw a string error for this test - const stringErrorExecutor = async () => { - throw 
'String error'; - }; - - const mockAxeHelpers = createMockAxeHelpers(); + it('suggests the next action from the post-swipe runtime snapshot', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + const { ctx, run } = createMockToolHandlerContext(); - const result = await runLogic(() => + await run(() => swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - stringErrorExecutor, - mockAxeHelpers, + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + createTrackingExecutor().executor, + createMockAxeHelpers(), ), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + expect(result.capture).toMatchObject({ + type: 'runtime-snapshot', + simulatorId, + }); + expect(ctx.nextSteps).toEqual([ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: 'e1' }, + }, + ]); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/tap.test.ts b/src/mcp/tools/ui-automation/__tests__/tap.test.ts index 062933648..1a23523a6 100644 --- a/src/mcp/tools/ui-automation/__tests__/tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/tap.test.ts @@ -1,198 +1,86 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; - -import { schema, handler, type AxeHelpers, tapLogic } from '../tap.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, 
runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; - -function createMockAxeHelpers(): AxeHelpers { - return { - getAxePath: () => '/mocked/axe/path', - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { schema, handler, tapLogic } from '../tap.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createSequencedExecutor, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +function actionCommands(calls: Array<{ command: string[] }>): string[][] { + return calls.map((call) => call.command).filter((command) => command[1] !== 'describe-ui'); } -function createMockAxeHelpersWithNullPath(): AxeHelpers { - return { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; +async function runTap( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => tapLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Tap Plugin', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes elementRef-only targeting fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); + expect(schema).not.toHaveProperty('id'); + expect(schema).not.toHaveProperty('label'); - it('should validate schema fields with safeParse', () => { const 
schemaObject = z.object(schema); - - expect(schemaObject.safeParse({ x: 100, y: 200 }).success).toBe(true); - - expect(schemaObject.safeParse({ id: 'loginButton' }).success).toBe(true); - - expect(schemaObject.safeParse({ label: 'Log in' }).success).toBe(true); - - expect(schemaObject.safeParse({ x: 100, y: 200, id: 'loginButton' }).success).toBe(true); - - expect( - schemaObject.safeParse({ x: 100, y: 200, id: 'loginButton', label: 'Log in' }).success, - ).toBe(true); - + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(true); + expect(schemaObject.safeParse({}).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: '' }).success).toBe(false); expect( - schemaObject.safeParse({ - x: 100, - y: 200, - preDelay: 0.5, - postDelay: 1, - }).success, + schemaObject.safeParse({ elementRef: 'e1', preDelay: 0.5, postDelay: 1 }).success, ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 3.14, - y: 200, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 3.14, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - preDelay: -1, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - postDelay: -1, - }).success, - ).toBe(false); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', preDelay: 10.1 }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', postDelay: 10.1 }).success).toBe(false); }); }); describe('Command Generation', () => { - let callHistory: Array<{ - command: string[]; - logPrefix?: string; - useShell?: boolean; - opts?: { env?: Record; cwd?: string }; - }>; - - beforeEach(() => { - callHistory = []; - }); - - it('should generate correct axe command with minimal 
parameters', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); + it('uses AXe id targeting when the referenced element has an identifier', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { calls, executor } = createTrackingExecutor(); - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ - '/mocked/axe/path', - 'tap', - '-x', - '100', - '-y', - '200', - '--udid', - '12345678-1234-4234-8234-123456789012', - ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); - }); - - it('should generate correct axe command with element id target', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', + expect(result).toMatchObject({ + didError: false, + action: { type: 'tap', elementRef: 'e1', x: 60, y: 40 }, }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - id: 'loginButton', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - 
expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ + expect(actionCommands(calls)).toHaveLength(1); + expect(calls[0]).toEqual({ command: [ '/mocked/axe/path', 'tap', '--id', - 'loginButton', + 'continue-button', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], logPrefix: '[AXe]: tap', useShell: false, @@ -200,515 +88,406 @@ describe('Tap Plugin', () => { }); }); - it('should generate correct axe command with element label target', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); + it('preserves the cached runtime snapshot after a successful tap', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { executor } = createTrackingExecutor(); - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - const mockAxeHelpers = createMockAxeHelpers(); + expect(result.didError).toBe(false); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - label: 'Log in', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + it('reports post-action snapshot parse failures without failing the tap action', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: 'tap succeeded' }, + { success: true, output: 'not json' }, + ]); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ - '/mocked/axe/path', - 'tap', - '--label', - 'Log in', - '--udid', - '12345678-1234-4234-8234-123456789012', - ], - 
logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); - }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - it('should prefer coordinates over id/label when both are provided', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', + expect(result.didError).toBe(false); + expect(result.uiError).toMatchObject({ + code: 'SNAPSHOT_PARSE_FAILED', + recoveryHint: expect.stringContaining('snapshot_ui'), }); + expect(result.diagnostics?.warnings?.[0]?.message).toContain( + 'UI action succeeded, but the refreshed runtime snapshot could not be parsed.', + ); + expect(result.capture).toBeUndefined(); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(actionCommands(calls)).toHaveLength(1); + }); - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; + it('reports post-action snapshot capture failures without failing the tap action', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { executor } = createSequencedExecutor([ + { success: true, output: 'tap succeeded' }, + { success: false, error: 'describe-ui failed' }, + ]); - const mockAxeHelpers = createMockAxeHelpers(); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 120, - y: 240, - id: 'loginButton', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + expect(result.didError).toBe(false); + expect(result.uiError).toMatchObject({ + code: 'SNAPSHOT_CAPTURE_FAILED', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.error).toBeNull(); + expect(result.capture).toBeUndefined(); + 
expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('includes element type when tapping a referenced element with a shared identifier', async () => { + recordSnapshot([ + createNode({ + type: 'Group', + role: 'AXGroup', + AXUniqueId: 'shared-action', + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXUniqueId: 'shared-action', + AXLabel: 'Continue', + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '120', - '-y', - '240', + '--id', + 'shared-action', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ]); }); - it('should generate correct axe command with pre-delay', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); + it('uses coordinates immediately when the snapshot already has duplicate selector matches', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + AXUniqueId: 'trash', + AXLabel: 'Remove', + }), + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 300, y: 400, width: 50, height: 80 }, + AXUniqueId: 'trash', + AXLabel: 'Remove', + }), + ]); + const { calls, executor } = createTrackingExecutor(); 
+ + const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '325', '-y', '440', '--udid', simulatorId], + ]); + }); - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 300, - preDelay: 0.5, - }, - wrappedExecutor, - mockAxeHelpers, - ), + it('falls back to the resolved center when selector tap is ambiguous', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'shared-action', + }), + ]); + const { calls, executor } = createSequencedExecutor( + [ + { success: false, error: 'Multiple accessibility elements matched selector' }, + { success: true, output: 'tapped by coordinate' }, + ], + { describeUiAfterSequence: true }, ); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '150', - '-y', - '300', - '--pre-delay', - '0.5', + '--id', + 'shared-action', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - it('should generate correct axe command with post-delay', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, 
useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( + it('falls back to the resolved center when selector tap reports a parenthesized match count', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'weather.locationsSheet', + AXLabel: 'Clear search', + }), + ]); + const { calls, executor } = createSequencedExecutor( + [ { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 250, - y: 400, - postDelay: 1.0, + success: false, + error: + "Multiple (2) accessibility elements matched --id 'weather.locationsSheet'. No tap performed.", }, - wrappedExecutor, - mockAxeHelpers, - ), + { success: true, output: 'tapped by coordinate' }, + ], + { describeUiAfterSequence: true }, ); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '250', - '-y', - '400', - '--post-delay', - '1', + '--id', + 'weather.locationsSheet', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - it('should generate correct axe command with both delays', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - 
await runLogic(() => - tapLogic( + it('falls back to the resolved center when selector tap reports no match', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: undefined, + AXIdentifier: undefined, + AXLabel: 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°', + }), + ]); + const { calls, executor } = createSequencedExecutor( + [ { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 350, - y: 500, - preDelay: 0.3, - postDelay: 0.7, + success: false, + error: + "No accessibility element matched --label 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°'. No tap performed.", }, - wrappedExecutor, - mockAxeHelpers, - ), + { success: true, output: 'tapped by coordinate' }, + ], + { describeUiAfterSequence: true }, ); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '350', - '-y', - '500', - '--pre-delay', - '0.3', - '--post-delay', - '0.7', + '--label', + 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - }); - - describe('Plugin Handler Validation', () => { - it('should require simulatorId session default when not provided', async () => { - const result = await callHandler(handler, { - x: 100, - y: 200, - }); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - 
expect(message).toContain('session-set-defaults'); + it('does not fall back for unrelated failures that mention multiple', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'shared-action', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Failed after multiple retry attempts' }, + { success: true, output: 'should not run' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(true); + expect(actionCommands(calls)).toHaveLength(1); + expect(actionCommands(calls)[0]).toEqual([ + '/mocked/axe/path', + 'tap', + '--id', + 'shared-action', + '--element-type', + 'Button', + '--udid', + simulatorId, + ]); }); - it('should return validation error for missing x coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { - y: 200, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: X coordinate is required when y is provided.'); + it('falls back to the referenced element center when no identifier exists', async () => { + recordSnapshot([ + createNode({ frame: { x: 10, y: 20, width: 100, height: 40 }, AXLabel: undefined }), + ]); + const { calls, executor } = createTrackingExecutor(); + + await runTap({ simulatorId, elementRef: 'e1', preDelay: 0.25, postDelay: 0.5 }, executor); + + expect(actionCommands(calls)).toHaveLength(1); + expect(actionCommands(calls)[0]).toEqual([ + '/mocked/axe/path', + 'tap', + '-x', + '60', + '-y', + '40', + '--pre-delay', + '0.25', + '--post-delay', + '0.5', + '--udid', + simulatorId, + ]); }); - it('should return validation error for missing y coordinate', async () => { - sessionStore.setDefaults({ simulatorId: 
'12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { - x: 100, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('y: Y coordinate is required when x is provided.'); + it('uses a touch down/up activation for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + AXLabel: 'Reduce transparency', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.action).toMatchObject({ type: 'tap', elementRef: 'e1', x: 307, y: 903 }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'touch', + '-x', + '307', + '-y', + '903', + '--down', + '--up', + '--udid', + simulatorId, + ]); }); + }); - it('should return validation error when both id and label are provided without coordinates', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const result = await callHandler(handler, { - id: 'loginButton', - label: 'Log in', - }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('id: Provide either id or label, not both.'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return validation error for non-integer x coordinate', async () => { - sessionStore.setDefaults({ simulatorId: 
'12345678-1234-4234-8234-123456789012' }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const result = await callHandler(handler, { - x: 3.14, - y: 200, - }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: X coordinate must be an integer'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should return validation error for non-integer y coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const result = await callHandler(handler, { - x: 100, - y: 3.14, - }); + const result = await runTap({ simulatorId, elementRef: 'e404' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('y: Y coordinate must be an integer'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should return validation error for negative preDelay', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ enabled: false })]); + const { calls, executor } = createTrackingExecutor(); - const result = await callHandler(handler, { - x: 100, - y: 200, - preDelay: -1, - }); + const result 
= await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('preDelay: Pre-delay must be non-negative'); - }); - - it('should return validation error for negative postDelay', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { - x: 100, - y: 200, - postDelay: -1, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('postDelay: Post-delay must be non-negative'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should return DependencyError when axe binary is not found', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - error: undefined, - }); - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - preDelay: 0.5, - postDelay: 1.0, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default before validation', async () => { + const result = await callHandler(handler, { elementRef: 'e1' }); expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle DependencyError when axe 
binary not found (second test)', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Coordinates out of bounds', - }); - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); + it('returns UI_STATE_CHANGED when identifier-based AXe tap fails after ref resolution', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTap( + { simulatorId, elementRef: 'e1' }, + createFailingExecutor('element not found'), ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (third test)', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'System error occurred', + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'UI_STATE_CHANGED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); - it('should handle DependencyError when axe binary not found (fourth test)', async () => { - const mockExecutor = async () => { - throw new Error('ENOENT: no such file or directory'); - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); + it('returns ACTION_FAILED when coordinate-based AXe tap fails after ref resolution', async () => { + recordSnapshot([createNode({ AXLabel: undefined 
})]); - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (fifth test)', async () => { - const mockExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTap( + { simulatorId, elementRef: 'e1' }, + createFailingExecutor('tap failed'), ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (sixth test)', async () => { - const mockExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/touch.test.ts b/src/mcp/tools/ui-automation/__tests__/touch.test.ts index 2e89f730a..40dbf9f85 100644 --- a/src/mcp/tools/ui-automation/__tests__/touch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/touch.test.ts @@ -1,657 +1,260 @@ -import { describe, it, expect, 
beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, touchLogic } from '../touch.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runTouch( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => touchLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} describe('Touch Plugin', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes elementRef and touch flags without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); - - it('should validate schema fields with safeParse', () => { - const schemaObj = z.object(schema); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - down: true, - }).success, - ).toBe(true); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - up: true, - }).success, - ).toBe(true); - - expect( - schemaObj.safeParse({ - x: 100.5, - y: 
200, - down: true, - }).success, - ).toBe(false); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200.5, - down: true, - }).success, - ).toBe(false); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - down: true, - delay: -1, - }).success, - ).toBe(false); - - const withSimId = schemaObj.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('down'); + expect(schema).toHaveProperty('up'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', up: true }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true, delay: -1 }).success).toBe( + false, + ); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true, delay: 10.1 }).success).toBe( + false, + ); + expect(schemaObject.safeParse({ down: true }).success).toBe(false); }); }); - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await callHandler(handler, { - x: 100, - y: 200, - down: true, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); + describe('Command Generation', () => { + it('touches down at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - it('should surface parameter validation errors when defaults 
exist', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - const result = await callHandler(handler, { - y: 200, - down: true, + expect(result).toMatchObject({ + didError: false, + action: { type: 'touch', elementRef: 'e1', event: 'touch down', x: 60, y: 40 }, }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: Invalid input: expected number, received undefined'); - }); - }); - - describe('Command Generation', () => { - it('should generate correct axe command for touch down', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '100', + '60', '-y', - '200', + '40', '--down', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for touch up', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await 
runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 250, - up: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'touch', - '-x', - '150', - '-y', - '250', - '--up', - '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); + it('touches up at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command for touch down+up', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 300, - y: 400, - down: true, - up: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', up: true }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(result.action).toMatchObject({ + type: 'touch', + elementRef: 'e1', + event: 'touch up', + x: 60, + y: 40, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '300', + '60', '-y', - '400', - '--down', + '40', '--up', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for touch with delay', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const 
mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 50, - y: 75, - down: true, - up: true, - delay: 1.5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + it('touches down and up with delay at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + await runTouch({ simulatorId, elementRef: 'e1', down: true, up: true, delay: 1.5 }, executor); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '50', + '60', '-y', - '75', + '40', '--down', '--up', '--delay', '1.5', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - x: 0, - y: 0, - up: true, - delay: 0.5, - }, - trackingExecutor, - mockAxeHelpers, - ), + it('uses the switch activation point for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTouch( + { simulatorId, elementRef: 'e1', down: true, up: true }, + executor, ); - expect(capturedCommand).toEqual([ - 
'/path/to/bundled/axe', + expect(result.action).toMatchObject({ + type: 'touch', + elementRef: 'e1', + event: 'touch down+up', + x: 307, + y: 903, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '0', + '307', '-y', - '0', + '903', + '--down', '--up', - '--delay', - '0.5', '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + simulatorId, ]); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should handle axe dependency error', async () => { - const mockExecutor = createMockExecutor({ success: true }); - const mockAxeHelpers = { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); + describe('Resolution failures', () => { + it('keeps down/up validation before snapshot resolution', async () => { + const { calls, executor } = createTrackingExecutor(); - it('should successfully perform touch down', async () => { - const mockExecutor = createMockExecutor({ success: true, output: 'Touch down completed' }); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.error).toBe('At least one of "down" or "up" must be true'); + expect(result.action).toEqual({ type: 'touch', 
elementRef: 'e1' }); + expect(result.uiError).toBeUndefined(); + expect(calls).toEqual([]); }); - it('should successfully perform touch up', async () => { - const mockExecutor = createMockExecutor({ success: true, output: 'Touch up completed' }); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch up) at (100, 200) executed successfully.', - ); - }); + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - it('should return error when neither down nor up is specified', async () => { - const mockExecutor = createMockExecutor({ success: true }); - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBe(true); - expect(allText(result)).toContain('At least one of "down" or "up" must be true'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return success for touch down event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - 
touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should return success for touch up event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e404', down: true }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch up) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should return success for touch down+up event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 
200, - down: true, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down+up) at (100, 200) executed successfully.', - ); - }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ role: 'AXApplication', type: 'Application' })]); + const { calls, executor } = createTrackingExecutor(); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ success: true }); - - const mockAxeHelpers = { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', + describe('Handler Behavior', () => { + it('rejects delay unless both down and up are true before AXe runs', async () => { + const result = await callHandler(handler, { + simulatorId, + elementRef: 'e1', + down: true, + delay: 1, }); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - 
expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to execute touch event.'); - expect(text).toContain('axe command failed'); - }); - - it('should handle SystemError from command execution', async () => { - const mockExecutor = async () => { - throw new Error('System error occurred'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), + expect(result.content[0].text).toContain( + 'Delay can only be used when both down and up are true', ); - - expect(result.isError).toBe(true); }); - it('should handle unexpected Error objects', async () => { - const mockExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', down: true }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected string errors', async () => { - const mockExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - 
mockAxeHelpers, - ), + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode()]); + + const result = await runTouch( + { simulatorId, elementRef: 'e1', down: true }, + createFailingExecutor('touch failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts index 18f481bd5..ba32c4d4d 100644 --- a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts @@ -1,481 +1,465 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { - createMockExecutor, - createNoopExecutor, - mockProcess, -} from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, type_textLogic } from '../type_text.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; - -// Mock axe helpers for dependency injection -function createMockAxeHelpers( - overrides: { - getAxePathReturn?: string | null; - getBundledAxeEnvironmentReturn?: Record; - } = 
{}, -) { - return { - getAxePath: () => - overrides.getAxePathReturn !== undefined ? overrides.getAxePathReturn : '/usr/local/bin/axe', - getBundledAxeEnvironment: () => overrides.getBundledAxeEnvironmentReturn ?? {}, - }; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +function actionCommands(calls: Array<{ command: string[] }>): string[][] { + return calls.map((call) => call.command).filter((command) => command[1] !== 'describe-ui'); } -// Mock executor that tracks rejections for testing -function createRejectingExecutor(error: any) { - return async () => { - throw error; - }; +async function runTypeText( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => type_textLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Type Text Tool', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('requires elementRef and text', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('text'); + expect(schema).toHaveProperty('replaceExisting'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - + expect(schemaObject.safeParse({ elementRef: 'e1', text: 'Hello World' }).success).toBe(true); expect( - schemaObject.safeParse({ - text: 'Hello World', - }).success, + schemaObject.safeParse({ elementRef: 'e1', text: 'Hello World', replaceExisting: true }) + .success, ).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', text: '' }).success).toBe(false); 
+ expect(schemaObject.safeParse({ text: 'Hello World' }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(false); + }); + }); - expect( - schemaObject.safeParse({ - text: '', - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - text: 123, - }).success, - ).toBe(false); + describe('Command Generation', () => { + it('focuses the referenced text field by identifier, then types text', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Email', + AXUniqueId: 'email-field', + }), + ]); + const { calls, executor } = createTrackingExecutor(); - expect(schemaObject.safeParse({}).success).toBe(false); + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'user@example.com' }, + executor, + ); - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', + expect(result).toMatchObject({ + didError: false, + action: { type: 'type-text', elementRef: 'e1', textLength: 16 }, }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'email-field', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'user@example.com', '--udid', simulatorId], + ]); }); - }); - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await callHandler(handler, { text: 'Hello' }); + it('types all AXe-supported US keyboard punctuation characters', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + const text = 'Az09 !@#$%^&*()_+-={}[]|\\:";\'<>?,./`~'; - expect(result.isError).toBe(true); - const message = result.content[0].text; 
- expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text }, executor); - it('should surface validation errors when defaults exist', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + expect(result).toMatchObject({ + didError: false, + action: { type: 'type-text', elementRef: 'e1', textLength: text.length }, + }); + expect(actionCommands(calls)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--label', + 'Search', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', text, '--udid', simulatorId], + ]); + }); - const result = await callHandler(handler, {}); + it('rejects unsupported AXe typing characters before focusing or typing', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + const text = 'Tokyo Reykjavík 42'; - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('text: Invalid input: expected string, received undefined'); - }); - }); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text }, executor); - describe('Command Generation', () => { - it('should generate correct axe command for basic text typing', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, + 
expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + message: expect.stringContaining('US keyboard characters'), + elementRef: 'e1', + recoveryHint: expect.stringContaining('US keyboard'), }); + expect(result.action).toEqual({ + type: 'type-text', + elementRef: 'e1', + textLength: text.length, + }); + expect(calls).toEqual([]); + expect(JSON.stringify(result)).not.toContain('Tokyo'); + expect(JSON.stringify(result)).not.toContain('Reykjavík'); + }); - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'Hello World', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('includes text field type when focusing a referenced field with a shared identifier', async () => { + recordSnapshot([ + createNode({ + type: 'Group', + role: 'AXGroup', + AXUniqueId: 'locationSearchField', + children: [ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXUniqueId: 'locationSearchField', + AXLabel: 'Search for a city', + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTypeText({ simulatorId, elementRef: 'e2', text: 'London' }, executor); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'locationSearchField', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for text with special characters', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const 
mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('focuses by coordinates immediately when the snapshot already has duplicate selector matches', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'locationSearchField', + AXLabel: 'Search', + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 40, y: 200, width: 180, height: 40 }, + AXUniqueId: 'locationSearchField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'user@example.com', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e2', text: 'London' }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'user@example.com', - '--udid', - '12345678-1234-4234-8234-123456789012', + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '130', '-y', '220', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for text with numbers and symbols', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Password123!@#', - }, - trackingExecutor, - mockAxeHelpers, - ), + it('falls back to the 
resolved center when selector focus is ambiguous', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'locationSearchField', + }), + ]); + const { calls, executor } = createSequencedExecutor( + [ + { success: false, error: 'Multiple 2 accessibility elements matched selector' }, + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ], + { describeUiAfterSequence: true }, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'Password123!@#', - '--udid', - '12345678-1234-4234-8234-123456789012', + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'London' }, executor); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'locationSearchField', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for long text', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const longText = - 'This is a very long text that needs to be typed into the simulator for testing purposes.'; - - await runLogic(() => - type_textLogic( + it('falls back to the resolved center when selector focus reports no match', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: undefined, + 
AXIdentifier: undefined, + AXLabel: 'Search for a city', + }), + ]); + const { calls, executor } = createSequencedExecutor( + [ { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: longText, + success: false, + error: + "No accessibility element matched --label 'Search for a city'. No tap performed.", }, - trackingExecutor, - mockAxeHelpers, - ), + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ], + { describeUiAfterSequence: true }, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - longText, - '--udid', - '12345678-1234-4234-8234-123456789012', + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Portland' }, + executor, + ); + + expect(result.didError).toBe(false); + expect(actionCommands(calls)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--label', + 'Search for a city', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'Portland', '--udid', simulatorId], ]); }); - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/path/to/bundled/axe', - getBundledAxeEnvironmentReturn: { AXE_PATH: '/some/path' }, - }); + it('selects existing text before typing when replaceExisting is true', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXValue: 'Tokyo', + AXLabel: undefined, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - type_textLogic( - { - simulatorId: 
'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - text: 'Test message', - }, - trackingExecutor, - mockAxeHelpers, - ), + await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Portland', replaceExisting: true }, + executor, ); - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', - 'type', - 'Test message', - '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + expect(actionCommands(calls)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--value', + 'Tokyo', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + [ + '/mocked/axe/path', + 'key-combo', + '--modifiers', + '227', + '--key', + '4', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'Portland', '--udid', simulatorId], ]); }); - }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should handle axe dependency error', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: null, - }); + it('focuses the referenced text field by center when no identifier exists', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXLabel: undefined, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - createNoopExecutor(), - mockAxeHelpers, - ), - ); + await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(actionCommands(calls)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'Hello', '--udid', simulatorId], + ]); }); + }); - it('should successfully type text', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - 
getBundledAxeEnvironmentReturn: {}, - }); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Text typed successfully', - error: undefined, - }); + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain('Text typing simulated successfully.'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return success for valid text typing', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createMockExecutor({ - success: true, - output: 'Text typed successfully', - error: undefined, - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain('Text typing simulated successfully.'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe binary not found', async () 
=> { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: null, - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - createNoopExecutor(), - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e404', text: 'Hello' }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle AxeError from command execution', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Text field not found', - }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ type: 'Button', role: 'AXButton' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate text typing.'); - expect(text).toContain('Text field not found'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + 
}); - it('should handle SystemError from command execution', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createRejectingExecutor(new Error('ENOENT: no such file or directory')); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', text: 'Hello' }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected Error objects', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createRejectingExecutor(new Error('Unexpected error')); + it('returns ACTION_FAILED when focusing the resolved field fails', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'focus failed' }, + ]); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Secret123' }, + executor, ); - expect(result.isError).toBe(true); - }); - - it('should handle unexpected string errors', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, + expect(result.didError).toBe(true); + 
expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); + expect(calls).toHaveLength(1); + expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(result.action).toEqual({ type: 'type-text', elementRef: 'e1', textLength: 9 }); + }); - const mockExecutor = createRejectingExecutor('String error'); + it('returns ACTION_FAILED when typing fails after focus succeeds', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: 'focused' }, + { success: false, error: 'typing failed' }, + ]); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Secret123' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(calls).toHaveLength(2); + expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(result.action).toEqual({ type: 'type-text', elementRef: 'e1', textLength: 9 }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-incomplete-completion-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-incomplete-completion-next-steps.test.ts new file mode 100644 index 000000000..52d207c13 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-incomplete-completion-next-steps.test.ts @@ -0,0 +1,266 @@ +import { beforeEach, describe, expect, it } from 
'vitest'; +import type { + AccessibilityNode, + UiActionResultDomainResult, +} from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { toStructuredEnvelope } from '../../../../utils/structured-output-envelope.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { createCaptureSuccessResult } from '../shared/domain-result.ts'; +import { + createRuntimeSnapshotNextSteps, + getForegroundCompletionSuppressedRuntimeTargetRefs, +} from '../shared/runtime-next-steps.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { tapLogic } from '../tap.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, +} from './ui-action-test-helpers.ts'; + +const simulatorId = '57F882E8-F858-4F57-98D4-8164D5915C43'; + +function createSearchResultBeforeCompletionNodes(): AccessibilityNode[] { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.searchSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Result query', + AXIdentifier: 'example.searchSheet', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'Example result, detail text', + AXValue: 'not saved', + frame: { x: 20, y: 218, width: 280, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Add', + AXIdentifier: 'example.searchSheet', + frame: { x: 322, y: 232, width: 60, height: 44 }, + 
}), + ], + }), + ]; +} + +function createMixedCompletionSheetNodes(): AccessibilityNode[] { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.searchSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + AXLabel: 'Existing result, detail text', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 280, height: 72 }, + }), + createNode({ + AXLabel: 'New result, detail text', + AXValue: 'not saved', + frame: { x: 20, y: 306, width: 280, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Add', + AXIdentifier: 'example.searchSheet', + frame: { x: 322, y: 320, width: 60, height: 44 }, + }), + ], + }), + ]; +} + +function recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + +function currentSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!; +} + +function sameSearchResultExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { + success: true, + output: JSON.stringify({ elements: createSearchResultBeforeCompletionNodes() }), + }, + ]).executor; +} + +function compactCaptureList( + envelope: ReturnType, + key: 'targets' | 'text' | 'evidence', +): string[] { + const data = envelope.data; + if (!data || typeof data !== 'object' || !('capture' in data)) { + throw new Error('Expected structured output capture.'); + } + + const capture = (data as { capture?: unknown }).capture; + if (!capture || typeof capture !== 'object' || !(key in 
capture)) { + return []; + } + + const entries = (capture as Record)[key]; + if (!Array.isArray(entries)) { + throw new Error(`Expected compact runtime snapshot ${key} array.`); + } + + return entries.filter((entry): entry is string => typeof entry === 'string'); +} + +function compactTargets(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'targets'); +} + +function compactText(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'text'); +} + +function compactEvidence(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'evidence'); +} + +describe('UI action incomplete completion next steps', () => { + beforeEach(() => { + sessionStore.clear(); + }); + + it('prefers Add when foreground completion rows contain mixed complete and incomplete states', () => { + recordSnapshot(createMixedCompletionSheetNodes()); + const snapshot = currentSnapshot().payload; + const addRef = snapshot.elements.find((element) => element.label === 'Add')?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + const savedRef = snapshot.elements.find((element) => element.value === 'saved')?.ref; + const notSavedRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + expect(addRef).toBeDefined(); + expect(closeRef).toBeDefined(); + expect(savedRef).toBeDefined(); + expect(notSavedRef).toBeDefined(); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + const suppressedRefs = getForegroundCompletionSuppressedRuntimeTargetRefs({ + simulatorId, + runtimeSnapshot: snapshot, + }); + + expect(steps[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: addRef }, + }); + expect(steps[0]?.params?.elementRef).not.toBe(closeRef); + expect(suppressedRefs).toEqual([notSavedRef]); + expect(suppressedRefs).not.toContain(savedRef); + }); + + it('keeps ordinary 
unsuppressed rows actionable in compact targets', () => { + recordSnapshot(createSearchResultBeforeCompletionNodes()); + const snapshot = currentSnapshot().payload; + const rowRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + expect(rowRef).toBeDefined(); + + const result = createCaptureSuccessResult(simulatorId, { capture: snapshot }); + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2'); + + expect(compactTargets(envelope).some((target) => target.startsWith(`${rowRef}|tap|`))).toBe( + true, + ); + }); + + it('does not repeat a no-op incomplete foreground row tap and prefers Add', async () => { + recordSnapshot(createSearchResultBeforeCompletionNodes()); + const snapshot = currentSnapshot().payload; + const rowRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + const addRef = snapshot.elements.find((element) => element.label === 'Add')?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + expect(rowRef).toBeDefined(); + expect(addRef).toBeDefined(); + expect(closeRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + tapLogic( + { simulatorId, elementRef: rowRef! 
}, + sameSearchResultExecutor(), + createMockAxeHelpers(), + ), + ); + + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.ui-action-result', '2', { + nextSteps: ctx.nextSteps, + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + + expect(ctx.nextSteps?.[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: addRef }, + }); + expect(ctx.nextSteps?.[0]?.params?.elementRef).not.toBe(closeRef); + expect(compactTargets(envelope).some((target) => target.startsWith(`${rowRef}|tap|`))).toBe( + false, + ); + expect(compactTargets(envelope).some((target) => target.startsWith(`${addRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('not saved'))).toBe(false); + const notSavedEvidenceLine = compactEvidence(envelope).find((line) => + line.includes('not saved'), + ); + expect(notSavedEvidenceLine).toBeDefined(); + expect(notSavedEvidenceLine?.startsWith(`${rowRef}|`)).toBe(false); + expect(notSavedEvidenceLine?.split('|')).toHaveLength(4); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-next-steps.test.ts new file mode 100644 index 000000000..f3a0b2055 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-next-steps.test.ts @@ -0,0 +1,485 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { + AccessibilityNode, + UiActionResultDomainResult, +} from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { toStructuredEnvelope } from '../../../../utils/structured-output-envelope.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + setUiActionStructuredOutput, + 
setCaptureStructuredOutput, + createUiActionSuccessResult, + createCaptureSuccessResult, +} from '../shared/domain-result.ts'; +import { createRuntimeSnapshotNextSteps } from '../shared/runtime-next-steps.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { tapLogic } from '../tap.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, +} from './ui-action-test-helpers.ts'; + +const simulatorId = '9A9F6BF3-A1F8-4AC7-8B32-37EDC7F4F511'; + +function createLocationsSheetNodes() { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + AXLabel: 'Background, Details', + AXIdentifier: 'example.backgroundCard', + frame: { x: 20, y: 120, width: 362, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Edit', + AXIdentifier: 'example.locationsSheet', + frame: { x: 24, y: 96, width: 60, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: undefined, + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 362, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 362, height: 72 }, + }), + createNode({ + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 326, width: 362, height: 72 }, + }), + createNode({ + AXLabel: 'Aspen, 2:24 PM ¡ Light Snow', + frame: { x: 20, y: 
415, width: 362, height: 72 }, + }), + ], + }), + ]; +} + +function createSearchResultBeforeAddNodes() { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Clear search', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 150, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'London', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom, 9:24 PM ¡ Light Rain', + AXValue: 'not saved', + frame: { x: 20, y: 218, width: 280, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Add', + AXIdentifier: 'example.locationsSheet', + frame: { x: 322, y: 232, width: 60, height: 44 }, + }), + ], + }), + ]; +} + +function createSavedSearchResultSheetNodes() { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Clear search', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 150, width: 44, height: 44 }, + }), + 
createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'London', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom, 9:24 PM ¡ Light Rain', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 362, height: 72 }, + }), + ], + }), + ]; +} + +function currentSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!; +} + +function compactCaptureList( + envelope: ReturnType, + key: 'targets' | 'text' | 'evidence', +): string[] { + const data = envelope.data; + if (!data || typeof data !== 'object' || !('capture' in data)) { + throw new Error('Expected structured output capture.'); + } + + const capture = (data as { capture?: unknown }).capture; + if (!capture || typeof capture !== 'object' || !(key in capture)) { + return []; + } + + const entries = (capture as Record)[key]; + if (!Array.isArray(entries)) { + throw new Error(`Expected compact runtime snapshot ${key} array.`); + } + + return entries.filter((entry): entry is string => typeof entry === 'string'); +} + +function compactTargets(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'targets'); +} + +function compactText(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'text'); +} + +function compactEvidence(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'evidence'); +} + +function sameSheetExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { success: true, output: JSON.stringify({ elements: createLocationsSheetNodes() }) }, + ]).executor; +} + +function addSearchResultExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { + success: true, + output: JSON.stringify({ elements: createSavedSearchResultSheetNodes() }), + }, + ]).executor; +} + +function recordSnapshot(nodes: AccessibilityNode[], 
capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + +describe('UI action no-op next steps', () => { + beforeEach(() => { + sessionStore.clear(); + }); + + it('filters background taps when a foreground sheet is active', () => { + recordSnapshot(createLocationsSheetNodes()); + const snapshot = currentSnapshot().payload; + const backgroundRef = snapshot.elements.find( + (element) => element.identifier === 'example.backgroundCard', + )?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(backgroundRef).toBeDefined(); + expect(closeRef).toBeDefined(); + expect(steps[0]?.tool).toBe('tap'); + expect(steps[0]?.params?.elementRef).not.toBe(backgroundRef); + expect( + steps.some((step) => step.tool === 'tap' && step.params?.elementRef === backgroundRef), + ).toBe(false); + }); + + it('prefers Add over a not-saved foreground-sheet result row', () => { + recordSnapshot(createSearchResultBeforeAddNodes()); + const snapshot = currentSnapshot().payload; + const addRef = snapshot.elements.find((element) => element.label === 'Add')?.ref; + const rowRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + expect(addRef).toBeDefined(); + expect(rowRef).toBeDefined(); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: addRef }, + }); + expect(steps.some((step) => step.tool === 'tap' && step.params?.elementRef === rowRef)).toBe( + false, + ); + + const { ctx } = createMockToolHandlerContext(); + const result = createCaptureSuccessResult(simulatorId, { capture: snapshot }); + 
setCaptureStructuredOutput(ctx, result); + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + + expect(snapshot.elements.find((element) => element.ref === rowRef)?.actions).toContain('tap'); + expect(compactTargets(envelope).some((target) => target.startsWith(`${rowRef}|tap|`))).toBe( + false, + ); + expect(compactTargets(envelope).some((target) => target.startsWith(`${addRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('not saved'))).toBe(false); + const notSavedEvidenceLine = compactEvidence(envelope).find((line) => + line.includes('not saved'), + ); + expect(notSavedEvidenceLine).toBeDefined(); + expect(notSavedEvidenceLine?.startsWith(`${rowRef}|`)).toBe(false); + expect(notSavedEvidenceLine?.split('|')).toHaveLength(4); + }); + + it('keeps completed foreground-sheet rows actionable in regular snapshot affordances', () => { + recordSnapshot(createSavedSearchResultSheetNodes()); + const snapshot = currentSnapshot().payload; + const savedRowRef = snapshot.elements.find((element) => element.value === 'saved')?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + const clearSearchRef = snapshot.elements.find( + (element) => element.label === 'Clear search', + )?.ref; + expect(savedRowRef).toBeDefined(); + expect(closeRef).toBeDefined(); + expect(clearSearchRef).toBeDefined(); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + expect(steps[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: savedRowRef }, + }); + + const { ctx } = createMockToolHandlerContext(); + const result = createCaptureSuccessResult(simulatorId, { capture: snapshot }); + setCaptureStructuredOutput(ctx, result); + const 
envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + + expect(snapshot.elements.find((element) => element.ref === savedRowRef)?.actions).toContain( + 'tap', + ); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${savedRowRef}|tap|`)), + ).toBe(true); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${clearSearchRef}|tap|`)), + ).toBe(true); + expect(compactTargets(envelope).some((target) => target.startsWith(`${closeRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('saved'))).toBe(false); + }); + + it('does not demote a saved foreground-sheet result row after adding it', async () => { + recordSnapshot(createSearchResultBeforeAddNodes()); + const addRef = currentSnapshot().payload.elements.find( + (element) => element.label === 'Add', + )?.ref; + expect(addRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + tapLogic( + { simulatorId, elementRef: addRef! 
}, + addSearchResultExecutor(), + createMockAxeHelpers(), + ), + ); + + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + const capture = result.capture; + if (!capture || !('elements' in capture)) { + throw new Error('Expected runtime snapshot capture.'); + } + const closeRef = capture.elements.find((element) => element.label === 'Close')?.ref; + const clearSearchRef = capture.elements.find( + (element) => element.label === 'Clear search', + )?.ref; + const savedRow = capture.elements.find((element) => element.value === 'saved'); + expect(closeRef).toBeDefined(); + expect(clearSearchRef).toBeDefined(); + expect(savedRow).toBeDefined(); + expect(savedRow?.actions).toContain('tap'); + expect( + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + ).toBeUndefined(); + expect(ctx.nextSteps).toEqual([ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: savedRow?.ref }, + }, + ]); + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.ui-action-result', '2', { + nextSteps: ctx.nextSteps, + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${savedRow?.ref}|tap|`)), + ).toBe(true); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${clearSearchRef}|tap|`)), + ).toBe(true); + expect(compactTargets(envelope).some((target) => target.startsWith(`${closeRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('saved'))).toBe(false); + }); + + it('does not repeat a no-op foreground row tap or promote dismiss over remaining content', async () => { + recordSnapshot(createLocationsSheetNodes()); + const rowRef = currentSnapshot().payload.elements.find((element) => + element.label?.startsWith('London'), + )?.ref; + const remainingContentRef = 
currentSnapshot().payload.elements.find((element) => + element.label?.startsWith('Portland'), + )?.ref; + const closeRef = currentSnapshot().payload.elements.find( + (element) => element.label === 'Close', + )?.ref; + expect(rowRef).toBeDefined(); + expect(remainingContentRef).toBeDefined(); + expect(closeRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + tapLogic({ simulatorId, elementRef: rowRef! }, sameSheetExecutor(), createMockAxeHelpers()), + ); + + expect(ctx.nextSteps?.[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: remainingContentRef }, + }); + expect(ctx.nextSteps?.[0]?.params?.elementRef).not.toBe(closeRef); + expect(ctx.nextSteps?.some((step) => step.tool === 'batch')).toBe(false); + expect( + ctx.nextSteps?.some((step) => step.tool === 'tap' && step.params?.elementRef === rowRef), + ).toBe(false); + expect(ctx.nextSteps?.some((step) => step.tool === 'swipe')).toBe(false); + }); + + it('keeps ordinary post-action next steps when the screen hash changes', () => { + recordSnapshot(createLocationsSheetNodes()); + const previousSnapshot = currentSnapshot().payload; + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Continue', + frame: { x: 20, y: 120, width: 200, height: 44 }, + }), + ]); + const changedSnapshot = currentSnapshot().payload; + const result = createUiActionSuccessResult({ type: 'tap', elementRef: 'e5' }, simulatorId, [], { + capture: changedSnapshot, + previousRuntimeSnapshot: previousSnapshot, + }); + const { ctx } = createMockToolHandlerContext(); + + setUiActionStructuredOutput(ctx, result); + + expect(ctx.nextSteps).toEqual([ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: 'e1' }, + }, + ]); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-swipe-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-swipe-next-steps.test.ts 
new file mode 100644 index 000000000..ea41fb7bc --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-swipe-next-steps.test.ts @@ -0,0 +1,134 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { + AccessibilityNode, + UiActionResultDomainResult, +} from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { swipeLogic } from '../swipe.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, +} from './ui-action-test-helpers.ts'; + +const simulatorId = '044E0C26-0917-4812-B6D8-F5E22BA2E387'; + +function createForegroundSheetWithRealListNodes(): AccessibilityNode[] { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 'example.locationsSheet', + frame: { x: 0, y: 96, width: 402, height: 720 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 362, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 362, height: 72 }, + }), + ], + }), + ], + }), + ]; +} + +function 
recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + +function currentSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!; +} + +function sameSheetExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { + success: true, + output: JSON.stringify({ elements: createForegroundSheetWithRealListNodes() }), + }, + ]).executor; +} + +describe('UI action no-op swipe next steps', () => { + beforeEach(() => { + sessionStore.clear(); + }); + + it('does not repeat a no-op foreground sheet swipe or promote dismiss over visible content', async () => { + recordSnapshot(createForegroundSheetWithRealListNodes()); + const listRef = currentSnapshot().payload.elements.find( + (element) => element.identifier === 'example.locationsSheet', + )?.ref; + const contentRef = currentSnapshot().payload.elements.find((element) => + element.label?.startsWith('London'), + )?.ref; + const closeRef = currentSnapshot().payload.elements.find( + (element) => element.label === 'Close', + )?.ref; + expect(listRef).toBeDefined(); + expect(contentRef).toBeDefined(); + expect(closeRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + swipeLogic( + { simulatorId, withinElementRef: listRef!, direction: 'up', distance: 0.7 }, + sameSheetExecutor(), + createMockAxeHelpers(), + ), + ); + + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + expect(result.action).toMatchObject({ + type: 'swipe', + withinElementRef: listRef, + direction: 'up', + }); + expect(ctx.nextSteps?.[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: contentRef }, + }); + expect(ctx.nextSteps?.[0]?.params?.elementRef).not.toBe(closeRef); + expect( + ctx.nextSteps?.some( + (step) => 
step.tool === 'swipe' && step.params?.withinElementRef === listRef, + ), + ).toBe(false); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts new file mode 100644 index 000000000..e07a0a9fc --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts @@ -0,0 +1,105 @@ +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import type { CommandExecOptions, CommandExecutor } from '../../../../utils/execution/index.ts'; +import { mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { AxeHelpers } from '../shared/axe-command.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; + +export const simulatorId = '12345678-1234-4234-8234-123456789012'; + +export interface CapturedCommandCall { + command: string[]; + logPrefix?: string; + useShell?: boolean; + opts?: CommandExecOptions; +} + +export function createMockAxeHelpers( + overrides: { + getAxePathReturn?: string | null; + getBundledAxeEnvironmentReturn?: Record; + } = {}, +): AxeHelpers { + return { + getAxePath: () => + overrides.getAxePathReturn !== undefined ? overrides.getAxePathReturn : '/mocked/axe/path', + getBundledAxeEnvironment: () => + overrides.getBundledAxeEnvironmentReturn ?? 
{ SOME_ENV: 'value' }, + }; +} + +export function createTrackingExecutor(): { + calls: CapturedCommandCall[]; + executor: CommandExecutor; +} { + const calls: CapturedCommandCall[] = []; + const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { + calls.push({ command, logPrefix, useShell, opts }); + if (command[1] === 'describe-ui') { + return { + success: true, + output: JSON.stringify({ elements: [createNode()] }), + error: undefined, + process: mockProcess, + }; + } + return { success: true, output: 'ok', error: undefined, process: mockProcess }; + }; + + return { calls, executor }; +} + +export function createFailingExecutor(error: string): CommandExecutor { + return async () => ({ success: false, output: '', error, process: mockProcess }); +} + +export function createSequencedExecutor( + results: Array<{ success: boolean; output?: string; error?: string }>, + options: { describeUiAfterSequence?: boolean } = {}, +): { + calls: CapturedCommandCall[]; + executor: CommandExecutor; +} { + const calls: CapturedCommandCall[] = []; + let index = 0; + const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { + calls.push({ command, logPrefix, useShell, opts }); + if (options.describeUiAfterSequence === true && command[1] === 'describe-ui') { + return { + success: true, + output: JSON.stringify({ elements: [createNode()] }), + error: undefined, + process: mockProcess, + }; + } + const result = results[index] ?? results.at(-1) ?? { success: true }; + index += 1; + return { + success: result.success, + output: result.output ?? 
'', + error: result.error, + process: mockProcess, + }; + }; + + return { calls, executor }; +} + +export function createNode(overrides: Partial = {}): AccessibilityNode { + return { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + AXLabel: 'Continue', + ...overrides, + }; +} + +export function recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} diff --git a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts new file mode 100644 index 000000000..6a75e3d8c --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts @@ -0,0 +1,948 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import * as z from 'zod'; +import type { + AccessibilityNode, + CaptureResultDomainResult, +} from '../../../../types/domain-results.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import type { DebuggerBackend } from '../../../../utils/debugger/backends/DebuggerBackend.ts'; +import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, + recordRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { handler, schema, wait_for_uiLogic } from '../wait_for_ui.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, +} from './ui-action-test-helpers.ts'; + +const simulatorId = '12E2CB7E-780E-467B-BE90-2917AB236F77'; + +function hierarchyJson(nodes: Array>): 
string { + return JSON.stringify({ elements: nodes }); +} + +function recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + +function createTiming(startMs = 0): { + timing: { now: () => number; sleep: (durationMs: number) => Promise }; + getNow: () => number; +} { + let nowMs = startMs; + return { + timing: { + now: () => nowMs, + sleep: async (durationMs) => { + nowMs += durationMs; + }, + }, + getNow: () => nowMs, + }; +} + +async function createStoppedDebuggerManager(): Promise { + const backend: DebuggerBackend = { + kind: 'lldb-cli', + attach: async () => {}, + detach: async () => {}, + runCommand: async () => '', + resume: async () => {}, + addBreakpoint: async (spec) => ({ id: 1, spec, rawOutput: '' }), + removeBreakpoint: async () => '', + getStack: async () => '', + getVariables: async () => '', + getExecutionState: async () => ({ status: 'stopped', reason: 'breakpoint' }), + dispose: async () => {}, + }; + const manager = new DebuggerManager({ backendFactory: async () => backend }); + const session = await manager.createSession({ simulatorId, pid: 12345 }); + manager.setCurrentSession(session.id); + return manager; +} + +async function runWaitForUi( + params: Parameters[0], + executor: CommandExecutor, + timing = createTiming().timing, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => wait_for_uiLogic(params, executor, createMockAxeHelpers(), undefined, timing)); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as CaptureResultDomainResult; +} + +function firstRuntimeLabel(result: CaptureResultDomainResult): string | undefined { + return result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' + ? 
result.capture.elements[0]?.label + : undefined; +} + +describe('Wait for UI Plugin', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes public selector fields without simulatorId in the public schema', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('predicate'); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('identifier'); + expect(schema).toHaveProperty('label'); + expect(schema).toHaveProperty('role'); + expect(schema).toHaveProperty('value'); + expect(schema).toHaveProperty('text'); + expect(schema).not.toHaveProperty('simulatorId'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ predicate: 'settled' }).success).toBe(true); + expect( + schemaObject.safeParse({ predicate: 'exists', identifier: 'continue-button' }).success, + ).toBe(true); + expect( + schemaObject.safeParse({ predicate: 'gone', label: 'Loading', role: 'text' }).success, + ).toBe(true); + expect(schemaObject.safeParse({ predicate: 'textContains', text: 'Ready' }).success).toBe( + true, + ); + }); + + it('requires simulatorId session default before validation', async () => { + const result = await callHandler(handler, { predicate: 'settled' }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + + it('requires textContains text through handler validation', async () => { + const result = await callHandler(handler, { + simulatorId, + predicate: 'textContains', + identifier: 'status', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('textContains waits require text'); + }); + + it('rejects whitespace-only text through handler validation', async () => { + const result = await callHandler(handler, { + simulatorId, + 
predicate: 'textContains', + identifier: 'status', + text: ' ', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('textContains waits require text'); + }); + + it('allows text on gone waits for loading messages', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + ( + handler as unknown as ( + args: Record, + executor: CommandExecutor, + ) => Promise + )({ simulatorId, predicate: 'gone', text: 'Loading', timeoutMs: 0 }, executor), + ); + + expect(ctx.structuredOutput?.result.didError).toBe(false); + }); + + it('rejects unknown fields instead of silently broadening wait selectors', async () => { + const result = await callHandler(handler, { + simulatorId, + predicate: 'textContains', + text: 'Portland', + selector: { role: 'button' }, + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Unrecognized key: "selector"'); + }); + + it('ignores unrelated project session defaults before strict validation', async () => { + sessionStore.setDefaults({ + simulatorId, + projectPath: '/tmp/App.xcodeproj', + scheme: 'App', + simulatorName: 'iPhone 17 Pro', + simulatorPlatform: 'iOS Simulator', + }); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + ( + handler as unknown as ( + args: Record, + executor: CommandExecutor, + ) => Promise + )({ predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, executor), + ); + + expect(ctx.structuredOutput?.result.didError).toBe(false); + expect(calls[0]?.command.slice(1)).toEqual(['describe-ui', '--udid', simulatorId]); + }); + }); + + it('uses the resolved simulatorId in next-step params', async () => { + const { executor } = 
createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + const { result, run } = createMockToolHandlerContext(); + + await run(() => + wait_for_uiLogic( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + createMockAxeHelpers(), + undefined, + createTiming().timing, + ), + ); + + expect(result.nextStepParams).toEqual({ + snapshot_ui: { simulatorId }, + wait_for_ui: { simulatorId, predicate: 'settled' }, + }); + }); + + it('does not suggest follow-up steps when the wait fails', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Loading' })]) }, + ]); + const { result, ctx, run } = createMockToolHandlerContext(); + + await run(() => + wait_for_uiLogic( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + createMockAxeHelpers(), + undefined, + createTiming().timing, + ), + ); + + expect(ctx.structuredOutput?.result.didError).toBe(true); + expect(result.nextStepParams).toBeUndefined(); + }); + + it('converts elementRef to identifier before polling', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button', AXLabel: 'Continue' })], 0); + const { calls, executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXUniqueId: 'continue-button', AXLabel: 'Continue now' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + screenHash: expect.any(String), + seq: 2, + elements: [expect.objectContaining({ ref: 'e1', identifier: 'continue-button' })], + }), + ); + expect(calls[0]?.command).toEqual(['/mocked/axe/path', 'describe-ui', '--udid', simulatorId]); + 
expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('converts elementRef to label plus role when no identifier exists', async () => { + recordSnapshot([createNode({ AXLabel: 'Continue', AXUniqueId: undefined })], 0); + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([createNode({ AXLabel: 'Continue', AXUniqueId: undefined })]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(firstRuntimeLabel(result)).toBe('Continue'); + }); + + it('converts elementRef to value plus role when no identifier or label exists', async () => { + recordSnapshot( + [ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + title: null, + help: null, + AXValue: 'Email', + AXUniqueId: undefined, + }), + ], + 0, + ); + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + title: null, + help: null, + AXValue: 'Email', + AXUniqueId: undefined, + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('rejects elementRef without a stable identifier, label, or value selector', async () => { + recordSnapshot( + [ + createNode({ + AXLabel: null, + title: null, + help: null, + AXValue: null, + AXUniqueId: undefined, + }), + ], + 0, + ); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode()]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_FOUND', elementRef: 
'e1' }); + expect(calls).toEqual([]); + }); + + it('matches explicit selector fields by exact AND', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Submit', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Submit', role: 'AXButton', type: 'Button' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', label: 'Submit', role: 'button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('allows multiple matches for exists', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', label: 'Duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('succeeds for gone when selector count is zero', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', label: 'Loading', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + }); + + it('succeeds for selector-free gone when no element contains text', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', text: 'Loading weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + }); + + it('times out for 
selector-free gone while an element contains text', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Loading weather...' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', text: 'Loading weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + candidates: [expect.objectContaining({ label: 'Loading weather...' })], + }); + }); + + it('succeeds for gone when selector matches remain but none contain text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Loading weather...', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Ready', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', role: 'text', text: 'Searching weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + }); + + it('times out for gone when selector matches contain text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Loading weather...', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Ready', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', role: 'text', text: 'Loading weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + candidates: [expect.objectContaining({ label: 'Loading weather...' 
})], + }); + }); + + it('returns TARGET_AMBIGUOUS when focused selector matches multiple elements', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', label: 'Duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: expect.arrayContaining([ + expect.objectContaining({ label: 'Duplicate' }), + expect.objectContaining({ label: 'Duplicate' }), + ]), + }); + }); + + it('returns TARGET_NOT_ACTIONABLE when focused state is unavailable', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + AXUniqueId: 'email-field', + role: 'AXTextField', + type: 'TextField', + AXLabel: null, + AXValue: 'hello@example.com', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', identifier: 'email-field', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + message: 'The matched runtime UI element does not expose focus state.', + candidates: [expect.objectContaining({ identifier: 'email-field' })], + }); + }); + + it('succeeds for focused when the matched element is focused', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + AXUniqueId: 'email-field', + role: 'AXTextField', + type: 'TextField', + AXLabel: null, + AXValue: 'hello@example.com', + AXFocused: true, + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', identifier: 'email-field', timeoutMs: 0 }, + executor, + ); 
+ + expect(result.didError).toBe(false); + }); + + it('times out with latest snapshot and candidates for unresolved enabled state', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([createNode({ AXUniqueId: 'login-button', enabled: false })]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', identifier: 'login-button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + timeoutMs: 0, + candidates: [expect.objectContaining({ identifier: 'login-button' })], + }); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('includes empty candidates and exact-match guidance for selector timeouts with zero matches', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXUniqueId: 'other-button' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', identifier: 'missing-button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + candidates: [], + recoveryHint: + 'Selector fields match exact values. 
Use textContains for partial visible text, inspect the latest runtime snapshot, or adjust the wait selector.', + }); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + }); + + it('checks textContains against normalized case-insensitive value before label', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXUniqueId: 'status', AXLabel: 'Loading', AXValue: 'Server Ready' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { + simulatorId, + predicate: 'textContains', + identifier: 'status', + text: 'server ready', + timeoutMs: 0, + }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('narrows selector matches by text before treating textContains as ambiguous', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Close', role: 'AXButton', type: 'Button' }), + createNode({ + AXLabel: 'Lisbon, Portugal, 9:24 PM ¡ Sunny', + role: 'AXButton', + type: 'Button', + }), + createNode({ AXLabel: 'Clear search', role: 'AXButton', type: 'Button' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'button', text: 'Lisbon', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('returns TARGET_AMBIGUOUS for textContains when selector plus text still matches multiple elements', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Lisbon saved', role: 'AXButton', type: 'Button' }), + createNode({ AXLabel: 'Lisbon details', role: 'AXButton', type: 'Button' }), + createNode({ AXLabel: 'Lisbon', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'button', text: 'Lisbon', timeoutMs: 0 }, + 
executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: [ + expect.objectContaining({ label: 'Lisbon saved' }), + expect.objectContaining({ label: 'Lisbon details' }), + ], + }); + }); + + it('supports selector-free textContains when exactly one element matches', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Header' }), + createNode({ AXLabel: 'Light rain is expected around 2 PM.' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'Light rain', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + expect(result.waitMatch).toMatchObject({ + predicate: 'textContains', + matches: [expect.objectContaining({ label: 'Light rain is expected around 2 PM.' })], + }); + }); + + it('succeeds for selector-free textContains when multiple candidates share matching visible text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'You just pressed the button!' }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + AXValue: 'You just pressed the button!', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'you just pressed', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toMatchObject({ + predicate: 'textContains', + matches: [ + expect.objectContaining({ label: 'You just pressed the button!' }), + expect.objectContaining({ value: 'You just pressed the button!' 
}), + ], + }); + }); + + it('succeeds for selector textContains when multiple candidates share matching visible text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate status', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Duplicate status', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'text', text: 'duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('succeeds for selector-free textContains when multiple candidates exactly match', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Hello from rs1' }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + AXValue: 'Hello from rs1', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'hello from rs1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('returns TARGET_AMBIGUOUS for selector-free textContains with mixed partial matches', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready' }), + createNode({ AXLabel: 'Ready now' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: [ + expect.objectContaining({ label: 'Ready' }), + expect.objectContaining({ label: 'Ready now' }), + ], + }); + }); + + it('preserves the runtime store when every poll returns unparsable UI', async () => { + recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 
0); + const previousSnapshot = getRuntimeSnapshot(simulatorId, 0); + const { executor } = createSequencedExecutor([{ success: true, output: 'not json' }]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'settled', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toEqual( + expect.objectContaining({ + code: 'SNAPSHOT_PARSE_FAILED', + recoveryHint: 'Retry after the app is fully launched and responsive.', + }), + ); + expect(getRuntimeSnapshot(simulatorId, 0)).toBe(previousSnapshot); + }); + + it('records empty UI payloads and times out with empty candidates', async () => { + recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); + const { executor } = createSequencedExecutor([{ success: true, output: '[]' }]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', label: 'Ready', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'WAIT_TIMEOUT', candidates: [] }); + expect(result.capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + elements: [], + actions: [], + }), + ); + expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('succeeds for gone when an empty UI payload has no matching elements', async () => { + const { executor } = createSequencedExecutor([{ success: true, output: '{"elements": []}' }]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', label: 'Loading', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + expect(result.capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + elements: [], + actions: [], + }), + ); + }); + + it('preserves the runtime store when the debugger guard blocks before polling', async () => { + recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); + const 
previousSnapshot = getRuntimeSnapshot(simulatorId, 0); + const stoppedDebugger = await createStoppedDebuggerManager(); + const guardedExecutor: CommandExecutor = async () => { + throw new Error('AXe should not run when debugger guard blocks'); + }; + + try { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + wait_for_uiLogic( + { simulatorId, predicate: 'settled', timeoutMs: 0 }, + guardedExecutor, + createMockAxeHelpers(), + stoppedDebugger, + createTiming().timing, + ), + ); + + const result = ctx.structuredOutput?.result as CaptureResultDomainResult; + expect(result.didError).toBe(true); + expect(result.uiError).toEqual( + expect.objectContaining({ + code: 'ACTION_FAILED', + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }), + ); + expect(getRuntimeSnapshot(simulatorId, 0)).toBe(previousSnapshot); + } finally { + await stoppedDebugger.disposeAll(); + } + }); + + it('waits until runtime snapshot element signatures remain settled', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Loading', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + ]); + const { timing, getNow } = createTiming(); + + const result = await runWaitForUi( + { + simulatorId, + predicate: 'settled', + timeoutMs: 500, + pollIntervalMs: 100, + settledDurationMs: 100, + }, + executor, + timing, + ); + + expect(result.didError).toBe(false); + expect(getNow()).toBe(200); + expect(firstRuntimeLabel(result)).toBe('Ready'); + }); +}); diff --git a/src/mcp/tools/ui-automation/batch.ts 
b/src/mcp/tools/ui-automation/batch.ts new file mode 100644 index 000000000..76687d4d9 --- /dev/null +++ b/src/mcp/tools/ui-automation/batch.ts @@ -0,0 +1,275 @@ +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from '../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { createSemanticTapBatchSteps, createSemanticTapCommand } from './shared/semantic-tap.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; +import { + createUiActionFailureResult, + createUiActionSuccessResult, + createUiAutomationRecoverableError, + mapAxeCommandError, + setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, +} from './shared/domain-result.ts'; + +const batchStepSchema = z.strictObject({ + action: z.literal('tap'), + elementRef: z + .string() + .min(1, { message: 'elementRef must be non-empty' }) + .describe('Runtime elementRef from the latest snapshot_ui or wait_for_ui output'), + preDelay: z + .number() + .min(0, { message: 'Pre-delay must be non-negative' }) + 
.max(10, { message: 'Pre-delay must be at most 10 seconds' }) + .optional() + .describe('Seconds before this step. Omit for switch elementRefs.'), + postDelay: z + .number() + .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) + .optional() + .describe('Seconds after this step. Omit for switch elementRefs.'), +}); + +const batchSchema = z.strictObject({ + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + steps: z + .array(batchStepSchema) + .min(1, { message: 'At least one batch step is required' }) + .max(100, { message: 'At most 100 batch steps are supported' }) + .describe( + 'Required array of step objects, for example [{"action":"tap","elementRef":"e1"}]. Do not use commands or raw command strings.', + ), + axCache: z.enum(['perBatch', 'perStep', 'none']).optional(), + waitTimeout: z.number().min(0, { message: 'waitTimeout must be non-negative' }).optional(), + pollInterval: z.number().positive({ message: 'pollInterval must be greater than 0' }).optional(), +}); + +type BatchParams = z.infer<typeof batchSchema>; +type BatchResult = UiActionResultDomainResult; + +const LOG_PREFIX = '[AXe]'; + +function compactBatchElementValue(value: string | undefined): string { + return value?.trim().toLowerCase() ?? 
''; +} + +function isSafeSameScreenBatchElement(element: { + role?: string; + state?: { selected?: boolean }; + value?: string; +}): boolean { + const value = compactBatchElementValue(element.value); + const isAlreadyActive = + element.state?.selected === true || value === 'selected' || value === '1' || value === 'on'; + if (isAlreadyActive || element.role === 'tab') { + return false; + } + + if (element.role !== 'switch') { + return false; + } + + return ( + element.state?.selected === false || + value === 'not selected' || + value === '0' || + value === 'off' + ); +} + +function buildBatchCommandArgs(params: BatchParams, resolvedSteps: readonly string[]): string[] { + const commandArgs = ['batch']; + for (const step of resolvedSteps) { + commandArgs.push('--step', step); + } + if (params.axCache !== undefined) { + commandArgs.push('--ax-cache', params.axCache); + } + if (params.waitTimeout !== undefined) { + commandArgs.push('--wait-timeout', String(params.waitTimeout)); + } + if (params.pollInterval !== undefined) { + commandArgs.push('--poll-interval', String(params.pollInterval)); + } + return commandArgs; +} + +function resolveBatchSteps( + params: BatchParams, +): { ok: true; steps: string[]; preserveSnapshot: boolean } | { ok: false; result: BatchResult } { + const resolvedSteps: string[] = []; + let preserveSnapshot = true; + + for (const step of params.steps) { + const resolution = resolveElementRef(params.simulatorId, step.elementRef, 'tap'); + if (!resolution.ok) { + return { + ok: false, + result: createUiActionFailureResult( + { type: 'batch' as const, stepCount: params.steps.length }, + params.simulatorId, + resolution.error.message, + { uiError: resolution.error }, + ), + }; + } + + const usesTouchActivation = resolution.element.publicElement.role === 'switch'; + preserveSnapshot &&= isSafeSameScreenBatchElement(resolution.element.publicElement); + if (usesTouchActivation && (step.preDelay !== undefined || step.postDelay !== undefined)) { + const 
message = + 'preDelay and postDelay are not supported for switch elementRefs because switches execute as touch down/up batch steps.'; + return { + ok: false, + result: createUiActionFailureResult( + { type: 'batch' as const, stepCount: params.steps.length }, + params.simulatorId, + message, + { + uiError: { + code: 'ACTION_FAILED', + message, + recoveryHint: + 'Remove preDelay/postDelay from switch steps, or wait between separate batch calls.', + elementRef: step.elementRef, + }, + }, + ), + }; + } + + const extraArgs: string[] = []; + if (step.preDelay !== undefined) { + extraArgs.push('--pre-delay', String(step.preDelay)); + } + if (step.postDelay !== undefined) { + extraArgs.push('--post-delay', String(step.postDelay)); + } + + const tapCommand = createSemanticTapCommand( + resolution.element, + step.elementRef, + extraArgs, + resolution.snapshot.elements, + ); + resolvedSteps.push(...createSemanticTapBatchSteps(tapCommand)); + } + + return { ok: true, steps: resolvedSteps, preserveSnapshot }; +} + +export function createBatchExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): NonStreamingExecutor<BatchParams, BatchResult> { + return async (params) => { + const toolName = 'batch'; + const { simulatorId, steps } = params; + const action = { type: 'batch' as const, stepCount: steps.length }; + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); + } + + const resolvedSteps = resolveBatchSteps(params); + if (!resolvedSteps.ok) { + return resolvedSteps.result; + } + + const commandArgs = buildBatchCommandArgs(params, resolvedSteps.steps); + log('info', `${LOG_PREFIX}/${toolName}: Starting ${steps.length} step batch on ${simulatorId}`); + + try { + await executeAxeCommand(commandArgs, simulatorId, 'batch', 
executor, axeHelpers); + if (!resolvedSteps.preserveSnapshot) { + clearRuntimeSnapshot(simulatorId); + } + log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to execute AXe batch with ${steps.length} steps.`, + }); + log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message, { + details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + }), + }); + } + + if (resolvedSteps.preserveSnapshot) { + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + ...(captureResult.uiError ? 
{ uiError: captureResult.uiError } : {}), + }, + ); + }; +} + +export async function batchLogic( + params: BatchParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): Promise<void> { + const ctx = getHandlerContext(); + const executeBatch = createBatchExecutor(executor, axeHelpers, debuggerManager); + const result = await executeBatch(params); + + setUiActionStructuredOutput(ctx, result); +} + +const publicSchemaObject = z.strictObject(batchSchema.omit({ simulatorId: true } as const).shape); + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: batchSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: toInternalSchema(batchSchema), + logicFunction: (params: BatchParams, executor: CommandExecutor) => + batchLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/mcp/tools/ui-automation/button.ts b/src/mcp/tools/ui-automation/button.ts index 82ca07bbd..e3d52e3c9 100644 --- a/src/mcp/tools/ui-automation/button.ts +++ b/src/mcp/tools/ui-automation/button.ts @@ -12,6 +12,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -20,6 +21,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const buttonSchema = z.object({ @@ -38,11 +40,19 @@ type ButtonParams 
= z.infer<typeof buttonSchema>; type ButtonResult = UiActionResultDomainResult; const LOG_PREFIX = '[AXe]'; +const DEFAULT_BUTTON_SETTLE_DELAY_MS = 750; + +function delayMs(durationMs: number): Promise<void> { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} export function createButtonExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + settleDelayMs = DEFAULT_BUTTON_SETTLE_DELAY_MS, ): NonStreamingExecutor<ButtonParams, ButtonResult> { return async (params) => { const toolName = 'button'; @@ -67,9 +77,16 @@ export function createButtonExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'button', executor, axeHelpers); + if (settleDelayMs > 0) { + await delayMs(settleDelayMs); + } + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to press button '${buttonType}'.`, }); @@ -86,9 +103,10 @@ export async function buttonLogic( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + settleDelayMs = DEFAULT_BUTTON_SETTLE_DELAY_MS, ): Promise<void> { const ctx = getHandlerContext(); - const executeButton = createButtonExecutor(executor, axeHelpers, debuggerManager); + const executeButton = createButtonExecutor(executor, axeHelpers, debuggerManager, settleDelayMs); const result = await executeButton(params); setUiActionStructuredOutput(ctx, result); diff --git a/src/mcp/tools/ui-automation/drag.ts b/src/mcp/tools/ui-automation/drag.ts new file mode 100644 index 000000000..80a20404e --- /dev/null +++ b/src/mcp/tools/ui-automation/drag.ts @@ -0,0 +1,244 @@ +/** + * UI Testing Plugin: Drag + 
* + * Drags from a semantic UI element from the runtime snapshot store. + */ + +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from '../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { + getRuntimeElementDirectionalDragPoints, + getRuntimeElementCenter, + getRuntimeElementSwipePoints, +} from './shared/runtime-snapshot.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +export type { AxeHelpers } from './shared/axe-command.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; +import { + createUiActionFailureResult, + createUiActionSuccessResult, + createUiAutomationRecoverableError, + mapAxeCommandError, + setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, +} from './shared/domain-result.ts'; + +const dragSchema = z.object({ + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + elementRef: z + .string() + .min(1, { message: 'elementRef must be non-empty' }) + .describe('Runtime elementRef from the latest snapshot_ui or wait_for_ui output'), + direction: z + .enum(['up', 
'down', 'left', 'right']) + .describe('Drag direction: up, down, left, or right'), + duration: z + .number() + .positive({ message: 'Duration must be greater than 0 seconds' }) + .optional() + .describe('seconds'), + distance: z + .number() + .positive({ message: 'Distance must be greater than 0' }) + .max(1, { message: 'Distance must be at most 1' }) + .optional() + .describe( + 'Normalized drag distance greater than 0 and up to 1 within the resolved element or viewport', + ), + steps: z + .number() + .int({ message: 'Steps must be an integer' }) + .min(1, { message: 'Steps must be at least 1' }) + .max(1000, { message: 'Steps must be at most 1000' }) + .optional(), + preDelay: z + .number() + .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) + .optional() + .describe('seconds'), + postDelay: z + .number() + .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) + .optional() + .describe('seconds'), +}); + +export type DragParams = z.infer<typeof dragSchema>; +type DragResult = UiActionResultDomainResult; + +const publicSchemaObject = z.strictObject(dragSchema.omit({ simulatorId: true } as const).shape); + +const LOG_PREFIX = '[AXe]'; + +export function createDragExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): NonStreamingExecutor<DragParams, DragResult> { + return async (params) => { + const toolName = 'drag'; + const { simulatorId, elementRef, direction, duration, distance, steps, preDelay, postDelay } = + params; + const unresolvedAction = { + type: 'drag' as const, + elementRef, + direction, + ...(duration !== undefined ? { durationSeconds: duration } : {}), + ...(steps !== undefined ? 
{ steps } : {}), + }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'touch'); + if (!resolution.ok) { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const viewportFrame = resolution.snapshot.elements[0]?.publicElement.frame; + const points = resolution.element.publicElement.actions.includes('swipeWithin') + ? getRuntimeElementSwipePoints(resolution.element, direction, distance) + : getRuntimeElementDirectionalDragPoints( + resolution.element, + direction, + distance, + viewportFrame, + ); + if (!points.ok) { + const uiError = createUiAutomationRecoverableError({ + code: 'TARGET_NOT_ACTIONABLE', + message: points.message, + elementRef, + }); + return createUiActionFailureResult(unresolvedAction, simulatorId, points.message, { + uiError, + }); + } + + const action = { + ...unresolvedAction, + from: points.from, + to: points.to, + }; + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); + } + + const commandArgs = [ + 'drag', + '--start-x', + String(points.from.x), + '--start-y', + String(points.from.y), + '--end-x', + String(points.to.x), + '--end-y', + String(points.to.y), + ]; + if (duration !== undefined) { + commandArgs.push('--duration', String(duration)); + } + if (steps !== undefined) { + commandArgs.push('--steps', String(steps)); + } + if (preDelay !== undefined) { + commandArgs.push('--pre-delay', String(preDelay)); + } + if (postDelay !== undefined) { + commandArgs.push('--post-delay', String(postDelay)); + } + + const target = getRuntimeElementCenter(resolution.element); + const optionsText = duration !== undefined ? 
` duration=${duration}s` : ''; + log( + 'info', + `${LOG_PREFIX}/${toolName}: Starting ${direction} drag from ${elementRef} at (${target.x}, ${target.y})${optionsText} on ${simulatorId}`, + ); + + try { + await executeAxeCommand(commandArgs, simulatorId, 'drag', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); + log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to simulate ${direction} drag from ${elementRef}.`, + }); + log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message, { + details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), + }); + } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + previousRuntimeSnapshot: resolution.snapshot.payload, + ...(captureResult.uiError ? 
{ uiError: captureResult.uiError } : {}), + }, + ); + }; +} + +export async function dragLogic( + params: DragParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): Promise { + const ctx = getHandlerContext(); + const executeDrag = createDragExecutor(executor, axeHelpers, debuggerManager); + const result = await executeDrag(params); + + setUiActionStructuredOutput(ctx, result); +} + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: dragSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: toInternalSchema(dragSchema), + logicFunction: (params: DragParams, executor: CommandExecutor) => + dragLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/mcp/tools/ui-automation/gesture.ts b/src/mcp/tools/ui-automation/gesture.ts index 447074cb3..46a521197 100644 --- a/src/mcp/tools/ui-automation/gesture.ts +++ b/src/mcp/tools/ui-automation/gesture.ts @@ -19,6 +19,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -27,6 +28,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const gestureSchema = z.object({ @@ -49,6 +51,7 @@ const gestureSchema = z.object({ .number() .int() .min(1) + .max(2000) .optional() .describe( 'Screen width in pixels. 
Used for gesture calculations. Auto-detected if not provided.', @@ -57,6 +60,7 @@ const gestureSchema = z.object({ .number() .int() .min(1) + .max(3000) .optional() .describe( 'Screen height in pixels. Used for gesture calculations. Auto-detected if not provided.', @@ -64,21 +68,25 @@ const gestureSchema = z.object({ duration: z .number() .min(0, { message: 'Duration must be non-negative' }) + .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('Duration of the gesture in seconds.'), delta: z .number() .min(0, { message: 'Delta must be non-negative' }) + .max(200, { message: 'Delta must be at most 200' }) .optional() .describe('Distance to move in pixels.'), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('Delay before starting the gesture in seconds.'), postDelay: z .number() .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('Delay after completing the gesture in seconds.'), }); @@ -132,9 +140,13 @@ export function createGestureExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'gesture', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to execute gesture '${preset}'.`, }); diff --git a/src/mcp/tools/ui-automation/key_press.ts b/src/mcp/tools/ui-automation/key_press.ts index 7c8afc647..93fd89890 100644 --- a/src/mcp/tools/ui-automation/key_press.ts +++ b/src/mcp/tools/ui-automation/key_press.ts @@ -12,6 +12,7 @@ import { toInternalSchema, } from 
'../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -20,6 +21,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const keyPressSchema = z.object({ @@ -29,7 +31,7 @@ const keyPressSchema = z.object({ .int({ message: 'HID keycode to press (0-255)' }) .min(0) .max(255) - .describe('HID keycode'), + .describe('HID keycode. Common values: 40 Return/Enter, 42 Backspace, 43 Tab, 44 Space.'), duration: z .number() .min(0, { message: 'Duration must be non-negative' }) @@ -70,9 +72,13 @@ export function createKeyPressExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'key', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to simulate key press (code: ${keyCode}).`, }); diff --git a/src/mcp/tools/ui-automation/key_sequence.ts b/src/mcp/tools/ui-automation/key_sequence.ts index 95cafe611..998ad7f56 100644 --- a/src/mcp/tools/ui-automation/key_sequence.ts +++ b/src/mcp/tools/ui-automation/key_sequence.ts @@ -18,6 +18,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from 
'./shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -26,6 +27,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const keySequenceSchema = z.object({ @@ -33,8 +35,13 @@ const keySequenceSchema = z.object({ keyCodes: z .array(z.number().int().min(0).max(255)) .min(1, { message: 'At least one key code required' }) - .describe('HID keycodes'), - delay: z.number().min(0, { message: 'Delay must be non-negative' }).optional(), + .max(100, { message: 'At most 100 key codes are supported' }) + .describe('HID keycodes. Common values: 40 Return/Enter, 42 Backspace, 43 Tab, 44 Space.'), + delay: z + .number() + .min(0, { message: 'Delay must be non-negative' }) + .max(5, { message: 'Delay must be at most 5 seconds' }) + .optional(), }); type KeySequenceParams = z.infer; @@ -73,9 +80,13 @@ export function createKeySequenceExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'key-sequence', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to execute key sequence.', }); diff --git a/src/mcp/tools/ui-automation/long_press.ts b/src/mcp/tools/ui-automation/long_press.ts index 4a202066d..9bd5039ec 100644 --- a/src/mcp/tools/ui-automation/long_press.ts +++ b/src/mcp/tools/ui-automation/long_press.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Long Press * - * Long press at specific coordinates 
for given duration (ms). - * Use snapshot_ui for precise coordinates (don't guess from screenshots). + * Long presses a semantic UI element from the runtime snapshot store. */ import * as z from 'zod'; @@ -18,25 +17,29 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const longPressSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z.number().int({ message: 'X coordinate for the long press' }), - y: z.number().int({ message: 'Y coordinate for the long press' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), duration: z .number() - .positive({ message: 'Duration of the long press in milliseconds' }) + .positive({ message: 'Duration must be greater than 0 milliseconds' }) + .max(10_000, { message: 'Duration must be at most 10000 milliseconds' }) .describe('milliseconds'), }); @@ -56,8 +59,18 @@ export function createLongPressExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'long_press'; - const { simulatorId, x, y, duration } = params; - const action = { 
type: 'long-press' as const, x, y, durationMs: duration }; + const { simulatorId, elementRef, duration } = params; + const unresolvedAction = { type: 'long-press' as const, elementRef, durationMs: duration }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'longPress'); + if (!resolution.ok) { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const center = getRuntimeElementActivationPoint(resolution.element); + const action = { ...unresolvedAction, x: center.x, y: center.y }; const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -68,13 +81,13 @@ export function createLongPressExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const delayInSeconds = Number(duration) / 1000; + const delayInSeconds = duration / 1000; const commandArgs = [ 'touch', '-x', - String(x), + String(center.x), '-y', - String(y), + String(center.y), '--down', '--up', '--delay', @@ -83,25 +96,45 @@ export function createLongPressExecutor( log( 'info', - `${LOG_PREFIX}/${toolName}: Starting for (${x}, ${y}), ${duration}ms on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting for elementRef ${elementRef}, ${duration}ms on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'touch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => `Failed to simulate long press at (${x}, ${y}).`, + axeFailureMessage: () => `Failed to simulate long press on elementRef ${elementRef}.`, }); log('error', 
`${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/mcp/tools/ui-automation/screenshot.ts b/src/mcp/tools/ui-automation/screenshot.ts index 426b5663d..945c269f9 100644 --- a/src/mcp/tools/ui-automation/screenshot.ts +++ b/src/mcp/tools/ui-automation/screenshot.ts @@ -60,6 +60,44 @@ interface SimctlDeviceList { devices: Record; } +async function getSimulatorDeviceForSimulatorId( + simulatorId: string, + executor: CommandExecutor, +): Promise { + const listCommand = ['xcrun', 'simctl', 'list', 'devices', '-j']; + const result = await executor(listCommand, `${LOG_PREFIX}: list devices`, false); + + if (!result.success || !result.output) { + return null; + } + + const data = JSON.parse(result.output) as SimctlDeviceList; + for (const devices of Object.values(data.devices)) { + const match = devices.find((device) => device.udid === simulatorId); + if (match) { + return match; + } + } + + return null; +} + +async function assertSimulatorBooted( + simulatorId: string, + executor: CommandExecutor, +): Promise { + const device = await getSimulatorDeviceForSimulatorId(simulatorId, executor); + if (!device) { + throw new SystemError(`Simulator ${simulatorId} was not found.`); + } + if (device.state !== 'Booted') { + throw new SystemError( + `Simulator ${simulatorId} is ${device.state ?? 'not booted'}. 
Boot the simulator and try again.`, + ); + } + return device; +} + function escapeSwiftStringLiteral(value: string): string { return value .replace(/\\/g, '\\\\') @@ -96,21 +134,10 @@ export async function getDeviceNameForSimulatorId( executor: CommandExecutor, ): Promise { try { - const listCommand = ['xcrun', 'simctl', 'list', 'devices', '-j']; - const result = await executor(listCommand, `${LOG_PREFIX}: list devices`, false); - - if (result.success && result.output) { - const data = JSON.parse(result.output) as SimctlDeviceList; - const devices = data.devices; - - for (const runtime of Object.keys(devices)) { - for (const device of devices[runtime]) { - if (device.udid === simulatorId) { - log('info', `${LOG_PREFIX}: Found device name "${device.name}" for ${simulatorId}`); - return device.name; - } - } - } + const device = await getSimulatorDeviceForSimulatorId(simulatorId, executor); + if (device) { + log('info', `${LOG_PREFIX}: Found device name "${device.name}" for ${simulatorId}`); + return device.name; } log('warn', `${LOG_PREFIX}: Could not find device name for ${simulatorId}`); return null; @@ -219,6 +246,7 @@ export function createScreenshotExecutor( ); try { + const simulatorDevice = await assertSimulatorBooted(simulatorId, executor); const result = await executor(commandArgs, `${LOG_PREFIX}: screenshot`, false); if (!result.success) { @@ -228,8 +256,7 @@ export function createScreenshotExecutor( log('info', `${LOG_PREFIX}/screenshot: Success for ${simulatorId}`); try { - const deviceName = await getDeviceNameForSimulatorId(simulatorId, executor); - const isLandscape = await detectLandscapeMode(executor, deviceName ?? 
undefined); + const isLandscape = await detectLandscapeMode(executor, simulatorDevice.name); if (isLandscape) { log('info', `${LOG_PREFIX}/screenshot: Landscape mode detected, rotating +90`); const rotated = await rotateImage(screenshotPath, 90, executor); diff --git a/src/mcp/tools/ui-automation/shared/domain-result.ts b/src/mcp/tools/ui-automation/shared/domain-result.ts index 0700a9896..8d1eb2d20 100644 --- a/src/mcp/tools/ui-automation/shared/domain-result.ts +++ b/src/mcp/tools/ui-automation/shared/domain-result.ts @@ -1,4 +1,5 @@ -import type { ToolHandlerContext } from '../../../../rendering/types.ts'; +import type { RenderHints, ToolHandlerContext } from '../../../../rendering/types.ts'; +import type { NextStep } from '../../../../types/common.ts'; import type { BasicDiagnostics, CapturePayload, @@ -6,12 +7,34 @@ import type { UiAction, UiActionResultDomainResult, } from '../../../../types/domain-results.ts'; +import type { + RuntimeElementV1, + RuntimeSnapshotV1, + UiAutomationRecoverableError, + UiAutomationRecoverableErrorCode, + UiWaitMatch, +} from '../../../../types/ui-snapshot.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { createBasicDiagnostics } from '../../../../utils/diagnostics.ts'; import { AxeError, DependencyError, SystemError } from '../../../../utils/errors.ts'; +import { + createRuntimeSnapshotNextSteps, + getForegroundCompletionSuppressedRuntimeTargetRefs, +} from './runtime-next-steps.ts'; +import type { + RuntimeSnapshotNextStepActionContext, + RuntimeSnapshotNextStepActionTarget, +} from './runtime-next-steps.ts'; const UI_ACTION_SCHEMA = 'xcodebuildmcp.output.ui-action-result'; const CAPTURE_SCHEMA = 'xcodebuildmcp.output.capture-result'; +const REFRESH_SNAPSHOT_RECOVERY_HINT = + 'Run snapshot_ui again and retry with a current element reference from the refreshed snapshot.'; + +const uiActionNextStepContexts = new WeakMap< + UiActionResultDomainResult, + RuntimeSnapshotNextStepActionContext 
+>(); function createDiagnostics( warnings: readonly string[] = [], @@ -28,20 +51,108 @@ function compact(values: Array): string[] { return values.filter((value): value is string => typeof value === 'string' && value.length > 0); } +function createUiActionSuccessNextSteps(result: UiActionResultDomainResult): NextStep[] { + if (result.didError) { + return []; + } + + return [ + { + label: 'Refresh after UI action', + tool: 'snapshot_ui', + params: { simulatorId: result.artifacts.simulatorId }, + }, + ]; +} + +function getUiActionTargetRef(action: UiAction): string | null { + switch (action.type) { + case 'tap': + case 'touch': + case 'long-press': + case 'type-text': + return action.elementRef; + case 'swipe': + return action.withinElementRef; + case 'drag': + return action.elementRef; + default: + return null; + } +} + +function createNextStepActionTarget( + element: RuntimeElementV1, +): RuntimeSnapshotNextStepActionTarget { + return { + ...(element.label !== undefined ? { label: element.label } : {}), + ...(element.value !== undefined ? { value: element.value } : {}), + ...(element.identifier !== undefined ? { identifier: element.identifier } : {}), + ...(element.role !== undefined ? { role: element.role } : {}), + ...(element.state !== undefined ? { state: element.state } : {}), + }; +} + +function findUiActionTargetElement( + action: UiAction, + runtimeSnapshot: RuntimeSnapshotV1, +): RuntimeElementV1 | null { + const targetRef = getUiActionTargetRef(action); + if (!targetRef) { + return null; + } + + return runtimeSnapshot.elements.find((element) => element.ref === targetRef) ?? null; +} + +export function createUiAutomationRecoverableError(params: { + code: UiAutomationRecoverableErrorCode; + message: string; + recoveryHint?: string; + elementRef?: string; +}): UiAutomationRecoverableError { + return { + code: params.code, + message: params.message, + recoveryHint: params.recoveryHint ?? REFRESH_SNAPSHOT_RECOVERY_HINT, + ...(params.elementRef ? 
{ elementRef: params.elementRef } : {}), + }; +} + export function createUiActionSuccessResult( action: UiAction, simulatorId: string, warnings: Array = [], + options: { + capture?: CapturePayload; + uiError?: UiAutomationRecoverableError; + previousRuntimeSnapshot?: RuntimeSnapshotV1; + } = {}, ): UiActionResultDomainResult { - return { + const result: UiActionResultDomainResult = { kind: 'ui-action-result', didError: false, error: null, summary: { status: 'SUCCEEDED' }, action, artifacts: { simulatorId }, + ...(options.capture ? { capture: options.capture } : {}), diagnostics: createDiagnostics(compact(warnings), []), + ...(options.uiError ? { uiError: options.uiError } : {}), }; + + if (options.previousRuntimeSnapshot) { + const actionTargetElement = findUiActionTargetElement(action, options.previousRuntimeSnapshot); + uiActionNextStepContexts.set(result, { + action, + previousScreenHash: options.previousRuntimeSnapshot.screenHash, + ...(actionTargetElement + ? { actionTarget: createNextStepActionTarget(actionTargetElement) } + : {}), + }); + } + + return result; } export function createUiActionFailureResult( @@ -51,6 +162,7 @@ export function createUiActionFailureResult( options: { warnings?: Array; details?: Array; + uiError?: UiAutomationRecoverableError; } = {}, ): UiActionResultDomainResult { return { @@ -61,6 +173,7 @@ export function createUiActionFailureResult( action, artifacts: { simulatorId }, diagnostics: createDiagnostics(compact(options.warnings ?? []), compact(options.details ?? [])), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -70,6 +183,8 @@ export function createCaptureSuccessResult( screenshotPath?: string; capture?: CapturePayload; warnings?: Array; + uiError?: UiAutomationRecoverableError; + waitMatch?: UiWaitMatch; } = {}, ): CaptureResultDomainResult { return { @@ -83,6 +198,8 @@ export function createCaptureSuccessResult( }, ...(options.capture ? 
{ capture: options.capture } : {}), diagnostics: createDiagnostics(compact(options.warnings ?? []), []), + ...(options.uiError ? { uiError: options.uiError } : {}), + ...(options.waitMatch ? { waitMatch: options.waitMatch } : {}), }; } @@ -91,8 +208,10 @@ export function createCaptureFailureResult( message: string, options: { screenshotPath?: string; + capture?: CapturePayload; warnings?: Array; details?: Array; + uiError?: UiAutomationRecoverableError; } = {}, ): CaptureResultDomainResult { return { @@ -104,7 +223,9 @@ export function createCaptureFailureResult( simulatorId, ...(options.screenshotPath ? { screenshotPath: options.screenshotPath } : {}), }, + ...(options.capture ? { capture: options.capture } : {}), diagnostics: createDiagnostics(compact(options.warnings ?? []), compact(options.details ?? [])), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -115,6 +236,10 @@ interface AxeErrorMessages { unexpectedFailureMessage?: (message: string) => string; } +export function shouldInvalidateRuntimeSnapshotAfterActionError(error: unknown): boolean { + return error instanceof AxeError; +} + export function mapAxeCommandError( error: unknown, messages: AxeErrorMessages, @@ -147,24 +272,79 @@ export function mapAxeCommandError( }; } +function mergeRuntimeSnapshotRenderHints( + renderHints: RenderHints | undefined, + suppressedTargetRefs: readonly string[], +): RenderHints | undefined { + if (suppressedTargetRefs.length === 0) { + return renderHints; + } + + return { + ...renderHints, + runtimeSnapshot: { + ...renderHints?.runtimeSnapshot, + suppressedTargetRefs, + }, + }; +} + export function setUiActionStructuredOutput( ctx: ToolHandlerContext, result: UiActionResultDomainResult, ): void { + if (result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot') { + const actionContext = uiActionNextStepContexts.get(result); + const suppressedTargetRefs = getForegroundCompletionSuppressedRuntimeTargetRefs({ + simulatorId: 
result.artifacts.simulatorId, + runtimeSnapshot: result.capture, + }); + ctx.structuredOutput = { + result, + schema: UI_ACTION_SCHEMA, + schemaVersion: '2', + ...(suppressedTargetRefs.length > 0 + ? { + renderHints: { + runtimeSnapshot: { suppressedTargetRefs }, + }, + } + : {}), + }; + ctx.nextSteps = createRuntimeSnapshotNextSteps({ + simulatorId: result.artifacts.simulatorId, + runtimeSnapshot: result.capture, + includeRefreshAndWait: false, + ...(actionContext ? { actionContext } : {}), + }); + return; + } + ctx.structuredOutput = { result, schema: UI_ACTION_SCHEMA, schemaVersion: '2', }; + ctx.nextSteps = createUiActionSuccessNextSteps(result); } export function setCaptureStructuredOutput( ctx: ToolHandlerContext, result: CaptureResultDomainResult, + renderHints?: RenderHints, ): void { + const suppressedTargetRefs = + result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' + ? getForegroundCompletionSuppressedRuntimeTargetRefs({ + simulatorId: result.artifacts.simulatorId, + runtimeSnapshot: result.capture, + }) + : []; + const mergedRenderHints = mergeRuntimeSnapshotRenderHints(renderHints, suppressedTargetRefs); ctx.structuredOutput = { result, schema: CAPTURE_SCHEMA, schemaVersion: '2', + ...(mergedRenderHints ? 
{ renderHints: mergedRenderHints } : {}), }; } diff --git a/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts new file mode 100644 index 000000000..240041377 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts @@ -0,0 +1,62 @@ +import type { CapturePayload } from '../../../../types/domain-results.ts'; +import type { UiAutomationRecoverableError } from '../../../../types/ui-snapshot.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { executeAxeCommand } from './axe-command.ts'; +import type { AxeHelpers } from './axe-command.ts'; +import { RuntimeSnapshotParseError, parseRuntimeSnapshotResponse } from './runtime-snapshot.ts'; +import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './snapshot-ui-state.ts'; + +const POST_ACTION_SNAPSHOT_RECOVERY_HINT = + 'Run snapshot_ui again before reusing elementRefs from the previous snapshot.'; + +export async function captureRuntimeSnapshotAfterAction(params: { + simulatorId: string; + executor: CommandExecutor; + axeHelpers: AxeHelpers; +}): Promise { + const responseText = await executeAxeCommand( + ['describe-ui'], + params.simulatorId, + 'describe-ui', + params.executor, + params.axeHelpers, + ); + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId: params.simulatorId, + responseText, + }); + recordRuntimeSnapshot(snapshot); + return snapshot.payload; +} + +export async function captureRuntimeSnapshotAfterActionSafely(params: { + simulatorId: string; + executor: CommandExecutor; + axeHelpers: AxeHelpers; +}): Promise< + | { capture: CapturePayload; warning?: never; uiError?: never } + | { capture?: never; warning: string; uiError: UiAutomationRecoverableError } +> { + try { + return { + capture: await captureRuntimeSnapshotAfterAction(params), + }; + } catch (error) { + clearRuntimeSnapshot(params.simulatorId); + + const isParseFailure = error instanceof 
RuntimeSnapshotParseError; + const message = isParseFailure + ? 'UI action succeeded, but the refreshed runtime snapshot could not be parsed.' + : 'UI action succeeded, but the refreshed runtime snapshot could not be captured.'; + const detail = error instanceof Error ? error.message : String(error); + + return { + warning: `${message} ${POST_ACTION_SNAPSHOT_RECOVERY_HINT}`, + uiError: { + code: isParseFailure ? 'SNAPSHOT_PARSE_FAILED' : 'SNAPSHOT_CAPTURE_FAILED', + message: `${message} ${detail}`, + recoveryHint: POST_ACTION_SNAPSHOT_RECOVERY_HINT, + }, + }; + } +} diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts new file mode 100644 index 000000000..57a3873f1 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -0,0 +1,763 @@ +import type { NextStep } from '../../../../types/common.ts'; +import type { UiAction } from '../../../../types/domain-results.ts'; +import type { + RuntimeElementV1, + RuntimeSnapshotElementRecord, + RuntimeSnapshotV1, +} from '../../../../types/ui-snapshot.ts'; +import { getRuntimeSnapshot } from './snapshot-ui-state.ts'; + +const HIDDEN_TAP_NEXT_STEP_LABELS = new Set(['sheet grabber']); + +const LOW_PRIORITY_TAP_NEXT_STEP_LABELS = new Set([ + 'close', + 'clear search', + 'remove', + 'delete', + 'clear', + 'c', + 'ac', + '±', + '%', + '÷', + '×', + '-', + '+', + '=', +]); + +const SCREEN_CHANGING_TAP_NEXT_STEP_LABELS = new Set([ + 'back', + 'cancel', + 'done', + 'settings', + 'menu', + 'home', + 'next', + 'previous', +]); + +const FOREGROUND_DISMISS_TAP_NEXT_STEP_LABELS = new Set(['back', 'cancel', 'close', 'done']); +const COMPLETION_ACTION_TAP_NEXT_STEP_LABELS = new Set(['add', 'save']); +const SHEET_EXPANDED_VALUE_PATTERN = /\b(?:expanded|full(?:\s+screen)?)\b/i; +const INCOMPLETE_STATE_NEXT_STEP_TEXT = new Set([ + 'not added', + 'not saved', + 'not selected', + 'unadded', + 'unsaved', + 'unselected', +]); + +export interface 
RuntimeSnapshotNextStepActionTarget { + label?: string; + value?: string; + identifier?: string; + role?: string; + state?: { selected?: boolean }; +} + +export interface RuntimeSnapshotNextStepActionContext { + action: UiAction; + previousScreenHash: string; + actionTarget?: RuntimeSnapshotNextStepActionTarget; +} + +function compactTapNextStepText(value: string | undefined): string { + return (value ?? '').replace(/\s+/g, ' ').trim(); +} + +function isHiddenTapNextStepElement(label: string | undefined): boolean { + return HIDDEN_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +function isLowPriorityTapNextStepElement(label: string | undefined): boolean { + return LOW_PRIORITY_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +function isContentRichTapNextStepElement(element: { + label?: string; + identifier?: string; +}): boolean { + const label = compactTapNextStepText(element.label); + const identifier = compactTapNextStepText(element.identifier); + return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); +} + +function isScreenChangingTapNextStepElement(element: { + label?: string; + identifier?: string; + role?: string; +}): boolean { + const label = compactTapNextStepText(element.label).toLowerCase(); + const identifier = compactTapNextStepText(element.identifier).toLowerCase(); + return ( + element.role === 'tab' || + SCREEN_CHANGING_TAP_NEXT_STEP_LABELS.has(label) || + /(?:^|[._-])(back|navigation|tab|detail|details)(?:$|[._-])/i.test(identifier) + ); +} + +function isGenericRowTapNextStepElement(element: { identifier?: string; role?: string }): boolean { + const identifier = compactTapNextStepText(element.identifier).toLowerCase(); + return element.role === 'cell' || /(?:^|[._-])(row|cell|item)(?:$|[._-])/i.test(identifier); +} + +function isStateChangingTapNextStepElement(element: { + role?: string; + state?: { selected?: boolean }; + value?: string; +}): boolean { + const value 
= compactTapNextStepText(element.value).toLowerCase(); + const hasSelectionState = + element.state?.selected === true || + value === 'selected' || + (element.role !== 'tab' && (element.state?.selected === false || value === 'not selected')); + + const hasToggleValue = + element.role !== 'tab' && (value === '0' || value === '1' || value === 'off' || value === 'on'); + + return element.role === 'switch' || hasSelectionState || hasToggleValue; +} + +/** + * Ranks generic tap next-step candidates. + * + * Business rules: + * - Prefer content-rich controls because they usually represent cards, rows, or details worth opening. + * - Prefer generic rows/cells/items over chrome when content-rich signals are absent. + * - Deprioritize navigation/screen-changing controls so agents do not immediately leave useful content. + * - Deprioritize utility/destructive controls such as close, clear, remove, and calculator operators. + * - State-changing controls are filtered out before ranking; they remain valid targets, but are not + * promoted as generic "try this next" suggestions because toggling state can be destructive. 
+ */ +function getTapNextStepElementPriority(element: { + label?: string; + identifier?: string; + role?: string; + state?: { selected?: boolean }; + value?: string; +}): number { + if (isLowPriorityTapNextStepElement(element.label)) { + return 90; + } + if (isContentRichTapNextStepElement(element)) { + return 10; + } + if (isScreenChangingTapNextStepElement(element)) { + return 60; + } + if (isGenericRowTapNextStepElement(element)) { + return 30; + } + return 20; +} + +function hasScrollSemanticIdentity(element: { + label?: string; + value?: string; + identifier?: string; +}): boolean { + return ( + element.label !== undefined || element.value !== undefined || element.identifier !== undefined + ); +} + +function isScrollableNextStepElement(element: { + actions: readonly string[]; + role?: string; + label?: string; + value?: string; + identifier?: string; +}): boolean { + return ( + element.actions.includes('swipeWithin') && + (element.role === 'scroll-view' || + element.role === 'list' || + element.role === 'application' || + element.role === 'window' || + (element.role === 'other' && hasScrollSemanticIdentity(element))) + ); +} + +function getScrollRolePriority(element: RuntimeElementV1): number { + switch (element.role) { + case 'scroll-view': + case 'list': + return 0; + case 'other': + return 1; + case 'application': + case 'window': + return 2; + default: + return 3; + } +} + +function getScrollIdentityPriority(element: { + label?: string; + value?: string; + identifier?: string; +}): number { + const identifier = compactTapNextStepText(element.identifier).toLowerCase(); + if (/(?:^|[._-])(sheet|list|table|panel|drawer|overlay|dialog)(?:$|[._-])/i.test(identifier)) { + return 0; + } + return hasScrollSemanticIdentity(element) ? 
1 : 2; +} + +function compareScrollableNextStepCandidates( + left: { element: RuntimeElementV1; index: number }, + right: { element: RuntimeElementV1; index: number }, + recordsByRef: Map, +): number { + const roleDelta = getScrollRolePriority(left.element) - getScrollRolePriority(right.element); + if (roleDelta !== 0) { + return roleDelta; + } + + const identityDelta = + getScrollIdentityPriority(left.element) - getScrollIdentityPriority(right.element); + if (identityDelta !== 0) { + return identityDelta; + } + + const leftDepth = recordsByRef.get(left.element.ref)?.metadata.depth ?? 0; + const rightDepth = recordsByRef.get(right.element.ref)?.metadata.depth ?? 0; + if (leftDepth !== rightDepth) { + return rightDepth - leftDepth; + } + + const leftIsVertical = left.element.frame.height >= left.element.frame.width; + const rightIsVertical = right.element.frame.height >= right.element.frame.width; + if (leftIsVertical !== rightIsVertical) { + return leftIsVertical ? -1 : 1; + } + + if (left.element.frame.height !== right.element.frame.height) { + return right.element.frame.height - left.element.frame.height; + } + + return left.index - right.index; +} + +/** + * Checks AX hierarchy ancestry using the snapshot metadata path. + * + * This is the strongest foreground/background signal because it comes from the raw accessibility + * tree. If a candidate path starts with the root path, it is structurally inside that root. + */ +function isSameOrDescendantPath(parentPath: string, candidatePath: string): boolean { + return candidatePath === parentPath || candidatePath.startsWith(`${parentPath}.`); +} + +/** + * Checks whether a candidate visually fits inside a potential foreground container. + * + * Business rules: + * - Use geometry as a fallback for AX layouts that flatten sheet/dialog children as siblings. + * - The candidate center must be inside the parent frame. 
+ * - The candidate must not be larger than the parent; this prevents full-screen/background scroll + * views from being pulled into a smaller foreground panel just because their center overlaps it. + */ +function isFrameInside(parent: RuntimeElementV1, candidate: RuntimeElementV1): boolean { + const candidateCenterX = candidate.frame.x + candidate.frame.width / 2; + const candidateCenterY = candidate.frame.y + candidate.frame.height / 2; + return ( + candidate.frame.width <= parent.frame.width && + candidate.frame.height <= parent.frame.height && + candidateCenterX >= parent.frame.x && + candidateCenterX <= parent.frame.x + parent.frame.width && + candidateCenterY >= parent.frame.y && + candidateCenterY <= parent.frame.y + parent.frame.height + ); +} + +/** + * Decides whether a candidate belongs to a foreground root. + * + * Business rules: + * - Prefer AX hierarchy membership when available. + * - Fall back to frame containment for flattened AX trees. + * - This is intentionally app-agnostic: it does not rely on app-specific identifiers or labels. + */ +function isForegroundCandidateForRoot( + root: RuntimeSnapshotElementRecord, + candidate: RuntimeSnapshotElementRecord, +): boolean { + return ( + isSameOrDescendantPath(root.metadata.path, candidate.metadata.path) || + isFrameInside(root.publicElement, candidate.publicElement) + ); +} + +/** + * Looks up the stored per-ref metadata for the exact runtime snapshot being rendered. + * + * Next-step generation receives the compact public snapshot, but foreground filtering needs private + * metadata such as hierarchy path and depth. We only use stored metadata when both screen hash and + * sequence match, so stale records from an older UI state cannot influence current next steps. 
+ */ +function findStoredSnapshotRecords(params: { + simulatorId: string; + runtimeSnapshot: RuntimeSnapshotV1; +}): Map { + const storedSnapshot = getRuntimeSnapshot( + params.simulatorId, + params.runtimeSnapshot.capturedAtMs, + ); + if ( + storedSnapshot?.payload.screenHash !== params.runtimeSnapshot.screenHash || + storedSnapshot.payload.seq !== params.runtimeSnapshot.seq + ) { + return new Map(); + } + + return storedSnapshot.elementsByRef; +} + +/** + * Finds the most likely active foreground scroll container. + * + * Business rules: + * - Scrollable elements can become foreground roots. A top-level root with a sheet grabber + * descendant can also become the root so flattened sheet controls are not assigned to background + * scroll views by geometry overlap. + * - A foreground root must contain at least one generic foreground cue: + * - dismiss/navigation-out control: back, cancel, close, done + * - text-entry control + * - state-changing control such as a switch/selected segment + * - Dismiss controls score highest because they are strong sheet/dialog/detail indicators. + * - Text fields score next because search panels and forms often appear as foreground overlays. + * - State controls score lower because settings panels are foreground, but controls themselves + * should not become generic tap suggestions. + * - Depth and later snapshot order are tie-breakers for nested/later-presented UI. + * + * Limitations: + * - This does not yet rank competing foreground scroll views by identifier specificity or visible + * area. After filtering, scroll selection still chooses the first remaining scrollable element. 
+ */ +function findSheetGrabberDescendant( + root: RuntimeSnapshotElementRecord, + records: readonly RuntimeSnapshotElementRecord[], +): RuntimeSnapshotElementRecord | null { + return ( + records.find( + (candidate) => + candidate !== root && + compactTapNextStepText(candidate.publicElement.label).toLowerCase() === 'sheet grabber' && + isSameOrDescendantPath(root.metadata.path, candidate.metadata.path), + ) ?? null + ); +} + +function isExpandableSheetGrabber(element: RuntimeElementV1): boolean { + if (compactTapNextStepText(element.label).toLowerCase() !== 'sheet grabber') { + return false; + } + const value = compactTapNextStepText(element.value); + return value.length > 0 && !SHEET_EXPANDED_VALUE_PATTERN.test(value); +} + +function isExpandedSheetGrabber(element: RuntimeElementV1): boolean { + return ( + compactTapNextStepText(element.label).toLowerCase() === 'sheet grabber' && + SHEET_EXPANDED_VALUE_PATTERN.test(compactTapNextStepText(element.value)) + ); +} + +function findActiveForegroundRoot( + recordsByRef: Map, +): RuntimeSnapshotElementRecord | null { + const records = [...recordsByRef.values()]; + const indexByRef = new Map(records.map((record, index) => [record.publicElement.ref, index])); + const scoreByRef = new Map(); + + function foregroundScore(record: RuntimeSnapshotElementRecord): number { + const cachedScore = scoreByRef.get(record.publicElement.ref); + if (cachedScore !== undefined) { + return cachedScore; + } + const hasSheetGrabberDescendant = findSheetGrabberDescendant(record, records) !== null; + if (!isScrollableNextStepElement(record.publicElement) && !hasSheetGrabberDescendant) { + scoreByRef.set(record.publicElement.ref, 0); + return 0; + } + + const descendants = records.filter((candidate) => + isForegroundCandidateForRoot(record, candidate), + ); + const hasDismissControl = descendants.some((candidate) => + FOREGROUND_DISMISS_TAP_NEXT_STEP_LABELS.has( + compactTapNextStepText(candidate.publicElement.label).toLowerCase(), + ), + ); + 
const hasTextEntry = descendants.some((candidate) => + candidate.publicElement.actions.includes('typeText'), + ); + const hasStateControls = descendants.some((candidate) => + isStateChangingTapNextStepElement(candidate.publicElement), + ); + + if (!hasDismissControl && !hasTextEntry && !hasStateControls) { + scoreByRef.set(record.publicElement.ref, 0); + return 0; + } + + const element = record.publicElement; + const rolePriority = Math.max(0, 3 - getScrollRolePriority(element)); + const identityPriority = Math.max(0, 2 - getScrollIdentityPriority(element)); + const verticalPriority = element.frame.height >= element.frame.width ? 1 : 0; + const score = + (hasSheetGrabberDescendant ? 200 : 0) + + (hasDismissControl ? 100 : 0) + + (hasTextEntry ? 60 : 0) + + (hasStateControls ? 30 : 0) + + rolePriority + + identityPriority + + verticalPriority + + record.metadata.depth / 1000 + + (indexByRef.get(record.publicElement.ref) ?? 0) / 1_000_000; + scoreByRef.set(record.publicElement.ref, score); + return score; + } + + return records.reduce((best, candidate) => { + const candidateScore = foregroundScore(candidate); + if (candidateScore <= 0) { + return best; + } + if (!best || candidateScore > foregroundScore(best)) { + return candidate; + } + return best; + }, null); +} + +/** + * Filters public snapshot elements to the active foreground region when one can be detected. + * + * Business rules: + * - If foreground detection is confident, next-step examples should prefer controls in the active + * panel/sheet/detail instead of background controls that remain visible in the raw AX snapshot. + * - If no foreground root is detected, keep all elements rather than guessing; conservative output + * is better than hiding valid controls. 
+ */ +function findSheetForegroundStartIndex( + foregroundRoot: RuntimeSnapshotElementRecord, + records: readonly RuntimeSnapshotElementRecord[], + indexByRef: Map, +): number | null { + const grabber = findSheetGrabberDescendant(foregroundRoot, records); + return grabber ? (indexByRef.get(grabber.publicElement.ref) ?? null) : null; +} + +function filterToForegroundElements( + elements: RuntimeElementV1[], + recordsByRef: Map, + foregroundRoot: RuntimeSnapshotElementRecord | null, +): RuntimeElementV1[] { + if (!foregroundRoot) { + return elements; + } + + const records = [...recordsByRef.values()]; + const indexByRef = new Map(records.map((record, index) => [record.publicElement.ref, index])); + const sheetForegroundStartIndex = findSheetForegroundStartIndex( + foregroundRoot, + records, + indexByRef, + ); + + return elements.filter((element) => { + const record = recordsByRef.get(element.ref); + if (!record || !isForegroundCandidateForRoot(foregroundRoot, record)) { + return false; + } + + const recordIndex = indexByRef.get(record.publicElement.ref) ?? 
-1; + return sheetForegroundStartIndex === null || recordIndex >= sheetForegroundStartIndex; + }); +} + +function getRepeatedNoOpActionRef(params: { + runtimeSnapshot: RuntimeSnapshotV1; + actionContext?: RuntimeSnapshotNextStepActionContext; +}): { tool: 'tap' | 'swipe' | 'drag'; ref: string } | null { + if (params.actionContext?.previousScreenHash !== params.runtimeSnapshot.screenHash) { + return null; + } + + switch (params.actionContext.action.type) { + case 'tap': + return { tool: 'tap', ref: params.actionContext.action.elementRef }; + case 'swipe': + return { tool: 'swipe', ref: params.actionContext.action.withinElementRef }; + case 'drag': + return { tool: 'drag', ref: params.actionContext.action.elementRef }; + default: + return null; + } +} + +function hasIncompleteStateSignal(element: { label?: string; value?: string }): boolean { + const label = compactTapNextStepText(element.label).toLowerCase(); + const value = compactTapNextStepText(element.value).toLowerCase(); + return INCOMPLETE_STATE_NEXT_STEP_TEXT.has(label) || INCOMPLETE_STATE_NEXT_STEP_TEXT.has(value); +} + +function findForegroundIncompleteCompletionTapElement( + elements: readonly RuntimeElementV1[], + repeatedNoOpAction: { tool: 'tap' | 'swipe' | 'drag'; ref: string } | null, +): RuntimeElementV1 | null { + if (!elements.some(hasIncompleteStateSignal)) { + return null; + } + + return ( + elements.find( + (element) => + element.actions.includes('tap') && + !element.actions.includes('typeText') && + !(repeatedNoOpAction?.tool === 'tap' && repeatedNoOpAction.ref === element.ref) && + COMPLETION_ACTION_TAP_NEXT_STEP_LABELS.has( + compactTapNextStepText(element.label).toLowerCase(), + ), + ) ?? null + ); +} + +/** + * Creates human/model-facing next-step examples from a runtime snapshot. + * + * Business rules: + * - Refs in next steps must come from the current runtime snapshot only. 
+ * - Prefer runtime tap/scroll guidance over screenshots; screenshots are only suggested when there + * is no useful tap, batch, or scroll action to try. + * - Tap examples skip text fields, hidden controls, and state-changing controls to avoid destructive + * generic suggestions. + * - Batch examples include multiple visible switches because settings screens often require several + * same-screen toggles and batch is the efficient, app-agnostic primitive for that workflow. + * - Scroll examples prefer real list/scroll-view targets, then semantic containers, with + * application/window root scrolling used last as a fallback. + * - Refresh/wait examples are included for fresh snapshot captures, but not after every action. + */ +export function getForegroundCompletionSuppressedRuntimeTargetRefs(params: { + simulatorId: string; + runtimeSnapshot: RuntimeSnapshotV1; +}): string[] { + const recordsByRef = findStoredSnapshotRecords(params); + const foregroundRoot = findActiveForegroundRoot(recordsByRef); + if (!foregroundRoot) { + return []; + } + + const foregroundElements = filterToForegroundElements( + params.runtimeSnapshot.elements, + recordsByRef, + foregroundRoot, + ); + const completionActionElement = findForegroundIncompleteCompletionTapElement( + foregroundElements, + null, + ); + if (completionActionElement) { + return foregroundElements + .filter( + (element) => + element.ref !== completionActionElement.ref && hasIncompleteStateSignal(element), + ) + .map((element) => element.ref); + } + + return []; +} + +export function createRuntimeSnapshotNextSteps(params: { + simulatorId: string; + runtimeSnapshot: RuntimeSnapshotV1; + includeRefreshAndWait: boolean; + actionContext?: RuntimeSnapshotNextStepActionContext; +}): NextStep[] { + const recordsByRef = findStoredSnapshotRecords(params); + const foregroundRoot = findActiveForegroundRoot(recordsByRef); + const records = [...recordsByRef.values()]; + const foregroundSheetGrabber = + foregroundRoot !== null ? 
findSheetGrabberDescendant(foregroundRoot, records) : null; + const nextStepElements = filterToForegroundElements( + params.runtimeSnapshot.elements, + recordsByRef, + foregroundRoot, + ); + const repeatedNoOpAction = getRepeatedNoOpActionRef(params); + const foregroundIncompleteCompletionTapElement = + foregroundRoot !== null + ? findForegroundIncompleteCompletionTapElement(nextStepElements, repeatedNoOpAction) + : null; + const tapElements = nextStepElements + .map((element, index) => ({ element, index })) + .filter( + ({ element }) => + element.actions.includes('tap') && + !element.actions.includes('typeText') && + !(repeatedNoOpAction?.tool === 'tap' && repeatedNoOpAction.ref === element.ref) && + !isHiddenTapNextStepElement(element.label) && + !isStateChangingTapNextStepElement(element), + ) + .sort((left, right) => { + const priorityDelta = + getTapNextStepElementPriority(left.element) - getTapNextStepElementPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + }) + .map(({ element }) => element); + const tapElement = foregroundIncompleteCompletionTapElement ?? tapElements[0] ?? null; + const sameScreenBatchElements = tapElements.filter( + (element) => + !isContentRichTapNextStepElement(element) && + !isScreenChangingTapNextStepElement(element) && + !isLowPriorityTapNextStepElement(element.label), + ); + const switchBatchElements = nextStepElements.filter( + (element) => element.role === 'switch' && element.actions.includes('tap'), + ); + let batchElements = sameScreenBatchElements; + if (switchBatchElements.length >= 2) { + batchElements = switchBatchElements; + } + const batchLabel = + switchBatchElements.length >= 2 ? 
'Batch visible switch toggles' : 'Batch same-screen taps'; + const scrollElement = + nextStepElements + .map((element, index) => ({ element, index })) + .filter( + ({ element }) => + isScrollableNextStepElement(element) && + !( + (repeatedNoOpAction?.tool === 'swipe' || repeatedNoOpAction?.tool === 'drag') && + repeatedNoOpAction.ref === element.ref + ), + ) + .sort((left, right) => compareScrollableNextStepCandidates(left, right, recordsByRef))[0] + ?.element ?? null; + const expandSheetNextStep: NextStep | null = + foregroundSheetGrabber && + isExpandableSheetGrabber(foregroundSheetGrabber.publicElement) && + !( + repeatedNoOpAction?.tool === 'drag' && + repeatedNoOpAction.ref === foregroundSheetGrabber.publicElement.ref + ) + ? { + label: 'Expand foreground sheet', + tool: 'drag', + params: { + simulatorId: params.simulatorId, + elementRef: foregroundSheetGrabber.publicElement.ref, + direction: 'up', + distance: 0.35, + duration: 0.8, + steps: 80, + postDelay: 0.8, + }, + } + : null; + const shouldDragSheetScroll = + expandSheetNextStep === null && + foregroundSheetGrabber !== null && + isExpandedSheetGrabber(foregroundSheetGrabber.publicElement) && + scrollElement !== null && + scrollElement.role !== 'application' && + scrollElement.role !== 'window'; + const scrollNextStep: NextStep | null = scrollElement + ? shouldDragSheetScroll + ? 
{ + label: 'Drag visible sheet content', + tool: 'drag', + params: { + simulatorId: params.simulatorId, + elementRef: scrollElement.ref, + direction: 'up', + distance: 0.7, + duration: 0.8, + steps: 80, + postDelay: 0.5, + }, + } + : { + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId: params.simulatorId, + withinElementRef: scrollElement.ref, + direction: 'up', + distance: 0.5, + }, + } + : null; + const shouldPrioritizeScroll = + scrollNextStep !== null && + tapElement !== null && + expandSheetNextStep === null && + (shouldDragSheetScroll || + (batchElements.length < 2 && + (isScreenChangingTapNextStepElement(tapElement) || + (!isContentRichTapNextStepElement(tapElement) && + !isLowPriorityTapNextStepElement(tapElement.label))))); + const shouldShowBatch = + batchElements.length >= 2 && expandSheetNextStep === null && !shouldDragSheetScroll; + const hasUsefulRuntimeGuidance = + shouldShowBatch || + expandSheetNextStep !== null || + scrollNextStep !== null || + tapElement !== null; + const screenshotNextStep: NextStep = { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: params.simulatorId }, + }; + + return [ + ...(params.includeRefreshAndWait + ? [ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: params.simulatorId }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { simulatorId: params.simulatorId, predicate: 'settled' }, + }, + ] + : []), + ...(shouldShowBatch + ? [ + { + label: batchLabel, + tool: 'batch', + params: { + simulatorId: params.simulatorId, + steps: batchElements.slice(0, 2).map((element) => ({ + action: 'tap', + elementRef: element.ref, + })), + }, + }, + ] + : []), + ...(expandSheetNextStep ? [expandSheetNextStep] : []), + ...(scrollNextStep && shouldPrioritizeScroll ? [scrollNextStep] : []), + ...(tapElement + ? 
[ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId: params.simulatorId, elementRef: tapElement.ref }, + }, + ] + : []), + ...(scrollNextStep && !shouldPrioritizeScroll ? [scrollNextStep] : []), + ...(!hasUsefulRuntimeGuidance ? [screenshotNextStep] : []), + ]; +} diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts new file mode 100644 index 000000000..1265e18f1 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -0,0 +1,887 @@ +import type { AccessibilityNode, Frame, Point } from '../../../../types/domain-results.ts'; +import type { + RuntimeActionHintV1, + RuntimeActionNameV1, + RuntimeElementRoleV1, + RuntimeElementStateV1, + RuntimeElementV1, + RuntimeSnapshotElementRecord, + RuntimeSnapshotRecord, + RuntimeSnapshotV1, +} from '../../../../types/ui-snapshot.ts'; + +export const RUNTIME_SNAPSHOT_PROTOCOL = 'rs/1' as const; +export const RUNTIME_SNAPSHOT_TTL_MS = 60_000; + +interface NormalizedNodeInput { + node: AccessibilityNode; + path: string; + depth: number; +} + +export class RuntimeSnapshotParseError extends Error { + constructor(message: string) { + super(message); + this.name = 'RuntimeSnapshotParseError'; + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function normalizeText(value: unknown): string | undefined { + if (typeof value !== 'string' && typeof value !== 'number' && typeof value !== 'boolean') { + return undefined; + } + + const normalized = String(value).replace(/\s+/g, ' ').trim(); + return normalized.length > 0 ? 
normalized : undefined; +} + +function readText(node: AccessibilityNode, keys: readonly string[]): string | undefined { + for (const key of keys) { + const value = normalizeText(node[key]); + if (value) { + return value; + } + } + return undefined; +} + +function isFiniteNumber(value: unknown): value is number { + return typeof value === 'number' && Number.isFinite(value); +} + +function normalizeFrame(frame: Frame): Frame { + return { + x: Number(frame.x.toFixed(2)), + y: Number(frame.y.toFixed(2)), + width: Number(frame.width.toFixed(2)), + height: Number(frame.height.toFixed(2)), + }; +} + +function readFrameObject(value: unknown): Frame | null { + if (!isRecord(value)) { + return null; + } + + const { x, y, width, height } = value; + if ( + !isFiniteNumber(x) || + !isFiniteNumber(y) || + !isFiniteNumber(width) || + !isFiniteNumber(height) + ) { + return null; + } + + return normalizeFrame({ x, y, width, height }); +} + +function parseAxFrame(value: unknown): Frame | null { + if (typeof value !== 'string') { + return null; + } + + const numbers = value.match(/-?\d+(?:\.\d+)?/g)?.map(Number) ?? []; + if (numbers.length < 4 || numbers.some((entry) => !Number.isFinite(entry))) { + return null; + } + + const [x = 0, y = 0, width = 0, height = 0] = numbers; + return normalizeFrame({ x, y, width, height }); +} + +function readFrame(node: AccessibilityNode): Frame { + return ( + readFrameObject(node.frame) ?? parseAxFrame(node.AXFrame) ?? { x: 0, y: 0, width: 0, height: 0 } + ); +} + +function hasScrollSemanticIdentifier(identifier: string | undefined): boolean { + return /(?:^|[._-])scroll(?:view|[-_.]view)?(?:$|[._-])|scrollView/i.test(identifier ?? 
''); +} + +function deriveRole( + node: AccessibilityNode, + identifier: string | undefined, +): RuntimeElementRoleV1 | undefined { + const roleText = [node.role, node.type, node.subrole, node.role_description] + .map((value) => normalizeText(value)?.toLowerCase()) + .filter((value): value is string => value !== undefined) + .join(' '); + + if (roleText.length === 0) return undefined; + if (/application/.test(roleText)) return 'application'; + if (/window/.test(roleText)) return 'window'; + if (/button/.test(roleText)) return 'button'; + if (/keyboard|key/.test(roleText)) return 'keyboard-key'; + if ( + /textfield|text field|searchfield|search field|securetext|textarea|combo box/.test(roleText) + ) { + return 'text-field'; + } + if (/menu/.test(roleText)) return 'menu'; + if (/statictext|text/.test(roleText)) return 'text'; + if (/image/.test(roleText)) return 'image'; + if (/switch|checkbox|check box/.test(roleText)) return 'switch'; + if (/slider/.test(roleText)) return 'slider'; + if (/cell|row/.test(roleText)) return 'cell'; + if (/scroll/.test(roleText)) return 'scroll-view'; + if (/table|list|outline|collection/.test(roleText)) return 'list'; + if (hasScrollSemanticIdentifier(identifier) && /group|other|view|container/.test(roleText)) { + return 'scroll-view'; + } + if (/(^|\b|ax)tab(\b|group|$)/.test(roleText)) return 'tab'; + return 'other'; +} + +function isVisible(frame: Frame): boolean { + return frame.width > 0 && frame.height > 0; +} + +function framesIntersect(a: Frame, b: Frame): boolean { + return a.x < b.x + b.width && a.x + a.width > b.x && a.y < b.y + b.height && a.y + a.height > b.y; +} + +function pointInsideFrame(point: Point, frame: Frame): boolean { + return ( + point.x >= frame.x && + point.x <= frame.x + frame.width && + point.y >= frame.y && + point.y <= frame.y + frame.height + ); +} + +function hasPointAction(actions: readonly RuntimeActionNameV1[]): boolean { + return actions.some( + (action) => + action === 'tap' || action === 
'typeText' || action === 'longPress' || action === 'touch', + ); +} + +function isTapRole(role: RuntimeElementRoleV1 | undefined): boolean { + return ( + role === 'button' || + role === 'cell' || + role === 'keyboard-key' || + role === 'switch' || + role === 'tab' || + role === 'text-field' + ); +} + +function isGenericInternalIdentifier(identifier: string | undefined): boolean { + return identifier === 'label-view'; +} + +function deriveActions(params: { + role: RuntimeElementRoleV1 | undefined; + enabled: boolean; + frame: Frame; + customActions: readonly string[]; + hasSemanticIdentity: boolean; +}): RuntimeActionNameV1[] { + const { role, enabled, frame, customActions, hasSemanticIdentity } = params; + if (!enabled || !isVisible(frame)) { + return []; + } + + const actions = new Set(); + if (isTapRole(role) || (customActions.length > 0 && hasSemanticIdentity)) { + actions.add('tap'); + } + if (role === 'text-field') { + actions.add('typeText'); + } + if (role !== 'application' && role !== 'window') { + actions.add('longPress'); + actions.add('touch'); + } + if (role === 'scroll-view' || role === 'list' || role === 'cell') { + actions.add('swipeWithin'); + } + + return [...actions]; +} + +function hashString(input: string): string { + let hash = 0x811c9dc5; + for (let index = 0; index < input.length; index += 1) { + hash ^= input.charCodeAt(index); + hash = Math.imul(hash, 0x01000193) >>> 0; + } + return hash.toString(36).padStart(7, '0'); +} + +function readChildren(node: AccessibilityNode): AccessibilityNode[] { + return Array.isArray(node.children) ? 
node.children : []; +} + +function normalizeCustomActions(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.map(normalizeText).filter((entry): entry is string => entry !== undefined); +} + +function readState(node: AccessibilityNode, frame: Frame): RuntimeElementStateV1 | undefined { + const state: RuntimeElementStateV1 = { + enabled: node.enabled !== false, + visible: isVisible(frame), + }; + + if (typeof node.focused === 'boolean') { + state.focused = node.focused; + } else if (typeof node.AXFocused === 'boolean') { + state.focused = node.AXFocused; + } + + if (typeof node.selected === 'boolean') { + state.selected = node.selected; + } else if (typeof node.AXSelected === 'boolean') { + state.selected = node.AXSelected; + } + + return Object.keys(state).length > 0 ? state : undefined; +} + +function stableSignature(params: { + role?: RuntimeElementRoleV1; + label?: string; + value?: string; + identifier?: string; + path: string; + frame: Frame; +}): string { + return hashString(JSON.stringify(params)); +} + +function normalizeNode(input: NormalizedNodeInput, index: number): RuntimeSnapshotElementRecord { + const { node, path, depth } = input; + const ref = `e${index + 1}`; + const frame = readFrame(node); + const label = readText(node, ['AXLabel', 'title', 'help', 'label']); + const value = readText(node, ['AXValue', 'value']); + const identifier = readText(node, ['AXUniqueId', 'AXIdentifier', 'identifier', 'id']); + const role = deriveRole(node, identifier); + const enabled = node.enabled !== false; + const customActions = normalizeCustomActions(node.custom_actions); + const actions = deriveActions({ + role, + enabled, + frame, + customActions, + hasSemanticIdentity: + label !== undefined || + value !== undefined || + (identifier !== undefined && !isGenericInternalIdentifier(identifier)), + }); + const state = readState(node, frame); + + return { + publicElement: { + ref, + ...(role ? { role } : {}), + ...(label ? 
{ label } : {}), + ...(value ? { value } : {}), + ...(identifier ? { identifier } : {}), + frame, + ...(state ? { state } : {}), + actions, + }, + metadata: { + path, + depth, + childCount: readChildren(node).length, + signature: stableSignature({ role, label, value, identifier, path, frame }), + }, + rawNode: node, + }; +} + +function isContainerRole(role: RuntimeElementRoleV1 | undefined): boolean { + return ( + role === 'application' || + role === 'window' || + role === 'scroll-view' || + role === 'list' || + role === 'other' + ); +} + +function isDescendantPath(parentPath: string, candidatePath: string): boolean { + return candidatePath.startsWith(`${parentPath}.`); +} + +function isLargeEnoughInferredScrollContainer( + role: RuntimeElementRoleV1 | undefined, + frame: Frame, +): boolean { + if (role !== 'other') { + return true; + } + return frame.width >= 120 && frame.height >= 120; +} + +function frameOverflowsContainer(frame: Frame, containerFrame: Frame): boolean { + const tolerance = 8; + return ( + frame.x < containerFrame.x - tolerance || + frame.y < containerFrame.y - tolerance || + frame.x + frame.width > containerFrame.x + containerFrame.width + tolerance || + frame.y + frame.height > containerFrame.y + containerFrame.height + tolerance + ); +} + +function frameVerticallyOverflowsContainer(frame: Frame, containerFrame: Frame): boolean { + const tolerance = 8; + return ( + frame.y < containerFrame.y - tolerance || + frame.y + frame.height > containerFrame.y + containerFrame.height + tolerance + ); +} + +function hasPublicSemanticIdentity(element: RuntimeElementV1): boolean { + return ( + element.label !== undefined || + element.value !== undefined || + (element.identifier !== undefined && !isGenericInternalIdentifier(element.identifier)) + ); +} + +function isTopLevelViewportElement(element: RuntimeSnapshotElementRecord): boolean { + const { role } = element.publicElement; + return (role === 'application' || role === 'window') && 
!element.metadata.path.includes('.'); +} + +function hasSemanticVerticalOverflowingDescendant( + element: RuntimeSnapshotElementRecord, + elements: RuntimeSnapshotElementRecord[], +): boolean { + return elements.some((candidate) => { + if ( + candidate === element || + !isDescendantPath(element.metadata.path, candidate.metadata.path) + ) { + return false; + } + return ( + hasPublicSemanticIdentity(candidate.publicElement) && + isVisible(candidate.publicElement.frame) && + frameVerticallyOverflowsContainer(candidate.publicElement.frame, element.publicElement.frame) + ); + }); +} + +function hasPreferredDescendantSwipeTarget( + element: RuntimeSnapshotElementRecord, + elements: RuntimeSnapshotElementRecord[], +): boolean { + return elements.some( + (candidate) => + candidate !== element && + isDescendantPath(element.metadata.path, candidate.metadata.path) && + isPreferredSwipeTarget(candidate), + ); +} + +function createViewportSwipeFrame(viewportFrame: Frame): Frame { + return normalizeFrame(viewportFrame); +} + +function isSheetGrabberElement(element: RuntimeSnapshotElementRecord): boolean { + return element.publicElement.label?.toLowerCase() === 'sheet grabber'; +} + +function findSheetGrabberDescendant( + element: RuntimeSnapshotElementRecord, + elements: RuntimeSnapshotElementRecord[], +): RuntimeSnapshotElementRecord | null { + return ( + elements.find( + (candidate) => + candidate !== element && + isDescendantPath(element.metadata.path, candidate.metadata.path) && + isSheetGrabberElement(candidate), + ) ?? null + ); +} + +function clamp(value: number, minimum: number, maximum: number): number { + return Math.min(Math.max(value, minimum), maximum); +} + +function findViewportFrame(elements: RuntimeSnapshotElementRecord[]): Frame | null { + return ( + elements.find( + (element) => + (element.publicElement.role === 'application' || element.publicElement.role === 'window') && + isVisible(element.publicElement.frame), + )?.publicElement.frame ?? 
null + ); +} + +function applyViewportVisibility(elements: RuntimeSnapshotElementRecord[]): void { + const viewport = findViewportFrame(elements); + if (!viewport) { + return; + } + + for (const element of elements) { + const publicElement = element.publicElement; + if (publicElement.role === 'application' || publicElement.role === 'window') { + continue; + } + + if (!framesIntersect(publicElement.frame, viewport)) { + publicElement.state = { ...publicElement.state, visible: false }; + publicElement.actions = []; + continue; + } + + const activationPoint = getDefaultRuntimeElementActivationPoint(element); + if (!pointInsideFrame(activationPoint, viewport)) { + publicElement.actions = publicElement.actions.filter((action) => action === 'swipeWithin'); + continue; + } + + const adjustedActivationPoint = getBottomClippedActivationPoint(element, viewport); + if (adjustedActivationPoint) { + element.metadata.activationPoint = adjustedActivationPoint; + } + } +} + +function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): void { + for (const element of elements) { + const { publicElement, metadata } = element; + if ( + !isContainerRole(publicElement.role) || + publicElement.state?.visible === false || + !isVisible(publicElement.frame) || + !isLargeEnoughInferredScrollContainer(publicElement.role, publicElement.frame) + ) { + continue; + } + if (publicElement.actions.includes('swipeWithin')) { + continue; + } + + const sheetGrabber = + publicElement.role === 'application' || publicElement.role === 'window' + ? 
findSheetGrabberDescendant(element, elements) + : null; + if (sheetGrabber) { + continue; + } + + const hasOverflowingDescendant = elements.some((candidate) => { + if (candidate === element) { + return false; + } + return ( + isDescendantPath(metadata.path, candidate.metadata.path) && + frameOverflowsContainer(candidate.publicElement.frame, publicElement.frame) + ); + }); + + if ( + publicElement.role !== 'application' && + publicElement.role !== 'window' && + hasOverflowingDescendant + ) { + publicElement.actions.push('swipeWithin'); + } + } + + for (const element of elements) { + const { publicElement, metadata } = element; + if ( + !isTopLevelViewportElement(element) || + publicElement.state?.visible === false || + !isVisible(publicElement.frame) || + publicElement.actions.includes('swipeWithin') || + findSheetGrabberDescendant(element, elements) !== null || + hasPreferredDescendantSwipeTarget(element, elements) || + !hasSemanticVerticalOverflowingDescendant(element, elements) + ) { + continue; + } + + publicElement.actions.push('swipeWithin'); + metadata.swipeFrame = createViewportSwipeFrame(publicElement.frame); + } + + pruneGenericFallbackSwipeTargets(elements); +} + +function isUnidentifiedOtherSwipeTarget(element: RuntimeSnapshotElementRecord): boolean { + const publicElement = element.publicElement; + return ( + publicElement.role === 'other' && + publicElement.actions.includes('swipeWithin') && + !publicElement.label && + !publicElement.value && + !publicElement.identifier + ); +} + +function isPreferredSwipeTarget(element: RuntimeSnapshotElementRecord): boolean { + const publicElement = element.publicElement; + if (!publicElement.actions.includes('swipeWithin')) { + return false; + } + return !isUnidentifiedOtherSwipeTarget(element); +} + +function pruneGenericFallbackSwipeTargets(elements: RuntimeSnapshotElementRecord[]): void { + if (!elements.some(isPreferredSwipeTarget)) { + return; + } + + for (const element of elements) { + if 
(!isUnidentifiedOtherSwipeTarget(element)) { + continue; + } + element.publicElement.actions = element.publicElement.actions.filter( + (action) => action !== 'swipeWithin', + ); + } +} + +function flattenHierarchy(roots: AccessibilityNode[]): NormalizedNodeInput[] { + const flattened: NormalizedNodeInput[] = []; + + function visit(node: AccessibilityNode, path: string, depth: number): void { + flattened.push({ node, path, depth }); + readChildren(node).forEach((child, index) => visit(child, `${path}.${index}`, depth + 1)); + } + + roots.forEach((root, index) => visit(root, String(index), 0)); + return flattened; +} + +function toActionHints(elements: readonly RuntimeElementV1[]): RuntimeActionHintV1[] { + return elements.flatMap((element) => + element.actions.map((action) => ({ + action, + elementRef: element.ref, + ...(element.label ? { label: element.label } : {}), + })), + ); +} + +function createScreenHash(params: { + elements: readonly RuntimeElementV1[]; + actions: readonly RuntimeActionHintV1[]; +}): string { + return hashString( + JSON.stringify({ + protocol: RUNTIME_SNAPSHOT_PROTOCOL, + elements: params.elements, + actions: params.actions, + }), + ); +} + +export function extractAccessibilityHierarchy(responseText: string): AccessibilityNode[] { + let parsed: unknown; + try { + parsed = JSON.parse(responseText) as unknown; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + throw new RuntimeSnapshotParseError(`AXe describe-ui returned invalid JSON: ${message}`); + } + + if (Array.isArray(parsed)) { + return parsed as AccessibilityNode[]; + } + + if (isRecord(parsed) && Array.isArray(parsed.elements)) { + return parsed.elements as AccessibilityNode[]; + } + + throw new RuntimeSnapshotParseError( + 'AXe describe-ui did not return an accessibility element array.', + ); +} + +export function createRuntimeSnapshotRecord(params: { + simulatorId: string; + uiHierarchy: AccessibilityNode[]; + nowMs?: number; + seq?: number; +}): RuntimeSnapshotRecord { + const capturedAtMs = params.nowMs ?? Date.now(); + const expiresAtMs = capturedAtMs + RUNTIME_SNAPSHOT_TTL_MS; + const elements = flattenHierarchy(params.uiHierarchy).map((input, index) => + normalizeNode(input, index), + ); + applyViewportVisibility(elements); + inferScrollableContainers(elements); + const publicElements = elements.map((element) => element.publicElement); + const actions = toActionHints(publicElements); + const screenHash = createScreenHash({ elements: publicElements, actions }); + const seq = params.seq ?? 
0; + const elementsByRef = new Map(elements.map((element) => [element.publicElement.ref, element])); + const payload: RuntimeSnapshotV1 = { + type: 'runtime-snapshot', + protocol: RUNTIME_SNAPSHOT_PROTOCOL, + simulatorId: params.simulatorId, + screenHash, + seq, + capturedAtMs, + expiresAtMs, + elements: publicElements, + actions, + }; + + return { + simulatorId: params.simulatorId, + screenHash, + seq, + capturedAtMs, + expiresAtMs, + payload, + elements, + elementsByRef, + }; +} + +export function parseRuntimeSnapshotResponse(params: { + simulatorId: string; + responseText: string; + nowMs?: number; + allowEmpty?: boolean; +}): RuntimeSnapshotRecord { + const uiHierarchy = extractAccessibilityHierarchy(params.responseText); + if (uiHierarchy.length === 0 && params.allowEmpty !== true) { + throw new RuntimeSnapshotParseError( + 'AXe describe-ui returned an empty accessibility element array.', + ); + } + + return createRuntimeSnapshotRecord({ + simulatorId: params.simulatorId, + uiHierarchy, + nowMs: params.nowMs, + }); +} + +export function getPrimaryRuntimeElement( + snapshot: RuntimeSnapshotV1, + action: RuntimeActionNameV1 = 'tap', +): RuntimeElementV1 | null { + return ( + snapshot.elements.find((element) => element.actions.includes(action)) ?? + snapshot.elements[0] ?? 
+ null + ); +} + +export function getRuntimeElementCenter(element: RuntimeSnapshotElementRecord): Point { + const { frame } = element.publicElement; + return { + x: Math.round(frame.x + frame.width / 2), + y: Math.round(frame.y + frame.height / 2), + }; +} + +function getDefaultRuntimeElementActivationPoint(element: RuntimeSnapshotElementRecord): Point { + const { frame, role } = element.publicElement; + if (role === 'switch' && frame.width > 120) { + return { + x: Math.round(frame.x + frame.width - 52), + y: Math.round(frame.y + frame.height / 2), + }; + } + + return getRuntimeElementCenter(element); +} + +function getBottomClippedActivationPoint( + element: RuntimeSnapshotElementRecord, + viewport: Frame, +): Point | null { + if (!hasPointAction(element.publicElement.actions)) { + return null; + } + + const defaultPoint = getDefaultRuntimeElementActivationPoint(element); + const bottomClippedZoneStart = viewport.y + viewport.height * 0.93; + if (defaultPoint.y < bottomClippedZoneStart) { + return null; + } + + const { frame } = element.publicElement; + const verticalOffset = Math.min(Math.max(frame.height * 0.1, 8), frame.height / 2); + const adjustedPoint = { + x: defaultPoint.x, + y: Math.round(frame.y + verticalOffset), + }; + + if (!pointInsideFrame(adjustedPoint, frame) || !pointInsideFrame(adjustedPoint, viewport)) { + return null; + } + + return adjustedPoint; +} + +export function getRuntimeElementActivationPoint(element: RuntimeSnapshotElementRecord): Point { + return element.metadata.activationPoint ?? 
getDefaultRuntimeElementActivationPoint(element); +} + +export type RuntimeSwipeDirection = 'up' | 'down' | 'left' | 'right'; + +export type RuntimeSwipePointResolution = + | { ok: true; from: Point; to: Point } + | { ok: false; message: string }; + +function isDegenerateSwipe(from: Point, to: Point): boolean { + return from.x === to.x && from.y === to.y; +} + +function getFrameCenter(frame: Frame): Point { + return { + x: Math.round(frame.x + frame.width / 2), + y: Math.round(frame.y + frame.height / 2), + }; +} + +function getRuntimeSwipeCenter( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, + swipeFrame: Frame, +): Point { + const center = getFrameCenter(swipeFrame); + const { role } = element.publicElement; + if ( + (role === 'application' || role === 'window') && + (direction === 'left' || direction === 'right') + ) { + return { x: center.x, y: Math.round(swipeFrame.y + swipeFrame.height * 0.6) }; + } + return center; +} + +export function getRuntimeElementSwipePoints( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, + distance = 1, +): RuntimeSwipePointResolution { + const frame = element.metadata.swipeFrame ?? 
element.publicElement.frame; + if (frame.width < 2 || frame.height < 2) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' is too small for a reliable swipe.`, + }; + } + + const center = getRuntimeSwipeCenter(element, direction, frame); + const horizontalInset = Math.max(1, Math.min(Math.max(frame.width * 0.15, 24), frame.width / 3)); + const verticalInset = Math.max(1, Math.min(Math.max(frame.height * 0.15, 24), frame.height / 3)); + const left = Math.round(frame.x + horizontalInset); + const right = Math.round(frame.x + frame.width - horizontalInset); + const top = Math.round(frame.y + verticalInset); + const bottom = Math.round(frame.y + frame.height - verticalInset); + + const strokeFraction = clamp(distance, 0, 1); + const horizontalCenter = (left + right) / 2; + const verticalCenter = (top + bottom) / 2; + const horizontalHalfStroke = ((right - left) * strokeFraction) / 2; + const verticalHalfStroke = ((bottom - top) * strokeFraction) / 2; + + let points: { from: Point; to: Point }; + switch (direction) { + case 'up': + points = { + from: { x: center.x, y: Math.round(verticalCenter + verticalHalfStroke) }, + to: { x: center.x, y: Math.round(verticalCenter - verticalHalfStroke) }, + }; + break; + case 'down': + points = { + from: { x: center.x, y: Math.round(verticalCenter - verticalHalfStroke) }, + to: { x: center.x, y: Math.round(verticalCenter + verticalHalfStroke) }, + }; + break; + case 'left': + points = { + from: { x: Math.round(horizontalCenter + horizontalHalfStroke), y: center.y }, + to: { x: Math.round(horizontalCenter - horizontalHalfStroke), y: center.y }, + }; + break; + case 'right': + points = { + from: { x: Math.round(horizontalCenter - horizontalHalfStroke), y: center.y }, + to: { x: Math.round(horizontalCenter + horizontalHalfStroke), y: center.y }, + }; + break; + } + + if (isDegenerateSwipe(points.from, points.to)) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' does not 
provide non-degenerate ${direction} swipe points.`, + }; + } + + return { ok: true, ...points }; +} + +export function getRuntimeElementDirectionalDragPoints( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, + distance = 0.35, + viewportFrame?: Frame, +): RuntimeSwipePointResolution { + const { frame } = element.publicElement; + if (frame.width < 2 || frame.height < 2) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' is too small for a reliable drag.`, + }; + } + + const from = getRuntimeElementActivationPoint(element); + const boundingFrame = viewportFrame ?? frame; + const edgeInset = 24; + const horizontalDistance = Math.max(1, Math.round(boundingFrame.width * clamp(distance, 0, 1))); + const verticalDistance = Math.max(1, Math.round(boundingFrame.height * clamp(distance, 0, 1))); + const minX = Math.round(boundingFrame.x + Math.min(edgeInset, boundingFrame.width / 2)); + const maxX = Math.round( + boundingFrame.x + boundingFrame.width - Math.min(edgeInset, boundingFrame.width / 2), + ); + const minY = Math.round(boundingFrame.y + Math.min(edgeInset, boundingFrame.height / 2)); + const maxY = Math.round( + boundingFrame.y + boundingFrame.height - Math.min(edgeInset, boundingFrame.height / 2), + ); + + let to: Point; + switch (direction) { + case 'up': + to = { x: from.x, y: clamp(from.y - verticalDistance, minY, maxY) }; + break; + case 'down': + to = { x: from.x, y: clamp(from.y + verticalDistance, minY, maxY) }; + break; + case 'left': + to = { x: clamp(from.x - horizontalDistance, minX, maxX), y: from.y }; + break; + case 'right': + to = { x: clamp(from.x + horizontalDistance, minX, maxX), y: from.y }; + break; + } + + if (isDegenerateSwipe(from, to)) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' does not provide non-degenerate ${direction} drag points.`, + }; + } + + return { ok: true, from, to }; +} diff --git a/src/mcp/tools/ui-automation/shared/semantic-tap.ts 
b/src/mcp/tools/ui-automation/shared/semantic-tap.ts new file mode 100644 index 000000000..f3a92d9a2 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/semantic-tap.ts @@ -0,0 +1,178 @@ +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { executeAxeCommand } from './axe-command.ts'; +import type { AxeHelpers } from './axe-command.ts'; +import { getRuntimeElementActivationPoint } from './runtime-snapshot.ts'; +import type { RuntimeSnapshotElementRecord } from '../../../../types/ui-snapshot.ts'; + +export interface SemanticTapCommand { + selectorArgs: string[] | null; + coordinateArgs: string[]; + primaryArgs: string[]; + targetDescription: string; + usedSelector: boolean; +} + +function axeElementTypeFor(element: RuntimeSnapshotElementRecord): string | null { + switch (element.publicElement.role) { + case 'button': + return 'Button'; + case 'cell': + return 'Cell'; + case 'keyboard-key': + return 'Key'; + case 'switch': + return 'Switch'; + case 'tab': + return 'Tab'; + case 'text-field': + return 'TextField'; + default: + return null; + } +} + +export function isRecoverableAxeSelectorError(error: unknown): boolean { + const messageParts = error instanceof Error ? 
[error.message] : [String(error)]; + if (typeof error === 'object' && error !== null && 'axeOutput' in error) { + const { axeOutput } = error as { axeOutput?: unknown }; + if (typeof axeOutput === 'string') { + messageParts.push(axeOutput); + } + } + + const message = messageParts.join('\n'); + return ( + /multiple(?:\s+\(?\d+\)?)?\s+accessibility\s+elements\s+matched/i.test(message) || + /no\s+accessibility\s+element\s+matched/i.test(message) + ); +} + +function hasDuplicateSelectorMatch(params: { + element: RuntimeSnapshotElementRecord; + elements: readonly RuntimeSnapshotElementRecord[]; + selector: 'identifier' | 'label' | 'value'; + value: string; +}): boolean { + const targetType = axeElementTypeFor(params.element); + const matches = params.elements.filter((candidate) => { + if (axeElementTypeFor(candidate) !== targetType) { + return false; + } + return candidate.publicElement[params.selector] === params.value; + }); + + return matches.length > 1; +} + +function pickSemanticTapSelectorArgs(params: { + element: RuntimeSnapshotElementRecord; + elements: readonly RuntimeSnapshotElementRecord[]; + elementTypeArgs: readonly string[]; + extraArgs: readonly string[]; +}): string[] | null { + const { element, elements, elementTypeArgs, extraArgs } = params; + const { identifier, label, value } = element.publicElement; + + if (element.publicElement.role === 'switch') return null; + if ( + identifier && + !hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier }) + ) { + return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs]; + } + if (label && !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label })) { + return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs]; + } + if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) { + return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs]; + } + return null; +} + +export function 
createSemanticTapCommand( + element: RuntimeSnapshotElementRecord, + elementRef: string, + extraArgs: readonly string[] = [], + elements: readonly RuntimeSnapshotElementRecord[] = [element], +): SemanticTapCommand { + const activationPoint = getRuntimeElementActivationPoint(element); + const elementType = axeElementTypeFor(element); + const elementTypeArgs = elementType ? ['--element-type', elementType] : []; + const coordinateArgs = + element.publicElement.role === 'switch' + ? [ + 'touch', + '-x', + String(activationPoint.x), + '-y', + String(activationPoint.y), + '--down', + '--up', + ] + : ['tap', '-x', String(activationPoint.x), '-y', String(activationPoint.y), ...extraArgs]; + + const selectorArgs = pickSemanticTapSelectorArgs({ + element, + elements, + elementTypeArgs, + extraArgs, + }); + + return { + selectorArgs, + coordinateArgs, + primaryArgs: selectorArgs ?? coordinateArgs, + targetDescription: selectorArgs + ? `elementRef ${elementRef} semantic selector` + : `elementRef ${elementRef} activation point (${activationPoint.x}, ${activationPoint.y})`, + usedSelector: selectorArgs !== null, + }; +} + +function readAxeCommandName(args: readonly string[]): string { + const commandName = args[0]; + if (!commandName) { + throw new Error('Semantic tap command has no AXe command name.'); + } + return commandName; +} + +export function createSemanticTapBatchSteps(command: SemanticTapCommand): string[] { + if (command.coordinateArgs[0] !== 'touch') { + return [command.coordinateArgs.join(' ')]; + } + + const baseArgs = command.coordinateArgs.filter((arg) => arg !== '--down' && arg !== '--up'); + return [`${baseArgs.join(' ')} --down`, `${baseArgs.join(' ')} --up`]; +} + +export async function executeSemanticTapWithAmbiguityFallback(params: { + command: SemanticTapCommand; + simulatorId: string; + executor: CommandExecutor; + axeHelpers: AxeHelpers; +}): Promise { + const { command, simulatorId, executor, axeHelpers } = params; + + try { + await executeAxeCommand( + 
command.primaryArgs, + simulatorId, + readAxeCommandName(command.primaryArgs), + executor, + axeHelpers, + ); + } catch (error) { + if (!command.selectorArgs || !isRecoverableAxeSelectorError(error)) { + throw error; + } + + await executeAxeCommand( + command.coordinateArgs, + simulatorId, + readAxeCommandName(command.coordinateArgs), + executor, + axeHelpers, + ); + } +} diff --git a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts index cd0fa28c6..dc5801d2d 100644 --- a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts +++ b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts @@ -1,21 +1,144 @@ -const SNAPSHOT_UI_WARNING_TIMEOUT_MS = 60000; // 60 seconds +import type { + RuntimeActionNameV1, + RuntimeElementResolution, + RuntimeSnapshotLookup, + RuntimeSnapshotRecord, + UiAutomationRecoverableError, +} from '../../../../types/ui-snapshot.ts'; -const snapshotUiTimestamps = new Map(); +const runtimeSnapshots = new Map(); +const runtimeSnapshotSeqs = new Map(); -export function recordSnapshotUiCall(simulatorId: string): void { - snapshotUiTimestamps.set(simulatorId, Date.now()); +function snapshotAgeMs(snapshot: RuntimeSnapshotRecord, nowMs: number): number { + return Math.max(0, nowMs - snapshot.capturedAtMs); +} + +function snapshotMissingError(): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_MISSING', + message: 'No runtime UI snapshot is available for this simulator.', + recoveryHint: + 'Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot.', + }; +} + +function snapshotExpiredError(snapshotAgeMs: number): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_EXPIRED', + message: 'The runtime UI snapshot for this simulator has expired.', + recoveryHint: 'Run snapshot_ui again and retry with a current elementRef.', + snapshotAgeMs, + }; +} + +export function recordRuntimeSnapshot(snapshot: RuntimeSnapshotRecord): RuntimeSnapshotRecord { + 
const nextSeq = (runtimeSnapshotSeqs.get(snapshot.simulatorId) ?? 0) + 1; + runtimeSnapshotSeqs.set(snapshot.simulatorId, nextSeq); + snapshot.seq = nextSeq; + snapshot.payload.seq = nextSeq; + runtimeSnapshots.set(snapshot.simulatorId, snapshot); + return snapshot; +} + +export function clearRuntimeSnapshot(simulatorId: string): void { + runtimeSnapshots.delete(simulatorId); +} + +export function __resetRuntimeSnapshotStoreForTests(): void { + runtimeSnapshots.clear(); + runtimeSnapshotSeqs.clear(); +} + +export function getRuntimeSnapshotLookup( + simulatorId: string, + nowMs = Date.now(), +): RuntimeSnapshotLookup { + const snapshot = runtimeSnapshots.get(simulatorId) ?? null; + if (!snapshot) { + return { status: 'missing', snapshot: null }; + } + + const ageMs = snapshotAgeMs(snapshot, nowMs); + if (nowMs > snapshot.expiresAtMs) { + runtimeSnapshots.delete(simulatorId); + return { status: 'expired', snapshot: null, snapshotAgeMs: ageMs }; + } + + return { status: 'available', snapshot, snapshotAgeMs: ageMs }; +} + +export function getRuntimeSnapshot( + simulatorId: string, + nowMs = Date.now(), +): RuntimeSnapshotRecord | null { + return getRuntimeSnapshotLookup(simulatorId, nowMs).snapshot; +} + +export function resolveElementRef( + simulatorId: string, + elementRef: string, + requiredAction: RuntimeActionNameV1, + nowMs = Date.now(), +): RuntimeElementResolution { + const lookup = getRuntimeSnapshotLookup(simulatorId, nowMs); + if (lookup.status === 'missing') { + return { ok: false, error: snapshotMissingError() }; + } + + if (lookup.status === 'expired') { + return { ok: false, error: snapshotExpiredError(lookup.snapshotAgeMs ?? 0) }; + } + + const snapshot = lookup.snapshot; + if (!snapshot) { + throw new Error('Runtime snapshot lookup returned an available status without a snapshot.'); + } + const ageMs = lookup.snapshotAgeMs ?? 
0; + const element = snapshot.elementsByRef.get(elementRef); + if (!element) { + return { + ok: false, + error: { + code: 'ELEMENT_REF_NOT_FOUND', + message: `Element ref '${elementRef}' was not found in the current runtime UI snapshot.`, + recoveryHint: + 'Run snapshot_ui again and retry with an elementRef from the latest snapshot.', + elementRef, + snapshotAgeMs: ageMs, + }, + }; + } + + if (!element.publicElement.actions.includes(requiredAction)) { + return { + ok: false, + error: { + code: 'TARGET_NOT_ACTIONABLE', + message: `Element ref '${elementRef}' does not support '${requiredAction}'.`, + recoveryHint: + 'Choose an elementRef that lists the required action, or refresh with snapshot_ui.', + elementRef, + candidates: snapshot.payload.elements.filter((candidate) => + candidate.actions.includes(requiredAction), + ), + snapshotAgeMs: ageMs, + }, + }; + } + + return { ok: true, snapshot, element, snapshotAgeMs: ageMs }; } export function getSnapshotUiWarning(simulatorId: string): string | null { - const timestamp = snapshotUiTimestamps.get(simulatorId); - if (!timestamp) { - return 'Warning: snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots.'; + const lookup = getRuntimeSnapshotLookup(simulatorId); + + if (lookup.status === 'missing') { + return 'Warning: snapshot_ui has not been called yet. Consider using snapshot_ui to capture semantic element references before interacting with the UI.'; } - const timeSinceDescribe = Date.now() - timestamp; - if (timeSinceDescribe > SNAPSHOT_UI_WARNING_TIMEOUT_MS) { - const secondsAgo = Math.round(timeSinceDescribe / 1000); - return `Warning: snapshot_ui was last called ${secondsAgo} seconds ago. Consider refreshing UI coordinates with snapshot_ui instead of using potentially stale coordinates.`; + if (lookup.status === 'expired') { + const secondsAgo = Math.round((lookup.snapshotAgeMs ?? 
0) / 1000); + return `Warning: snapshot_ui was last called ${secondsAgo} seconds ago. Refresh UI element references with snapshot_ui before interacting with the UI.`; } return null; diff --git a/src/mcp/tools/ui-automation/shared/wait-predicate.ts b/src/mcp/tools/ui-automation/shared/wait-predicate.ts new file mode 100644 index 000000000..234573a26 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/wait-predicate.ts @@ -0,0 +1,364 @@ +import type { + RuntimeElementRoleV1, + RuntimeElementV1, + RuntimeSnapshotRecord, + UiAutomationRecoverableError, +} from '../../../../types/ui-snapshot.ts'; +import { getRuntimeSnapshotLookup } from './snapshot-ui-state.ts'; + +export const waitPredicates = [ + 'exists', + 'gone', + 'enabled', + 'focused', + 'textContains', + 'settled', +] as const; + +export type WaitPredicate = (typeof waitPredicates)[number]; +export type SelectorPredicate = Exclude; + +export interface WaitSelector { + elementRef?: string; + identifier?: string; + label?: string; + role?: RuntimeElementRoleV1; + value?: string; +} + +export interface ResolvedWaitSelector { + sourceElementRef?: string; + identifier?: string; + label?: string; + role?: RuntimeElementRoleV1; + value?: string; +} + +export interface WaitEvaluation { + matched: boolean; + candidates?: RuntimeElementV1[]; + uiError?: UiAutomationRecoverableError; +} + +export interface SettledTracker { + signature: string | null; + stableSinceMs: number | null; +} + +function snapshotMissingError(): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_MISSING', + message: 'No runtime UI snapshot is available for this simulator.', + recoveryHint: + 'Run snapshot_ui for this simulator, then retry wait_for_ui with an elementRef from that snapshot.', + }; +} + +function snapshotExpiredError(snapshotAgeMs: number): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_EXPIRED', + message: 'The runtime UI snapshot for this simulator has expired.', + recoveryHint: 'Run snapshot_ui again and 
retry wait_for_ui with a current elementRef.', + snapshotAgeMs, + }; +} + +function targetNotFoundError(elementRef: string): UiAutomationRecoverableError { + return { + code: 'TARGET_NOT_FOUND', + message: `Element ref '${elementRef}' cannot be converted into a stable wait selector.`, + recoveryHint: + 'Use an element with an identifier, label, or value, or refresh with snapshot_ui and choose a more stable target.', + elementRef, + }; +} + +function normalizedText(value: string | undefined): string { + return value?.replace(/\s+/g, ' ').trim() ?? ''; +} + +function elementTextContains(element: RuntimeElementV1, text: string): boolean { + const needle = normalizedText(text).toLowerCase(); + if (needle.length === 0) { + return false; + } + return ( + normalizedText(element.value).toLowerCase().includes(needle) || + normalizedText(element.label).toLowerCase().includes(needle) + ); +} + +function matchingElementText(element: RuntimeElementV1, text: string): string | null { + const needle = normalizedText(text).toLowerCase(); + if (needle.length === 0) { + return null; + } + + const value = normalizedText(element.value).toLowerCase(); + if (value.includes(needle)) { + return value; + } + + const label = normalizedText(element.label).toLowerCase(); + if (label.includes(needle)) { + return label; + } + + return null; +} + +function candidatesShareMatchingText(candidates: RuntimeElementV1[], text: string): boolean { + const [first, ...remaining] = candidates.map((candidate) => matchingElementText(candidate, text)); + return first !== null && remaining.every((candidateText) => candidateText === first); +} + +function elementSignatures(snapshot: RuntimeSnapshotRecord): string { + return snapshot.elements.map((element) => element.metadata.signature).join('|'); +} + +export function hasSelectorFields(selector: WaitSelector): boolean { + return Boolean( + selector.elementRef || selector.identifier || selector.label || selector.role || selector.value, + ); +} + +export function 
selectorFromParams(selector: WaitSelector): ResolvedWaitSelector | null { + const resolved: ResolvedWaitSelector = { + ...(selector.identifier ? { identifier: selector.identifier } : {}), + ...(selector.label ? { label: selector.label } : {}), + ...(selector.role ? { role: selector.role } : {}), + ...(selector.value ? { value: selector.value } : {}), + }; + + return hasSelectorFields(resolved) ? resolved : null; +} + +export function resolveElementSelector( + simulatorId: string, + elementRef: string, + nowMs: number, +): + | { ok: true; selector: ResolvedWaitSelector } + | { ok: false; error: UiAutomationRecoverableError } { + const lookup = getRuntimeSnapshotLookup(simulatorId, nowMs); + if (lookup.status === 'missing') { + return { ok: false, error: snapshotMissingError() }; + } + + if (lookup.status === 'expired') { + return { ok: false, error: snapshotExpiredError(lookup.snapshotAgeMs ?? 0) }; + } + + const snapshot = lookup.snapshot; + const element = snapshot?.elementsByRef.get(elementRef); + if (!snapshot || !element) { + return { + ok: false, + error: { + code: 'ELEMENT_REF_NOT_FOUND', + message: `Element ref '${elementRef}' was not found in the current runtime UI snapshot.`, + recoveryHint: + 'Run snapshot_ui again and retry wait_for_ui with an elementRef from the latest snapshot.', + elementRef, + snapshotAgeMs: lookup.snapshotAgeMs ?? 
0, + }, + }; + } + + const publicElement = element.publicElement; + if (publicElement.identifier) { + return { + ok: true, + selector: { sourceElementRef: elementRef, identifier: publicElement.identifier }, + }; + } + + if (publicElement.label && publicElement.role) { + return { + ok: true, + selector: { + sourceElementRef: elementRef, + label: publicElement.label, + role: publicElement.role, + }, + }; + } + + if (publicElement.value && publicElement.role) { + return { + ok: true, + selector: { + sourceElementRef: elementRef, + value: publicElement.value, + role: publicElement.role, + }, + }; + } + + return { ok: false, error: targetNotFoundError(elementRef) }; +} + +function matchSelector( + snapshot: RuntimeSnapshotRecord, + selector: ResolvedWaitSelector, +): RuntimeElementV1[] { + return snapshot.elements + .map((element) => element.publicElement) + .filter((element) => { + if (selector.identifier !== undefined && element.identifier !== selector.identifier) + return false; + if (selector.label !== undefined && element.label !== selector.label) return false; + if (selector.role !== undefined && element.role !== selector.role) return false; + if (selector.value !== undefined && element.value !== selector.value) return false; + return true; + }); +} + +function ambiguousSelectorError( + selector: ResolvedWaitSelector, + candidates: RuntimeElementV1[], +): UiAutomationRecoverableError { + return { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: + 'Retry with the intended candidate elementRef from this result, or narrow the selector with role, label, value, or identifier. Refresh with snapshot_ui only if the refs are stale.', + ...(selector.sourceElementRef ? 
{ elementRef: selector.sourceElementRef } : {}), + candidates, + }; +} + +function focusedStateUnavailableError( + selector: ResolvedWaitSelector, + candidate: RuntimeElementV1, +): UiAutomationRecoverableError { + return { + code: 'TARGET_NOT_ACTIONABLE', + message: 'The matched runtime UI element does not expose focus state.', + recoveryHint: + 'Use exists, enabled, textContains, or a screenshot-based check for this element instead of focused.', + ...(selector.sourceElementRef ? { elementRef: selector.sourceElementRef } : {}), + candidates: [candidate], + }; +} + +export function evaluateTextContainsPredicate(params: { + snapshot: RuntimeSnapshotRecord; + text: string; +}): WaitEvaluation { + const candidates = params.snapshot.elements + .map((element) => element.publicElement) + .filter((element) => elementTextContains(element, params.text)); + + if (candidates.length > 1) { + if (candidatesShareMatchingText(candidates, params.text)) { + return { matched: true, candidates }; + } + return { + matched: false, + candidates, + uiError: ambiguousSelectorError({}, candidates), + }; + } + + return { matched: candidates.length === 1, candidates }; +} + +export function evaluateElementPredicate(params: { + predicate: SelectorPredicate; + selector: ResolvedWaitSelector; + snapshot: RuntimeSnapshotRecord; + text?: string; +}): WaitEvaluation { + const { predicate, selector, snapshot, text } = params; + const candidates = matchSelector(snapshot, selector); + + if (predicate === 'exists') { + return { matched: candidates.length > 0, candidates }; + } + + if (predicate === 'gone') { + const goneCandidates = text + ? candidates.filter((candidate) => elementTextContains(candidate, text)) + : candidates; + return { matched: goneCandidates.length === 0, candidates: goneCandidates }; + } + + if (predicate === 'textContains') { + const textMatches = candidates.filter((candidate) => + elementTextContains(candidate, text ?? 
''), + ); + if (textMatches.length > 1) { + if (candidatesShareMatchingText(textMatches, text ?? '')) { + return { matched: true, candidates: textMatches }; + } + return { + matched: false, + candidates: textMatches, + uiError: ambiguousSelectorError(selector, textMatches), + }; + } + return { matched: textMatches.length === 1, candidates: textMatches }; + } + + if (candidates.length > 1) { + return { matched: false, candidates, uiError: ambiguousSelectorError(selector, candidates) }; + } + + const match = candidates[0]; + if (!match) { + return { matched: false, candidates }; + } + + switch (predicate) { + case 'enabled': + return { matched: match.state?.enabled === true, candidates }; + case 'focused': + if (match.state?.focused === undefined) { + return { + matched: false, + candidates, + uiError: focusedStateUnavailableError(selector, match), + }; + } + return { matched: match.state.focused === true, candidates }; + } +} + +export function evaluateSettledPredicate(params: { + snapshot: RuntimeSnapshotRecord; + nowMs: number; + settledDurationMs: number; + tracker: SettledTracker; +}): boolean { + const signature = elementSignatures(params.snapshot); + if (params.tracker.signature !== signature) { + params.tracker.signature = signature; + params.tracker.stableSinceMs = params.nowMs; + return params.settledDurationMs === 0; + } + + const stableSinceMs = params.tracker.stableSinceMs ?? params.nowMs; + params.tracker.stableSinceMs = stableSinceMs; + return params.nowMs - stableSinceMs >= params.settledDurationMs; +} + +export function createWaitTimeoutError(params: { + predicate: WaitPredicate; + timeoutMs: number; + selector?: ResolvedWaitSelector; + candidates?: RuntimeElementV1[]; +}): UiAutomationRecoverableError { + const recoveryHint = params.selector + ? 'Selector fields match exact values. Use textContains for partial visible text, inspect the latest runtime snapshot, or adjust the wait selector.' 
+ : 'Inspect the latest runtime snapshot, adjust the wait selector, or retry later.'; + + return { + code: 'WAIT_TIMEOUT', + message: `Timed out after ${params.timeoutMs}ms waiting for UI predicate '${params.predicate}'.`, + recoveryHint, + timeoutMs: params.timeoutMs, + ...(params.selector?.sourceElementRef ? { elementRef: params.selector.sourceElementRef } : {}), + ...(params.candidates !== undefined ? { candidates: params.candidates } : {}), + }; +} diff --git a/src/mcp/tools/ui-automation/snapshot_ui.ts b/src/mcp/tools/ui-automation/snapshot_ui.ts index 6a5558dcc..843c1941a 100644 --- a/src/mcp/tools/ui-automation/snapshot_ui.ts +++ b/src/mcp/tools/ui-automation/snapshot_ui.ts @@ -11,23 +11,31 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { recordSnapshotUiCall } from './shared/snapshot-ui-state.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; -import type { - AccessibilityNode, - CaptureResultDomainResult, -} from '../../../types/domain-results.ts'; +import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { RuntimeSnapshotV1 } from '../../../types/ui-snapshot.ts'; +import { createRuntimeSnapshotNextSteps } from './shared/runtime-next-steps.ts'; import { createCaptureFailureResult, createCaptureSuccessResult, mapAxeCommandError, setCaptureStructuredOutput, } from './shared/domain-result.ts'; +import { + parseRuntimeSnapshotResponse, + RuntimeSnapshotParseError, +} from './shared/runtime-snapshot.ts'; const snapshotUiSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + sinceScreenHash: z + .string() + .min(1, 'sinceScreenHash must not be empty') + .optional() + 
.describe('Return an unchanged response when the current screen hash matches this value'), }); type SnapshotUiParams = z.infer; @@ -35,26 +43,6 @@ type SnapshotUiResult = CaptureResultDomainResult; const LOG_PREFIX = '[AXe]'; -function parseUiHierarchy(responseText: string): AccessibilityNode[] | undefined { - try { - const parsed = JSON.parse(responseText) as unknown; - if (Array.isArray(parsed)) { - return parsed as AccessibilityNode[]; - } - if ( - parsed && - typeof parsed === 'object' && - 'elements' in parsed && - Array.isArray((parsed as { elements?: unknown }).elements) - ) { - return (parsed as { elements: AccessibilityNode[] }).elements; - } - } catch { - // ignore - } - return undefined; -} - export function createSnapshotUiExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, @@ -71,7 +59,14 @@ export function createSnapshotUiExecutor( toolName, }); if (guard.blockedMessage) { - return createCaptureFailureResult(simulatorId, guard.blockedMessage); + return createCaptureFailureResult(simulatorId, guard.blockedMessage, { + uiError: { + code: 'ACTION_FAILED', + message: guard.blockedMessage, + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }, + }); } log('info', `${LOG_PREFIX}/${toolName}: Starting for ${simulatorId}`); @@ -85,20 +80,45 @@ export function createSnapshotUiExecutor( axeHelpers, ); - recordSnapshotUiCall(simulatorId); + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId, + responseText, + allowEmpty: true, + }); + recordRuntimeSnapshot(snapshot); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - const uiHierarchy = parseUiHierarchy(responseText); + if (params.sinceScreenHash === snapshot.screenHash) { + return createCaptureSuccessResult(simulatorId, { + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId, + screenHash: snapshot.screenHash, + seq: snapshot.seq, + 
}, + warnings: [guard.warningText], + }); + } + return createCaptureSuccessResult(simulatorId, { - capture: uiHierarchy - ? { - type: 'ui-hierarchy', - uiHierarchy, - } - : undefined, + capture: snapshot.payload, warnings: [guard.warningText], }); } catch (error) { + if (error instanceof RuntimeSnapshotParseError) { + const message = 'Failed to parse runtime UI snapshot.'; + log('error', `${LOG_PREFIX}/${toolName}: Failed - ${message}`); + return createCaptureFailureResult(simulatorId, message, { + details: [error.message], + uiError: { + code: 'SNAPSHOT_PARSE_FAILED', + message, + recoveryHint: 'Run snapshot_ui again after the app is fully launched and responsive.', + }, + }); + } + const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to get accessibility hierarchy.', }); @@ -122,11 +142,22 @@ export async function snapshot_uiLogic( setCaptureStructuredOutput(ctx, result); - ctx.nextStepParams = { - snapshot_ui: { simulatorId: params.simulatorId }, - tap: { simulatorId: params.simulatorId, x: 0, y: 0 }, - screenshot: { simulatorId: params.simulatorId }, - }; + if (!result.didError && result.capture && 'type' in result.capture) { + let runtimeSnapshot: RuntimeSnapshotV1 | undefined; + if (result.capture.type === 'runtime-snapshot') { + runtimeSnapshot = result.capture; + } else if (result.capture.type === 'runtime-snapshot-unchanged') { + runtimeSnapshot = getRuntimeSnapshot(params.simulatorId)?.payload; + } + + if (runtimeSnapshot) { + ctx.nextSteps = createRuntimeSnapshotNextSteps({ + simulatorId: params.simulatorId, + runtimeSnapshot, + includeRefreshAndWait: true, + }); + } + } } const publicSchemaObject = z.strictObject( diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index 145f32a4f..5d8ad46f6 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -1,7 +1,7 @@ /** * UI Testing Plugin: Swipe * - * Swipe from one coordinate to another on iOS simulator with 
customizable duration and delta. + * Swipes within a semantic UI element from the runtime snapshot store. */ import * as z from 'zod'; @@ -17,8 +17,10 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementSwipePoints } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -26,30 +28,37 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const swipeSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x1: z.number().int({ message: 'Start X coordinate' }), - y1: z.number().int({ message: 'Start Y coordinate' }), - x2: z.number().int({ message: 'End X coordinate' }), - y2: z.number().int({ message: 'End Y coordinate' }), + withinElementRef: z.string().min(1, { message: 'withinElementRef must be non-empty' }), + direction: z.enum(['up', 'down', 'left', 'right']).describe('up|down|left|right'), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) .optional() .describe('seconds'), - delta: z.number().min(0, { message: 'Delta must be non-negative' }).optional(), + distance: z + .number() + .positive({ 
message: 'Distance must be greater than 0' }) + .max(1, { message: 'Distance must be at most 1' }) + .optional() + .describe('Normalized stroke fraction greater than 0 and up to 1'), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('seconds'), postDelay: z .number() .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); @@ -68,41 +77,63 @@ export function createSwipeExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'swipe'; - const { simulatorId, x1, y1, x2, y2, duration, delta, preDelay, postDelay } = params; - const baseAction = { type: 'swipe' as const }; - const fullAction = { + const { simulatorId, withinElementRef, direction, duration, distance, preDelay, postDelay } = + params; + const unresolvedAction = { type: 'swipe' as const, - from: { x: x1, y: y1 }, - to: { x: x2, y: y2 }, + withinElementRef, + direction, ...(duration !== undefined ? 
{ durationSeconds: duration } : {}), }; + const resolution = resolveElementRef(simulatorId, withinElementRef, 'swipeWithin'); + if (!resolution.ok) { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const points = getRuntimeElementSwipePoints(resolution.element, direction, distance); + if (!points.ok) { + const uiError = createUiAutomationRecoverableError({ + code: 'TARGET_NOT_ACTIONABLE', + message: points.message, + elementRef: withinElementRef, + }); + return createUiActionFailureResult(unresolvedAction, simulatorId, points.message, { + uiError, + }); + } + + const action = { + ...unresolvedAction, + from: points.from, + to: points.to, + }; + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, toolName, }); if (guard.blockedMessage) { - return createUiActionFailureResult(baseAction, simulatorId, guard.blockedMessage); + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } const commandArgs = [ 'swipe', '--start-x', - String(x1), + String(points.from.x), '--start-y', - String(y1), + String(points.from.y), '--end-x', - String(x2), + String(points.to.x), '--end-y', - String(y2), + String(points.to.y), ]; if (duration !== undefined) { commandArgs.push('--duration', String(duration)); } - if (delta !== undefined) { - commandArgs.push('--delta', String(delta)); - } if (preDelay !== undefined) { commandArgs.push('--pre-delay', String(preDelay)); } @@ -110,28 +141,50 @@ export function createSwipeExecutor( commandArgs.push('--post-delay', String(postDelay)); } - const optionsText = duration ? ` duration=${duration}s` : ''; + const optionsText = duration !== undefined ? 
` duration=${duration}s` : ''; log( 'info', - `${LOG_PREFIX}/${toolName}: Starting swipe (${x1},${y1})->(${x2},${y2})${optionsText} on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting ${direction} swipe within ${withinElementRef}${optionsText} on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'swipe', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(fullAction, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => 'Failed to simulate swipe.', + axeFailureMessage: () => + `Failed to simulate ${direction} swipe within ${withinElementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); - return createUiActionFailureResult(baseAction, simulatorId, failure.message, { + return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef: withinElementRef, + }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + previousRuntimeSnapshot: resolution.snapshot.payload, + ...(captureResult.uiError ? 
{ uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index 960d8bc7f..5b89b294f 100644 --- a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -11,8 +11,14 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; -import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; +import { defaultAxeHelpers } from './shared/axe-command.ts'; +import { + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, +} from './shared/semantic-tap.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -20,95 +26,42 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; -const baseTapSchema = z.object({ +const tapSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z - .number() - .int({ message: 'X coordinate must be an integer' }) - .optional() - .describe( - 'Fallback tap X coordinate. Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', - ), - y: z - .number() - .int({ message: 'Y coordinate must be an integer' }) - .optional() - .describe( - 'Fallback tap Y coordinate. 
Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', - ), - id: z - .string() - .min(1, { message: 'Id must be non-empty' }) - .optional() - .describe('Recommended tap target: accessibility element id (AXUniqueId).'), - label: z - .string() - .min(1, { message: 'Label must be non-empty' }) - .optional() - .describe('Recommended when unique: accessibility label (AXLabel).'), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('seconds'), postDelay: z .number() .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); -const tapSchema = baseTapSchema.superRefine((values, ctx) => { - const hasX = values.x !== undefined; - const hasY = values.y !== undefined; - const hasId = values.id !== undefined; - const hasLabel = values.label !== undefined; - - if (!hasX && !hasY && hasId && hasLabel) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['id'], - message: 'Provide either id or label, not both.', - }); - } - - if (hasX !== hasY) { - if (!hasX) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['x'], - message: 'X coordinate is required when y is provided.', - }); - } - if (!hasY) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['y'], - message: 'Y coordinate is required when x is provided.', - }); - } - } - - if (!hasX && !hasY && !hasId && !hasLabel) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['x'], - message: 'Provide an element id/label (recommended) or x/y coordinates as fallback.', - }); - } -}); - type TapParams = z.infer; type TapResult = UiActionResultDomainResult; -const publicSchemaObject = z.strictObject(baseTapSchema.omit({ simulatorId: true } as const).shape); +const publicSchemaObject = 
z.strictObject(tapSchema.omit({ simulatorId: true } as const).shape); const LOG_PREFIX = '[AXe]'; +function delayMs(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} + export function createTapExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, @@ -116,15 +69,22 @@ export function createTapExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'tap'; - const { simulatorId, x, y, id, label, preDelay, postDelay } = params; - const action = - x !== undefined && y !== undefined - ? { type: 'tap' as const, x, y } - : id !== undefined - ? { type: 'tap' as const, id } - : label !== undefined - ? { type: 'tap' as const, label } - : { type: 'tap' as const }; + const { simulatorId, elementRef, preDelay, postDelay } = params; + const unresolvedAction = { type: 'tap' as const, elementRef }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'tap'); + if (!resolution.ok) { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const activationPoint = getRuntimeElementActivationPoint(resolution.element); + const action = { + ...unresolvedAction, + x: activationPoint.x, + y: activationPoint.y, + }; const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -135,57 +95,74 @@ export function createTapExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - let targetDescription = ''; - let actionDescription = ''; - let usesCoordinates = false; - const commandArgs = ['tap']; - - if (x !== undefined && y !== undefined) { - usesCoordinates = true; - targetDescription = `(${x}, ${y})`; - actionDescription = `Tap at ${targetDescription}`; - commandArgs.push('-x', String(x), '-y', String(y)); - } else if (id !== undefined) { - targetDescription = `element id "${id}"`; - actionDescription = `Tap on 
${targetDescription}`; - commandArgs.push('--id', id); - } else if (label !== undefined) { - targetDescription = `element label "${label}"`; - actionDescription = `Tap on ${targetDescription}`; - commandArgs.push('--label', label); - } else { - return createUiActionFailureResult( - action, - simulatorId, - 'Parameter validation failed: Missing tap target', - ); - } - - if (preDelay !== undefined) { - commandArgs.push('--pre-delay', String(preDelay)); + const usesTouchActivation = resolution.element.publicElement.role === 'switch'; + const extraArgs: string[] = []; + if (!usesTouchActivation && preDelay !== undefined) { + extraArgs.push('--pre-delay', String(preDelay)); } - if (postDelay !== undefined) { - commandArgs.push('--post-delay', String(postDelay)); + if (!usesTouchActivation && postDelay !== undefined) { + extraArgs.push('--post-delay', String(postDelay)); } - - log('info', `${LOG_PREFIX}/${toolName}: Starting for ${targetDescription} on ${simulatorId}`); + const tapCommand = createSemanticTapCommand( + resolution.element, + elementRef, + extraArgs, + resolution.snapshot.elements, + ); + + log( + 'info', + `${LOG_PREFIX}/${toolName}: Starting for ${tapCommand.targetDescription} on ${simulatorId}`, + ); try { - await executeAxeCommand(commandArgs, simulatorId, 'tap', executor, axeHelpers); + if (usesTouchActivation && preDelay !== undefined) { + await delayMs(preDelay * 1000); + } + await executeSemanticTapWithAmbiguityFallback({ + command: tapCommand, + simulatorId, + executor, + axeHelpers, + }); + clearRuntimeSnapshot(simulatorId); + if (usesTouchActivation && postDelay !== undefined) { + await delayMs(postDelay * 1000); + } log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [ - guard.warningText, - usesCoordinates ? 
getSnapshotUiWarning(simulatorId) : null, - ]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => `Failed to simulate ${actionDescription.toLowerCase()}.`, + axeFailureMessage: () => `Failed to simulate tap on elementRef ${elementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: tapCommand.usedSelector ? 'UI_STATE_CHANGED' : 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + previousRuntimeSnapshot: resolution.snapshot.payload, + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } @@ -204,7 +181,7 @@ export async function tapLogic( export const schema = getSessionAwareToolSchemaShape({ sessionAware: publicSchemaObject, - legacy: baseTapSchema, + legacy: tapSchema, }); export const handler = createSessionAwareTool({ diff --git a/src/mcp/tools/ui-automation/touch.ts b/src/mcp/tools/ui-automation/touch.ts index 650dce8e2..27e4bc666 100644 --- a/src/mcp/tools/ui-automation/touch.ts +++ b/src/mcp/tools/ui-automation/touch.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Touch * - * Perform touch down/up events at specific coordinates. - * Use snapshot_ui for precise coordinates (don't guess from screenshots). + * Performs touch down/up events on a semantic UI element from the runtime snapshot store. 
*/ import * as z from 'zod'; @@ -18,7 +17,8 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -26,27 +26,43 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; -const touchSchema = z.object({ +const touchSchemaObject = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z.number().int({ message: 'X coordinate must be an integer' }), - y: z.number().int({ message: 'Y coordinate must be an integer' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), down: z.boolean().optional(), up: z.boolean().optional(), delay: z .number() .min(0, { message: 'Delay must be non-negative' }) + .max(10, { message: 'Delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); -type TouchParams = z.infer; +function refineTouchDelay(value: z.infer, ctx: z.RefinementCtx): void { + if (value.delay !== undefined && !(value.down === true && value.up === true)) { + ctx.addIssue({ + code: 'custom', + path: ['delay'], + message: 'Delay can only be used when both down and up are true', + }); + } +} + +const touchSchema = touchSchemaObject.superRefine(refineTouchDelay); + +type TouchParams = z.infer; type TouchResult = UiActionResultDomainResult; -const 
publicSchemaObject = z.strictObject(touchSchema.omit({ simulatorId: true } as const).shape); +const publicSchemaObject = z.strictObject( + touchSchemaObject.omit({ simulatorId: true } as const).shape, +); const LOG_PREFIX = '[AXe]'; @@ -57,29 +73,43 @@ export function createTouchExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'touch'; - const { simulatorId, x, y, down, up, delay } = params; - const actionText = down && up ? 'touch down+up' : down ? 'touch down' : 'touch up'; - const baseAction = { type: 'touch' as const }; - const fullAction = { type: 'touch' as const, event: actionText, x, y }; + const { simulatorId, elementRef, down, up, delay } = params; + const actionText = + down && up ? 'touch down+up' : down ? 'touch down' : up ? 'touch up' : undefined; + const unresolvedAction = { + type: 'touch' as const, + elementRef, + ...(actionText ? { event: actionText } : {}), + }; if (!down && !up) { return createUiActionFailureResult( - baseAction, + unresolvedAction, simulatorId, 'At least one of "down" or "up" must be true', ); } + const resolution = resolveElementRef(simulatorId, elementRef, 'touch'); + if (!resolution.ok) { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const center = getRuntimeElementActivationPoint(resolution.element); + const action = { ...unresolvedAction, x: center.x, y: center.y }; + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, toolName, }); if (guard.blockedMessage) { - return createUiActionFailureResult(baseAction, simulatorId, guard.blockedMessage); + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const commandArgs = ['touch', '-x', String(x), '-y', String(y)]; + const commandArgs = ['touch', '-x', String(center.x), '-y', String(center.y)]; if (down) { commandArgs.push('--down'); } @@ -92,23 +122,29 @@ export function 
createTouchExecutor( log( 'info', - `${LOG_PREFIX}/${toolName}: Starting ${actionText} at (${x}, ${y}) on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting ${actionText ?? 'touch'} on elementRef ${elementRef} on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'touch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(fullAction, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to execute touch event.', }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); - return createUiActionFailureResult(baseAction, simulatorId, failure.message, { + return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; @@ -129,7 +165,7 @@ export async function touchLogic( export const schema = getSessionAwareToolSchemaShape({ sessionAware: publicSchemaObject, - legacy: touchSchema, + legacy: touchSchemaObject, }); export const handler = createSessionAwareTool({ diff --git a/src/mcp/tools/ui-automation/type_text.ts b/src/mcp/tools/ui-automation/type_text.ts index a18c09208..84a443d00 100644 --- a/src/mcp/tools/ui-automation/type_text.ts +++ b/src/mcp/tools/ui-automation/type_text.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Type Text * - * Types text into the iOS Simulator using keyboard input. - * Supports standard US keyboard characters. 
+ * Types text into a semantic UI element from the runtime snapshot store. */ import * as z from 'zod'; @@ -18,22 +17,53 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, +} from './shared/semantic-tap.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const LOG_PREFIX = '[AXe]'; +const AXE_UNSUPPORTED_TEXT_MESSAGE = + 'Text contains characters unsupported by AXe typing. 
AXe type supports US keyboard characters only.'; + +function containsUnsupportedAxeTypeText(text: string): boolean { + for (const character of text) { + const codePoint = character.codePointAt(0); + if (codePoint === undefined || codePoint < 0x20 || codePoint > 0x7e) { + return true; + } + } + + return false; +} const typeTextSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - text: z.string().min(1, { message: 'Text cannot be empty' }), + elementRef: z + .string() + .min(1, { message: 'elementRef must be non-empty' }) + .describe( + 'Required runtime text-field elementRef from the latest snapshot_ui or wait_for_ui output', + ), + text: z.string().min(1, { message: 'Text cannot be empty' }).describe('Text to type'), + replaceExisting: z + .boolean() + .optional() + .describe('Select and replace existing field contents before typing'), }); type TypeTextParams = z.infer; @@ -50,8 +80,15 @@ export function createTypeTextExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'type_text'; - const { simulatorId, text } = params; - const action = { type: 'type-text' as const }; + const { simulatorId, elementRef, text, replaceExisting } = params; + const action = { type: 'type-text' as const, elementRef, textLength: text.length }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'typeText'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -62,26 +99,98 @@ export function createTypeTextExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const commandArgs = ['type', text]; + if (containsUnsupportedAxeTypeText(text)) { + return createUiActionFailureResult(action, simulatorId, AXE_UNSUPPORTED_TEXT_MESSAGE, { + uiError: createUiAutomationRecoverableError({ + code: 
'ACTION_FAILED', + message: AXE_UNSUPPORTED_TEXT_MESSAGE, + recoveryHint: 'Use only US keyboard characters supported by AXe type.', + elementRef, + }), + }); + } + + const focusCommand = createSemanticTapCommand( + resolution.element, + elementRef, + [], + resolution.snapshot.elements, + ); + const typeCommandArgs = ['type', text]; log( 'info', - `${LOG_PREFIX}/${toolName}: Starting type "${text.substring(0, 20)}..." on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting type into elementRef ${elementRef}, length=${text.length} on ${simulatorId}`, ); try { - await executeAxeCommand(commandArgs, simulatorId, 'type', executor, axeHelpers); + await executeSemanticTapWithAmbiguityFallback({ + command: focusCommand, + simulatorId, + executor, + axeHelpers, + }); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to focus elementRef ${elementRef} before typing.`, + }); + log('error', `${LOG_PREFIX}/${toolName}: Focus failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message, { + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), + }); + } + + try { + if (replaceExisting === true) { + await executeAxeCommand( + ['key-combo', '--modifiers', '227', '--key', '4'], + simulatorId, + 'key-combo', + executor, + axeHelpers, + ); + } + await executeAxeCommand(typeCommandArgs, simulatorId, 'type', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => 'Failed to simulate 
text typing.', + axeFailureMessage: () => `Failed to type text into elementRef ${elementRef}.`, }); - log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + log('error', `${LOG_PREFIX}/${toolName}: Typing failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message, { - details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/mcp/tools/ui-automation/wait_for_ui.ts b/src/mcp/tools/ui-automation/wait_for_ui.ts new file mode 100644 index 000000000..3dc77b0bc --- /dev/null +++ b/src/mcp/tools/ui-automation/wait_for_ui.ts @@ -0,0 +1,421 @@ +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from '../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { + 
RuntimeElementRoleV1, + RuntimeElementV1, + RuntimeSnapshotRecord, + UiWaitMatch, +} from '../../../types/ui-snapshot.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +import { recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import { + parseRuntimeSnapshotResponse, + RuntimeSnapshotParseError, +} from './shared/runtime-snapshot.ts'; +import { + createCaptureFailureResult, + createCaptureSuccessResult, + mapAxeCommandError, + setCaptureStructuredOutput, +} from './shared/domain-result.ts'; +import { + createWaitTimeoutError, + evaluateElementPredicate, + evaluateSettledPredicate, + evaluateTextContainsPredicate, + hasSelectorFields, + resolveElementSelector, + selectorFromParams, + waitPredicates, +} from './shared/wait-predicate.ts'; +import type { ResolvedWaitSelector, SettledTracker } from './shared/wait-predicate.ts'; + +const DEFAULT_TIMEOUT_MS = 5_000; +const DEFAULT_POLL_INTERVAL_MS = 250; +const DEFAULT_SETTLED_DURATION_MS = 500; +const LOG_PREFIX = '[AXe]'; + +const waitForUiSchemaShape = { + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + predicate: z.enum(waitPredicates), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }).optional(), + identifier: z.string().min(1, { message: 'identifier must be non-empty' }).optional(), + label: z.string().min(1, { message: 'label must be non-empty' }).optional(), + role: z + .enum([ + 'application', + 'button', + 'cell', + 'image', + 'keyboard-key', + 'list', + 'menu', + 'other', + 'scroll-view', + 'slider', + 'switch', + 'tab', + 'text', + 'text-field', + 'window', + ] satisfies RuntimeElementRoleV1[]) + .optional(), + value: z.string().min(1, { message: 'value must be non-empty' }).optional(), + text: z + .string() + .min(1, { message: 'text must be non-empty' }) + .refine((value) => value.replace(/\s+/g, ' ').trim().length > 0, { + message: 'text must contain 
non-whitespace characters', + }) + .optional(), + timeoutMs: z + .number() + .int({ message: 'timeoutMs must be an integer number of milliseconds' }) + .min(0, { message: 'timeoutMs must be non-negative' }) + .optional() + .describe('milliseconds'), + pollIntervalMs: z + .number() + .int({ message: 'pollIntervalMs must be an integer number of milliseconds' }) + .min(1, { message: 'pollIntervalMs must be at least 1 millisecond' }) + .optional() + .describe('milliseconds'), + settledDurationMs: z + .number() + .int({ message: 'settledDurationMs must be an integer number of milliseconds' }) + .min(0, { message: 'settledDurationMs must be non-negative' }) + .optional() + .describe('milliseconds'), +}; + +const waitForUiSchema = z.strictObject(waitForUiSchemaShape).superRefine((value, ctx) => { + if ( + value.predicate !== 'settled' && + value.predicate !== 'textContains' && + !(value.predicate === 'gone' && value.text !== undefined) && + !hasSelectorFields(value) + ) { + ctx.addIssue({ + code: 'custom', + path: ['elementRef'], + message: `${value.predicate} waits require at least one selector field`, + }); + } + + if (value.predicate === 'textContains' && value.text === undefined) { + ctx.addIssue({ + code: 'custom', + path: ['text'], + message: 'textContains waits require text', + }); + } + + if ( + value.predicate !== 'textContains' && + value.predicate !== 'gone' && + value.text !== undefined + ) { + ctx.addIssue({ + code: 'custom', + path: ['text'], + message: 'text is only supported for textContains and gone waits', + }); + } +}); + +type WaitForUiParams = z.infer; +type WaitForUiResult = CaptureResultDomainResult; + +interface WaitTiming { + now: () => number; + sleep: (durationMs: number) => Promise; +} + +function defaultSleep(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} + +type WaitPredicateEvaluation = + | ReturnType + | ReturnType + | ReturnType; + +function createWaitMatch( + predicate: 
WaitForUiParams['predicate'], + matches: RuntimeElementV1[] | undefined, +): UiWaitMatch | undefined { + if (predicate === 'settled' || matches === undefined) { + return undefined; + } + return { predicate, matches }; +} + +function evaluateWaitPredicate(args: { + predicate: WaitForUiParams['predicate']; + selector: ResolvedWaitSelector | null; + snapshot: RuntimeSnapshotRecord; + text?: string; + nowMs: number; + settledDurationMs: number; + settledTracker: SettledTracker; +}): WaitPredicateEvaluation { + const { predicate, selector, snapshot, text, nowMs, settledDurationMs, settledTracker } = args; + + if (predicate === 'settled') { + return evaluateSettledPredicate({ + snapshot, + nowMs, + settledDurationMs, + tracker: settledTracker, + }); + } + + if (predicate === 'textContains' && !selector) { + return evaluateTextContainsPredicate({ snapshot, text: text! }); + } + + if (predicate === 'gone' && !selector && text) { + const textMatch = evaluateTextContainsPredicate({ snapshot, text }); + return { + matched: (textMatch.candidates ?? []).length === 0, + candidates: textMatch.candidates ?? [], + uiError: undefined, + }; + } + + return evaluateElementPredicate({ predicate, selector: selector!, snapshot, text }); +} + +export function createWaitForUiExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + timing: WaitTiming = { now: Date.now, sleep: defaultSleep }, +): NonStreamingExecutor { + return async (params) => { + const toolName = 'wait_for_ui'; + const { simulatorId, predicate, elementRef, text } = params; + const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const pollIntervalMs = params.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS; + const settledDurationMs = params.settledDurationMs ?? 
DEFAULT_SETTLED_DURATION_MS; + const startedAtMs = timing.now(); + const deadlineMs = startedAtMs + timeoutMs; + let selector: ResolvedWaitSelector | null = null; + if (predicate !== 'settled') { + if (elementRef) { + const selectorResolution = resolveElementSelector(simulatorId, elementRef, startedAtMs); + if (!selectorResolution.ok) { + return createCaptureFailureResult(simulatorId, selectorResolution.error.message, { + uiError: selectorResolution.error, + }); + } + selector = selectorResolution.selector; + } else { + selector = selectorFromParams(params); + } + } + + if (predicate !== 'settled' && predicate !== 'textContains' && !selector && !text) { + const message = `${predicate} waits require at least one selector field.`; + return createCaptureFailureResult(simulatorId, message, { + uiError: { + code: 'TARGET_NOT_FOUND', + message, + recoveryHint: + 'Provide elementRef, identifier, label, role, or value, or use settled for selector-free waits.', + }, + }); + } + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + return createCaptureFailureResult(simulatorId, guard.blockedMessage, { + uiError: { + code: 'ACTION_FAILED', + message: guard.blockedMessage, + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }, + }); + } + + let latestSnapshot: RuntimeSnapshotRecord | null = null; + let latestCandidates: RuntimeElementV1[] = []; + let lastParseError: RuntimeSnapshotParseError | null = null; + let lastPollError: string | null = null; + const settledTracker: SettledTracker = { signature: null, stableSinceMs: null }; + + log('info', `${LOG_PREFIX}/${toolName}: Waiting for ${predicate} on ${simulatorId}`); + + while (true) { + try { + const responseText = await executeAxeCommand( + ['describe-ui'], + simulatorId, + 'describe-ui', + executor, + axeHelpers, + ); + const nowMs = 
timing.now(); + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId, + responseText, + nowMs, + allowEmpty: true, + }); + latestSnapshot = snapshot; + lastParseError = null; + lastPollError = null; + recordRuntimeSnapshot(snapshot); + + const matched = evaluateWaitPredicate({ + predicate, + selector, + snapshot, + text, + nowMs, + settledDurationMs, + settledTracker, + }); + + if (typeof matched === 'boolean') { + if (matched) { + return createCaptureSuccessResult(simulatorId, { + capture: snapshot.payload, + warnings: [guard.warningText], + }); + } + } else { + latestCandidates = matched.candidates ?? []; + if (matched.uiError) { + return createCaptureFailureResult(simulatorId, matched.uiError.message, { + warnings: [guard.warningText], + uiError: matched.uiError, + capture: snapshot.payload, + }); + } + if (matched.matched) { + return createCaptureSuccessResult(simulatorId, { + capture: snapshot.payload, + warnings: [guard.warningText], + waitMatch: createWaitMatch(predicate, matched.candidates), + }); + } + } + } catch (error) { + if (error instanceof RuntimeSnapshotParseError) { + lastParseError = error; + lastPollError = null; + } else { + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => 'Failed to poll runtime UI snapshot.', + }); + lastPollError = failure.message; + lastParseError = null; + } + } + + const nowMs = timing.now(); + if (nowMs >= deadlineMs) { + break; + } + + await timing.sleep(Math.min(pollIntervalMs, deadlineMs - nowMs)); + } + + if (latestSnapshot) { + const uiError = createWaitTimeoutError({ + predicate, + timeoutMs, + selector: selector ?? 
undefined, + candidates: latestCandidates, + }); + return createCaptureFailureResult(simulatorId, uiError.message, { + warnings: [guard.warningText], + uiError, + capture: latestSnapshot.payload, + }); + } + + if (lastParseError) { + const message = 'Failed to parse runtime UI snapshot while waiting for UI.'; + return createCaptureFailureResult(simulatorId, message, { + details: [lastParseError.message], + uiError: { + code: 'SNAPSHOT_PARSE_FAILED', + message, + recoveryHint: 'Retry after the app is fully launched and responsive.', + }, + }); + } + + const message = + lastPollError ?? `Timed out after ${timeoutMs}ms waiting for UI predicate '${predicate}'.`; + return createCaptureFailureResult(simulatorId, message, { + uiError: { + code: lastPollError ? 'ACTION_FAILED' : 'WAIT_TIMEOUT', + message, + recoveryHint: 'Retry after the app is fully launched and responsive.', + ...(lastPollError ? {} : { timeoutMs }), + }, + }); + }; +} + +export async function wait_for_uiLogic( + params: WaitForUiParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + timing?: WaitTiming, +): Promise { + const ctx = getHandlerContext(); + const executeWaitForUi = createWaitForUiExecutor(executor, axeHelpers, debuggerManager, timing); + const result = await executeWaitForUi(params); + + setCaptureStructuredOutput(ctx, result, { headerTitle: 'Wait for UI' }); + + if (!result.didError) { + ctx.nextStepParams = { + snapshot_ui: { simulatorId: params.simulatorId }, + wait_for_ui: { simulatorId: params.simulatorId, predicate: 'settled' }, + }; + } +} + +const publicSchemaObject = z.strictObject( + z.object(waitForUiSchemaShape).omit({ simulatorId: true } as const).shape, +); + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: waitForUiSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: toInternalSchema(waitForUiSchema), 
+ logicFunction: (params: WaitForUiParams, executor: CommandExecutor) => + wait_for_uiLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/rendering/render.ts b/src/rendering/render.ts index 2b0d6fded..ac72786a7 100644 --- a/src/rendering/render.ts +++ b/src/rendering/render.ts @@ -95,6 +95,7 @@ function createRenderHooks( outputStyle?: OutputStyle; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; }, ): RenderSessionHooks { const suppressWarnings = sessionStore.get('suppressWarnings'); @@ -118,6 +119,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? true, }), }; case 'raw': @@ -144,6 +146,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? true, }); if (text) { process.stdout.write(text); @@ -158,6 +161,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? 
true, }); return { @@ -179,6 +183,7 @@ export interface RenderSessionOptions { outputStyle?: OutputStyle; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } export function createRenderSession( @@ -195,7 +200,7 @@ export function renderTranscript( strategy: RenderStrategy, options?: Pick< RenderSessionOptions, - 'runtime' | 'outputStyle' | 'filePathRenderStyle' | 'includeHeaderDetails' + 'runtime' | 'outputStyle' | 'filePathRenderStyle' | 'includeHeaderDetails' | 'includeNextSteps' >, ): string { return createRenderHooks(strategy, { ...options, interactive: false }).finalize(input); diff --git a/src/rendering/types.ts b/src/rendering/types.ts index 9bf3299e2..1b3819d4e 100644 --- a/src/rendering/types.ts +++ b/src/rendering/types.ts @@ -24,6 +24,9 @@ export interface RenderSession { export interface RenderHints { headerTitle?: string; + runtimeSnapshot?: { + suppressedTargetRefs?: readonly string[]; + }; } export interface StructuredToolOutput { diff --git a/src/runtime/__tests__/tool-invoker.test.ts b/src/runtime/__tests__/tool-invoker.test.ts index 73a5e0fb8..2e7c47cfd 100644 --- a/src/runtime/__tests__/tool-invoker.test.ts +++ b/src/runtime/__tests__/tool-invoker.test.ts @@ -664,6 +664,50 @@ describe('DefaultToolInvoker next steps post-processing', () => { expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id 123'); }); + it('prefers the current workflow when normalizing duplicate next-step tool names', async () => { + const directHandler = emitNextStepsHandler('ok', [ + { + tool: 'screenshot', + label: 'Take screenshot', + params: { simulatorId: '123' }, + }, + ]); + + const catalog = createToolCatalog([ + makeTool({ + id: 'snapshot_ui', + cliName: 'snapshot-ui', + mcpName: 'snapshot_ui', + workflow: 'ui-automation', + stateful: false, + handler: directHandler, + }), + makeTool({ + id: 'screenshot', + cliName: 'screenshot', + mcpName: 'screenshot', + workflow: 'simulator', + 
stateful: false, + handler: emitHandler('simulator screenshot'), + }), + makeTool({ + id: 'screenshot', + cliName: 'screenshot', + mcpName: 'screenshot', + workflow: 'ui-automation', + stateful: false, + handler: emitHandler('ui screenshot'), + }), + ]); + + const invoker = new DefaultToolInvoker(catalog); + const response = await invokeAndFinalize(invoker, 'snapshot-ui', {}, { runtime: 'cli' }); + + const text = response.content.map((c) => (c.type === 'text' ? c.text : '')).join('\n'); + expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id 123'); + expect(text).not.toContain('xcodebuildmcp simulator screenshot --simulator-id 123'); + }); + it('injects manifest template next steps from dynamic nextStepParams when response omits nextSteps', async () => { const directHandler = emitNextStepsHandler('ok', undefined, { snapshot_ui: { simulatorId: '12345678-1234-4234-8234-123456789012' }, diff --git a/src/runtime/tool-invoker.ts b/src/runtime/tool-invoker.ts index 75935866c..1f82f2abe 100644 --- a/src/runtime/tool-invoker.ts +++ b/src/runtime/tool-invoker.ts @@ -135,13 +135,31 @@ function mergeTemplateAndResponseNextSteps( }); } -function normalizeNextSteps(steps: NextStep[], catalog: ToolCatalog): NextStep[] { +function getNextStepTarget(params: { + catalog: ToolCatalog; + mcpName: string; + preferredWorkflow: string; +}): ToolDefinition | null { + return ( + params.catalog.tools.find( + (tool) => + tool.mcpName.toLowerCase() === params.mcpName.toLowerCase().trim() && + tool.workflow === params.preferredWorkflow, + ) ?? 
params.catalog.getByMcpName(params.mcpName) + ); +} + +function normalizeNextSteps( + steps: NextStep[], + catalog: ToolCatalog, + preferredWorkflow: string, +): NextStep[] { return steps.map((step) => { if (!step.tool) { return step; } - const target = catalog.getByMcpName(step.tool); + const target = getNextStepTarget({ catalog, mcpName: step.tool, preferredWorkflow }); if (!target) { return step; } @@ -238,7 +256,7 @@ export function postProcessSession(params: { return; } - const normalized = normalizeNextSteps(finalSteps, catalog); + const normalized = normalizeNextSteps(finalSteps, catalog, tool.workflow); if (normalized.length > 0) { session.setNextSteps?.(normalized, runtime); diff --git a/src/server/server.ts b/src/server/server.ts index 1e35e9cac..1de931b7d 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -42,7 +42,7 @@ Capabilities: - macOS workflows: Build, run, and test macOS applications - Log capture: Stream and capture logs from simulators and devices - LLDB debugging: Attach debugger, set breakpoints, inspect stack traces and variables, execute LLDB commands -- UI automation: Capture screenshots, inspect view hierarchy with coordinates, perform taps/swipes/gestures, type text, press hardware buttons +- UI automation: Capture screenshots, inspect runtime UI snapshots, perform taps/swipes/gestures, type text, press hardware buttons, and batch multiple same-screen elementRef taps - SwiftPM: Build, run, test, and manage Swift Package Manager projects - Project scaffolding: Generate new iOS/macOS project templates diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json index bbfc05f0e..4b8c807a1 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } 
+ }, + "nextSteps": [ + "Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id " + ] } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json index 4ef507f23..f17368243 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json @@ -18,5 +18,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id " + ] } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json index 6968362b0..29c92f1c9 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate swipe.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "swipe" + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-not-actionable.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-not-actionable.json new file mode 100644 index 000000000..d7d3c80b9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'swipeWithin'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'swipeWithin'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json index 30d9ab14d..ffb164bb3 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate text typing.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "type-text" + "type": "type-text", + "elementRef": "e3", + "textLength": 5 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { 
+ "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-not-actionable.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-not-actionable.json new file mode 100644 index 000000000..e403f06f9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'typeText'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "type-text", + "elementRef": "e3", + "textLength": 5 + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'typeText'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--success.json deleted file mode 100644 index a2686f68c..000000000 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--success.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "2", - "didError": false, - "error": null, - "data": { - "summary": { - "status": "SUCCEEDED" - }, - "action": { - "type": "type-text" - }, - "artifacts": { - "simulatorId": "" - } - } -} diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/wait-for-ui--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/wait-for-ui--success.json 
new file mode 100644 index 000000000..b677ec15a --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/wait-for-ui--success.json @@ -0,0 +1,53 @@ +{ + "schema": "xcodebuildmcp.output.capture-result", + "schemaVersion": "2", + "didError": false, + "error": null, + "data": { + "summary": { + "status": "SUCCEEDED" + }, + "artifacts": { + "simulatorId": "" + }, + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|Âą||", + "e5|tap|button|%||", + "e6|tap|button|Ãˇ||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", + "e21|tap|button|=||" + ], + "scroll": [], + "text": [ + "e2|text|text|0||" + ], + "udid": "" + }, + "waitMatch": { + "predicate": "exists", + "matches": [ + "e3|tap|button|C||" + ] + } + } +} diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt index 58a78296a..5afe9edbc 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Hardware button 'home' pressed successfully. + +Next steps: +1. 
Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt index f7cbf6735..322f385a9 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Gesture 'scroll-down' executed successfully. + +Next steps: +1. Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt index c687f6b65..9782d138b 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Key press (code: 4) simulated successfully. + +Next steps: +1. Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt index 6950454c4..7fa5eedd2 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Key sequence [4,5,6] executed successfully. + +Next steps: +1. 
Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt index b04be39af..8884e85c9 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt @@ -3,8 +3,9 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate long press at (100, 400). +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt index 678f04137..2bd35e372 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt @@ -3,8 +3,7 @@ Simulator: -Warnings (1): +✅ Long press on elementRef e3 for 500ms simulated successfully. - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Long press at (100, 400) for 500ms simulated successfully. +Next steps: +1. 
Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt index 412aea1fd..02c8fe280 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt @@ -3,586 +3,36 @@ Simulator: -Accessibility Hierarchy - ```json - [ - { - "AXFrame" : "{{0, 0}, {402, 874}}", - "AXUniqueId" : null, - "frame" : { - "y" : 0, - "x" : 0, - "width" : 402, - "height" : 874 - }, - "role_description" : "application", - "AXLabel" : "Calculator", - "content_required" : false, - "type" : "Application", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXApplication", - "children" : [ - { - "AXFrame" : "{{344, 250.5}, {34, 67}}", - "AXUniqueId" : null, - "frame" : { - "y" : 250.5, - "x" : 344, - "width" : 34, - "height" : 67 - }, - "role_description" : "text", - "AXLabel" : "0", - "content_required" : false, - "type" : "StaticText", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXStaticText", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "C", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - 
"role_description" : "button", - "AXLabel" : "Âą", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "%", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "Ãˇ", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "7", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "8", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" 
: "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "9", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "×", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "4", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "5", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 206.5, - "width" : 82.7, - 
"height" : 81 - }, - "role_description" : "button", - "AXLabel" : "6", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "-", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "1", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "2", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "3", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - 
"enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "+", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "0", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : ".", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "=", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - } - ], - "subrole" : null, - "pid" : - } - ] - ``` +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| 
+ e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|Âą|| + e5|tap|button|%|| + e6|tap|button|Ãˇ|| + e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| Tips - - Use frame coordinates for tap/swipe (center: x+width/2, y+height/2) - - If a debugger is attached, ensure the app is running (not stopped on breakpoints) - - Screenshots are for visual verification only + - Use target refs with tap, type_text, long_press, and touch. + - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. + - Use wait_for_ui for text/assertions or changing UI. -✅ Accessibility hierarchy retrieved successfully. +✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: -1. Refresh after layout changes: xcodebuildmcp simulator snapshot-ui --simulator-id -2. Tap on element: xcodebuildmcp ui-automation tap --simulator-id --x 0 --y 0 -3. Take screenshot for verification: xcodebuildmcp simulator screenshot --simulator-id +1. Refresh after layout changes: xcodebuildmcp ui-automation snapshot-ui --simulator-id +2. Wait for UI to settle: xcodebuildmcp ui-automation wait-for-ui --simulator-id --predicate settled +3. Batch same-screen taps: xcodebuildmcp ui-automation batch --json '{"simulatorId":"","steps":[{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}]}' +4. 
Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id --element-ref e7 diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt index 4716920bc..299c2be21 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt @@ -3,8 +3,9 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate swipe. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt new file mode 100644 index 000000000..f227e42b4 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt @@ -0,0 +1,12 @@ + +👆 Swipe + + Simulator: + +Recovery + Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'swipeWithin'. + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'swipeWithin'. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt index f78015c16..b8d804265 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt @@ -3,8 +3,8 @@ Simulator: -Warnings (1): +✅ Swipe up within elementRef e3 simulated successfully. 
- ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Swipe from (200, 400) to (200, 200) simulated successfully. +Next steps: +1. Batch same-screen taps: xcodebuildmcp ui-automation batch --json '{"simulatorId":"","steps":[{"action":"tap","elementRef":"e4"},{"action":"tap","elementRef":"e6"}]}' +2. Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id --element-ref e5 diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt index d45f020a2..78309b818 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt @@ -3,8 +3,9 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate tap at (100, 100). +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt index d4a41a58c..90db0d93e 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt @@ -3,8 +3,8 @@ Simulator: -Warnings (1): +✅ Tap on elementRef e3 simulated successfully. - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Tap at (100, 400) simulated successfully. +Next steps: +1. 
Batch same-screen taps: xcodebuildmcp ui-automation batch --json '{"simulatorId":"","steps":[{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}]}' +2. Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id --element-ref e7 diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt index 751d9f3fd..1d05a8c9f 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt @@ -3,8 +3,9 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to execute touch event. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt index 5197f0e74..83a80a6cb 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt @@ -3,8 +3,7 @@ Simulator: -Warnings (1): +✅ Touch event (touch down+up) on elementRef e3 executed successfully. - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Touch event (touch down+up) at (100, 400) executed successfully. +Next steps: +1. 
Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt index bba706413..39d9c2f15 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt @@ -3,8 +3,9 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate text typing. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt new file mode 100644 index 000000000..27afd2f76 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt @@ -0,0 +1,12 @@ + +âŒ¨ī¸ Type Text + + Simulator: + +Recovery + Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'typeText'. + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'typeText'. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--success.txt deleted file mode 100644 index 72a6ac50b..000000000 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--success.txt +++ /dev/null @@ -1,6 +0,0 @@ - -âŒ¨ī¸ Type Text - - Simulator: - -✅ Text typing simulated successfully. 
diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt new file mode 100644 index 000000000..4801e6851 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt @@ -0,0 +1,35 @@ + +âš™ī¸ Wait for UI + + Simulator: + +Matched exists (1) — ref|action|role|label|value|id + e3|tap|button|C|| + +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| + e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|Âą|| + e5|tap|button|%|| + e6|tap|button|Ãˇ|| + e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| + +Tips + - Use target refs with tap, type_text, long_press, and touch. + - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. + - Use wait_for_ui for text/assertions or changing UI. + +✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json index 7df9f68e4..8b3c5cbd3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json index 39cb6883a..153b5dec4 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json index bbfc05f0e..6251ea993 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json index 4ef507f23..6a480c985 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json @@ -18,5 +18,8 @@ "artifacts": { "simulatorId": "" } 
- } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json index b75c46872..4bef73811 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json @@ -2,27 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate long press at (100, 400).", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { "type": "long-press", - "x": 100, - "y": 400, + "elementRef": "e3", "durationMs": 500 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json index 32a7bd325..62d4fd08f 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json @@ -9,20 +9,14 @@ }, "action": { "type": "long-press", - "x": 100, - "y": 400, + "elementRef": "e3", "durationMs": 500 }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. 
Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." - } - ], - "errors": [] } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json index 68cb4caaf..d4beeeb25 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json @@ -11,393 +11,43 @@ "simulatorId": "" }, "capture": { - "type": "ui-hierarchy", - "uiHierarchy": [ - { - "AXFrame": "{{0, 0}, {402, 874}}", - "AXUniqueId": null, - "frame": { "x": 0, "y": 0, "width": 402, "height": 874 }, - "role_description": "application", - "AXLabel": "Calculator", - "content_required": false, - "type": "Application", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXApplication", - "children": [ - { - "AXFrame": "{{344, 250.5}, {34, 67}}", - "AXUniqueId": null, - "frame": { "x": 344, "y": 250.5, "width": 34, "height": 67 }, - "role_description": "text", - "AXLabel": "0", - "content_required": false, - "type": "StaticText", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXStaticText", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 357.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "C", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 
357.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "Âą", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 357.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "%", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 357.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "Ãˇ", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 449.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "7", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 449.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "8", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 449.5}, 
{82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 449.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "9", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 449.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "×", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 541.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "4", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 541.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "5", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 541.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "6", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": 
null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 541.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "-", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 633.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "1", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 633.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "2", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 633.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "3", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 633.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "+", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": 
true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 725.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "0", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 725.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": ".", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 725.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "=", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - } - ], - "subrole": null, - "pid": 99999 - } - ] + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|Âą||", + "e5|tap|button|%||", + "e6|tap|button|Ãˇ||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", + "e21|tap|button|=||" + ], + "scroll": [], + "text": [ + "e2|text|text|0||" + ], + "udid": "" } }, 
"nextSteps": [ "Refresh after layout changes: snapshot_ui({ simulatorId: \"\" })", - "Tap on element: tap({ simulatorId: \"\", x: 0, y: 0 })", - "Take screenshot for verification: screenshot({ simulatorId: \"\" })" + "Wait for UI to settle: wait_for_ui({ simulatorId: \"\", predicate: \"settled\" })", + "Batch same-screen taps: batch({ simulatorId: \"\", steps: [{\"action\":\"tap\",\"elementRef\":\"e7\"},{\"action\":\"tap\",\"elementRef\":\"e8\"}] })", + "Tap an elementRef: tap({ simulatorId: \"\", elementRef: \"e7\" })" ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json index 6968362b0..29c92f1c9 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate swipe.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "swipe" + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-not-actionable.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-not-actionable.json new file mode 100644 index 000000000..d7d3c80b9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'swipeWithin'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'swipeWithin'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json index d3a04cc16..c74601169 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json @@ -9,25 +9,39 @@ }, "action": { "type": "swipe", - "from": { - "x": 200, - "y": 400 - }, - "to": { - "x": 200, - "y": 200 - } + "withinElementRef": "e3", + "direction": "up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." 
- } + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 16, + "targets": [ + "e6|tap|button|Home Screen & App Library||com.apple.settings.homeScreen", + "e5|tap|button|Camera||com.apple.settings.camera", + "e7|tap|button|Search||com.apple.settings.search", + "e8|tap|button|StandBy||com.apple.settings.standBy", + "e9|tap|button|Screen Time||com.apple.settings.screenTime", + "e10|tap|button|Passcode||com.apple.settings.passcodeAndBiometrics", + "e11|tap|button|Privacy & Security||com.apple.settings.privacyAndSecurity", + "e12|tap|button|Game Center||com.apple.settings.gameCenter", + "e13|tap|button|iCloud||com.apple.settings.iCloud", + "e14|tap|button|Apps||com.apple.settings.apps", + "e15|tap|button|Developer||com.apple.settings.developer" ], - "errors": [] + "scroll": [ + "e3|swipe|other|||" + ], + "udid": "" } - } + }, + "nextSteps": [ + "Batch same-screen taps: batch({ simulatorId: \"\", steps: [{\"action\":\"tap\",\"elementRef\":\"e5\"},{\"action\":\"tap\",\"elementRef\":\"e7\"}] })", + "Tap an elementRef: tap({ simulatorId: \"\", elementRef: \"e6\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json index 00556fe82..ba680e80d 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json @@ -2,26 +2,22 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate tap at (100, 100).", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { "type": "tap", - "x": 100, - "y": 100 + "elementRef": "e3" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": 
"CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json index fa6e9978e..1143d393c 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json @@ -9,19 +9,47 @@ }, "action": { "type": "tap", - "x": 100, - "y": 400 + "elementRef": "e3" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." - } + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|Âą||", + "e5|tap|button|%||", + "e6|tap|button|Ãˇ||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", + "e21|tap|button|=||" ], - "errors": [] + "scroll": [], + "text": [ + "e2|text|text|0||" + ], + "udid": "" } - } + }, + "nextSteps": [ + "Batch same-screen taps: batch({ simulatorId: \"\", steps: [{\"action\":\"tap\",\"elementRef\":\"e7\"},{\"action\":\"tap\",\"elementRef\":\"e8\"}] })", + "Tap an elementRef: tap({ simulatorId: \"\", elementRef: \"e7\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json 
b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json index 4cbcc83fb..9b589c7d6 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to execute touch event.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "touch" + "type": "touch", + "elementRef": "e3", + "event": "touch down+up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json index 0e708ad44..c00c1c649 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json @@ -9,20 +9,14 @@ }, "action": { "type": "touch", - "event": "touch down+up", - "x": 100, - "y": 400 + "elementRef": "e3", + "event": "touch down+up" }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." 
- } - ], - "errors": [] } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json index 30d9ab14d..ffb164bb3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate text typing.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "type-text" + "type": "type-text", + "elementRef": "e3", + "textLength": 5 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-not-actionable.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-not-actionable.json new file mode 100644 index 000000000..e403f06f9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'typeText'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "type-text", + "elementRef": "e3", + "textLength": 5 + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'typeText'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--success.json deleted file mode 100644 index a2686f68c..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--success.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "2", - "didError": false, - "error": null, - "data": { - "summary": { - "status": "SUCCEEDED" - }, - "action": { - "type": "type-text" - }, - "artifacts": { - "simulatorId": "" - } - } -} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json new file mode 100644 index 000000000..b677ec15a --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json @@ -0,0 +1,53 @@ +{ + "schema": 
"xcodebuildmcp.output.capture-result", + "schemaVersion": "2", + "didError": false, + "error": null, + "data": { + "summary": { + "status": "SUCCEEDED" + }, + "artifacts": { + "simulatorId": "" + }, + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|Âą||", + "e5|tap|button|%||", + "e6|tap|button|Ãˇ||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", + "e21|tap|button|=||" + ], + "scroll": [], + "text": [ + "e2|text|text|0||" + ], + "udid": "" + }, + "waitMatch": { + "predicate": "exists", + "matches": [ + "e3|tap|button|C||" + ] + } + } +} diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt index 18de11b04..162a402b7 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt @@ -2,3 +2,6 @@ 👆 Button ✅ Hardware button 'home' pressed successfully. + +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt index c693a2f5d..cf1e9175d 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt @@ -2,3 +2,6 @@ 👆 Gesture ✅ Gesture 'scroll-down' executed successfully. + +Next steps: +1. 
Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt index c886800b4..456c5cce9 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt @@ -2,3 +2,6 @@ âŒ¨ī¸ Key Press ✅ Key press (code: 4) simulated successfully. + +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt index 6faf04241..84b66caa3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt @@ -2,3 +2,6 @@ âŒ¨ī¸ Key Sequence ✅ Key sequence [4,5,6] executed successfully. + +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt index 10acee620..0b69ada38 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt @@ -1,8 +1,9 @@ 👆 Long Press -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate long press at (100, 400). +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt index faeec8350..f0df9dad8 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt @@ -1,8 +1,7 @@ 👆 Long Press -Warnings (1): +✅ Long press on elementRef e3 for 500ms simulated successfully. - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Long press at (100, 400) for 500ms simulated successfully. +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt index 5766d2720..7c6323932 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt @@ -1,586 +1,36 @@ 📷 Snapshot UI -Accessibility Hierarchy - ```json - [ - { - "AXFrame" : "{{0, 0}, {402, 874}}", - "AXUniqueId" : null, - "frame" : { - "y" : 0, - "x" : 0, - "width" : 402, - "height" : 874 - }, - "role_description" : "application", - "AXLabel" : "Calculator", - "content_required" : false, - "type" : "Application", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXApplication", - "children" : [ - { - "AXFrame" : "{{344, 250.5}, {34, 67}}", - "AXUniqueId" : null, - "frame" : { - "y" : 250.5, - "x" : 344, - "width" : 34, - "height" : 67 - }, - "role_description" : "text", - "AXLabel" : "0", - "content_required" : false, - "type" : "StaticText", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : 
"AXStaticText", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "C", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "Âą", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "%", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "Ãˇ", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 19.5, - "width" : 82.7, - 
"height" : 81 - }, - "role_description" : "button", - "AXLabel" : "7", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "8", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "9", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "×", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "4", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - 
"enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "5", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "6", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "-", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "1", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 113.2, 
- "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "2", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "3", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "+", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "0", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : ".", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - 
"AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "=", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - } - ], - "subrole" : null, - "pid" : - } - ] - ``` +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| + e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|Âą|| + e5|tap|button|%|| + e6|tap|button|Ãˇ|| + e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| Tips - - Use frame coordinates for tap/swipe (center: x+width/2, y+height/2) - - If a debugger is attached, ensure the app is running (not stopped on breakpoints) - - Screenshots are for visual verification only + - Use target refs with tap, type_text, long_press, and touch. + - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. + - Use wait_for_ui for text/assertions or changing UI. -✅ Accessibility hierarchy retrieved successfully. +✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: 1. Refresh after layout changes: snapshot_ui({ simulatorId: "" }) -2. Tap on element: tap({ simulatorId: "", x: 0, y: 0 }) -3. Take screenshot for verification: screenshot({ simulatorId: "" }) +2. Wait for UI to settle: wait_for_ui({ simulatorId: "", predicate: "settled" }) +3. 
Batch same-screen taps: batch({ simulatorId: "", steps: [{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}] }) +4. Tap an elementRef: tap({ simulatorId: "", elementRef: "e7" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt index 05a6c9606..15636c457 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt @@ -1,8 +1,9 @@ 👆 Swipe -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate swipe. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt new file mode 100644 index 000000000..51d69da9e --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt @@ -0,0 +1,10 @@ + +👆 Swipe + +Recovery + Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'swipeWithin'. + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'swipeWithin'. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt index 9b5ca8373..6c402717c 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt @@ -1,8 +1,8 @@ 👆 Swipe -Warnings (1): +✅ Swipe up within elementRef e3 simulated successfully. - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Swipe from (200, 400) to (200, 200) simulated successfully. +Next steps: +1. Batch same-screen taps: batch({ simulatorId: "", steps: [{"action":"tap","elementRef":"e5"},{"action":"tap","elementRef":"e7"}] }) +2. Tap an elementRef: tap({ simulatorId: "", elementRef: "e6" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt index 3aa5515fa..3cc10d903 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt @@ -1,8 +1,9 @@ 👆 Tap -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate tap at (100, 100). +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt index 6c3da0d59..303efa283 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt @@ -1,8 +1,8 @@ 👆 Tap -Warnings (1): +✅ Tap on elementRef e3 simulated successfully. - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Tap at (100, 400) simulated successfully. +Next steps: +1. Batch same-screen taps: batch({ simulatorId: "", steps: [{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}] }) +2. Tap an elementRef: tap({ simulatorId: "", elementRef: "e7" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt index ad4778d4a..f276ccf07 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt @@ -1,8 +1,9 @@ 👆 Touch -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to execute touch event. +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt index b9dad4d4d..c7256530e 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt @@ -1,8 +1,7 @@ 👆 Touch -Warnings (1): +✅ Touch event (touch down+up) on elementRef e3 executed successfully. - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Touch event (touch down+up) at (100, 400) executed successfully. +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt index 40a192802..19df3f2f3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt @@ -1,8 +1,9 @@ ⌨️ Type Text -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate text typing. +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt new file mode 100644 index 000000000..93cf24775 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt @@ -0,0 +1,10 @@ + +⌨️ Type Text + +Recovery + Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'typeText'. + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'typeText'. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--success.txt deleted file mode 100644 index a3abffa98..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--success.txt +++ /dev/null @@ -1,4 +0,0 @@ - -⌨️ Type Text - -✅ Text typing simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt new file mode 100644 index 000000000..f575f68b7 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt @@ -0,0 +1,33 @@ + +⚙️ Wait for UI + +Matched exists (1) — ref|action|role|label|value|id + e3|tap|button|C|| + +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| + e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|±|| + e5|tap|button|%|| + e6|tap|button|÷|| + e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| + +Tips + - Use target refs with tap, type_text, long_press, and touch. 
+ - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. + - Use wait_for_ui for text/assertions or changing UI. + +✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. diff --git a/src/snapshot-tests/__tests__/json-normalize.test.ts b/src/snapshot-tests/__tests__/json-normalize.test.ts index 0e4ecabc4..899e85e91 100644 --- a/src/snapshot-tests/__tests__/json-normalize.test.ts +++ b/src/snapshot-tests/__tests__/json-normalize.test.ts @@ -3,7 +3,7 @@ import type { StructuredOutputEnvelope } from '../../types/structured-output.ts' import { formatStructuredEnvelopeFixture, normalizeStructuredEnvelope } from '../json-normalize.ts'; describe('normalizeStructuredEnvelope', () => { - it('keeps only failing test cases for failed result snapshots', () => { + it('keeps suite-less simulator test cases while normalizing volatile durations', () => { const envelope: StructuredOutputEnvelope = { schema: 'xcodebuildmcp.output.test-result', schemaVersion: '1', @@ -73,6 +73,64 @@ describe('normalizeStructuredEnvelope', () => { }); }); + it('normalizes volatile runtime snapshot timestamps', () => { + const envelope: StructuredOutputEnvelope = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 9, + capturedAtMs: 123, + expiresAtMs: 456, + elements: [], + actions: [], + }, + uiError: { + code: 'TARGET_NOT_ACTIONABLE', + message: 'Target is not actionable.', + recoveryHint: 'Refresh the snapshot and choose another element.', + snapshotAgeMs: 42, + }, + }, + }; + + expect(normalizeStructuredEnvelope(envelope)).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + 
data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: '', + seq: 1, + capturedAtMs: 1_700_000_000_000, + expiresAtMs: 1_700_000_060_000, + elements: [], + actions: [], + }, + uiError: { + code: 'TARGET_NOT_ACTIONABLE', + message: 'Target is not actionable.', + recoveryHint: 'Refresh the snapshot and choose another element.', + snapshotAgeMs: 1234, + }, + }, + }); + }); + it('normalizes and sorts SwiftPM build progress lines in stderr arrays', () => { const envelope: StructuredOutputEnvelope = { schema: 'xcodebuildmcp.output.build-run-result', diff --git a/src/snapshot-tests/json-normalize.ts b/src/snapshot-tests/json-normalize.ts index 5699bef61..f058aa8e6 100644 --- a/src/snapshot-tests/json-normalize.ts +++ b/src/snapshot-tests/json-normalize.ts @@ -22,6 +22,10 @@ function normalizeString(value: string, key?: string, path: string[] = []): stri return ''; } + if (key === 'screenHash') { + return ''; + } + if (key === 'AXFrame') { // Round embedded floats to 1 decimal place for rounding-stable comparison with // the sibling `frame` object. e.g. 82.666664123535156 -> 82.7, 250.5 stays 250.5. @@ -61,6 +65,15 @@ function normalizeNumber(path: string[], key: string | undefined, value: number) return 3600; case 'threadId': return 1; + case 'capturedAtMs': + return 1_700_000_000_000; + case 'expiresAtMs': + return 1_700_000_060_000; + case 'snapshotAgeMs': + return 1234; + case 'seq': + if (path.includes('capture')) return 1; + return value; case 'x': case 'y': case 'width': @@ -243,7 +256,7 @@ function normalizeXcodeBridgeCallEnvelope( content: [], ...(Object.hasOwn(data, 'structuredContent') ? 
{ structuredContent: {} } : {}), }, - } as StructuredOutputEnvelope; + }; } export function normalizeStructuredEnvelope( diff --git a/src/snapshot-tests/suites/ui-automation-suite.ts b/src/snapshot-tests/suites/ui-automation-suite.ts index 536095bf2..36064ec35 100644 --- a/src/snapshot-tests/suites/ui-automation-suite.ts +++ b/src/snapshot-tests/suites/ui-automation-suite.ts @@ -13,6 +13,40 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe(`${runtime} ui-automation workflow`, () => { let harness: WorkflowSnapshotHarness; let simulatorUdid: string; + let snapshotCaptured = false; + + async function refreshRuntimeSnapshot(): Promise { + if (snapshotCaptured) { + return; + } + + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId: BUNDLE_ID, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { isError } = await harness.invoke('ui-automation', 'snapshot-ui', { + simulatorId: simulatorUdid, + }); + expect(isError).toBe(false); + snapshotCaptured = true; + } + + async function captureFirstScrollRef(bundleId: string): Promise { + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { text, isError } = await harness.invoke('ui-automation', 'snapshot-ui', { + simulatorId: simulatorUdid, + }); + expect(isError).toBe(false); + + return /\b(e\d+)\|swipe\|/.exec(text)?.[1] ?? 
null; + } beforeAll(async () => { vi.setConfig({ testTimeout: 120_000 }); @@ -22,7 +56,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi await harness.invoke('simulator', 'build-and-run', { workspacePath: WORKSPACE, scheme: 'CalculatorApp', - simulatorName: 'iPhone 17', + simulatorName: 'iPhone 17 Pro', }); await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -34,10 +68,11 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('tap', () => { it('success', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'tap', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', }); expect(isError).toBe(false); expectFixture(text, 'tap--success'); @@ -46,8 +81,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'tap', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 100, + elementRef: 'e3', }); expect(isError).toBe(true); expectFixture(text, 'tap--error-no-simulator'); @@ -56,10 +90,12 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('touch', () => { it('success', async () => { + snapshotCaptured = false; + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'touch', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', down: true, up: true, }); @@ -70,8 +106,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'touch', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 400, + elementRef: 'e3', down: true, up: true, }); @@ -82,10 +117,12 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi 
describe('long-press', () => { it('success', async () => { + snapshotCaptured = false; + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'long-press', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', duration: 500, }); expect(isError).toBe(false); @@ -95,8 +132,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'long-press', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 400, + elementRef: 'e3', duration: 500, }); expect(isError).toBe(true); @@ -106,24 +142,40 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('swipe', () => { it('success', async () => { + if (runtime === 'cli/json') { + return; + } + + const scrollRef = await captureFirstScrollRef('com.apple.Preferences'); + expect(scrollRef).not.toBeNull(); + const { text, isError } = await harness.invoke('ui-automation', 'swipe', { simulatorId: simulatorUdid, - x1: 200, - y1: 400, - x2: 200, - y2: 200, + withinElementRef: scrollRef, + direction: 'up', }); expect(isError).toBe(false); expectFixture(text, 'swipe--success'); + snapshotCaptured = false; + }); + + it('error - target not actionable', async () => { + await refreshRuntimeSnapshot(); + + const { text, isError } = await harness.invoke('ui-automation', 'swipe', { + simulatorId: simulatorUdid, + withinElementRef: 'e3', + direction: 'up', + }); + expect(isError).toBe(true); + expectFixture(text, 'swipe--error-not-actionable'); }); it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'swipe', { simulatorId: INVALID_SIMULATOR_ID, - x1: 200, - y1: 400, - x2: 200, - y2: 200, + withinElementRef: 'e3', + direction: 'up', }); expect(isError).toBe(true); expectFixture(text, 'swipe--error-no-simulator'); @@ -211,18 +263,23 @@ export function 
registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); describe('type-text', () => { - it('success', async () => { + it('error - target not actionable', async () => { + snapshotCaptured = false; + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'type-text', { simulatorId: simulatorUdid, + elementRef: 'e3', text: 'hello', }); - expect(isError).toBe(false); - expectFixture(text, 'type-text--success'); + expect(isError).toBe(true); + expectFixture(text, 'type-text--error-not-actionable'); }); it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'type-text', { simulatorId: INVALID_SIMULATOR_ID, + elementRef: 'e3', text: 'hello', }); expect(isError).toBe(true); @@ -230,6 +287,28 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); }); + describe('wait-for-ui', () => { + it('success - existing calculator button', async () => { + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId: BUNDLE_ID, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { text, isError } = await harness.invoke('ui-automation', 'wait-for-ui', { + simulatorId: simulatorUdid, + predicate: 'exists', + label: 'C', + role: 'button', + timeoutMs: 1000, + pollIntervalMs: 100, + }); + expect(isError).toBe(false); + expectFixture(text, 'wait-for-ui--success'); + snapshotCaptured = true; + }); + }); + describe('snapshot-ui', () => { it('success - calculator app', async () => { // Re-focus the calculator app before snapshotting: preceding UI tests @@ -247,6 +326,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi expect(isError).toBe(false); expect(text.length).toBeGreaterThan(100); expectFixture(text, 'snapshot-ui--success'); + snapshotCaptured = true; }); it('error - invalid simulator', async () => { diff --git a/src/types/common.ts b/src/types/common.ts 
index 3534c2c6d..64ecd325f 100644 --- a/src/types/common.ts +++ b/src/types/common.ts @@ -15,6 +15,14 @@ /** * Represents a suggested next step that can be rendered for CLI or MCP. */ +export type NextStepParamValue = + | string + | number + | boolean + | null + | NextStepParamValue[] + | { [key: string]: NextStepParamValue }; + export interface NextStep { /** Optional MCP tool name (e.g., "boot_sim") */ tool?: string; @@ -25,14 +33,14 @@ export interface NextStep { /** Human-readable description of the action (optional when manifest template provides it) */ label?: string; /** Optional parameters to pass to the tool */ - params?: Record; + params?: Record; /** Optional ordering hint for merged steps */ priority?: number; /** When to show this step: 'always' (default), 'success', or 'failure' */ when?: 'always' | 'success' | 'failure'; } -export type NextStepParams = Record; +export type NextStepParams = Record; export type NextStepParamsMap = Record; /** diff --git a/src/types/domain-results.ts b/src/types/domain-results.ts index d6886d7d0..e67f2f59f 100644 --- a/src/types/domain-results.ts +++ b/src/types/domain-results.ts @@ -50,6 +50,12 @@ export type AtLeastOne = { [K in keyof T]-?: Required> & Partial>; }[keyof T]; import type { BuildInvocationRequest } from './domain-fragments.ts'; +import type { + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, + UiAutomationRecoverableError, + UiWaitMatch, +} from './ui-snapshot.ts'; export type ExecutionStatus = 'SUCCEEDED' | 'FAILED'; export type BuildTarget = 'simulator' | 'device' | 'macos' | 'swift-package'; @@ -242,7 +248,9 @@ export interface CaptureVideoRecordingPayload { export type CapturePayload = | CaptureImagePayload | CaptureUiHierarchyPayload - | CaptureVideoRecordingPayload; + | CaptureVideoRecordingPayload + | RuntimeSnapshotV1 + | RuntimeSnapshotUnchangedV1; export interface DebugFileLineBreakpoint { kind: 'file-line'; file: string; @@ -349,28 +357,40 @@ export interface TestSelectionInfo { } export 
interface UiActionTap { type: 'tap'; + elementRef: string; x?: number; y?: number; - id?: string; - label?: string; } export interface UiActionSwipe { type: 'swipe'; + withinElementRef: string; + direction: 'up' | 'down' | 'left' | 'right'; from?: Point; to?: Point; durationSeconds?: number; } +export interface UiActionDrag { + type: 'drag'; + elementRef: string; + direction: 'up' | 'down' | 'left' | 'right'; + from?: Point; + to?: Point; + durationSeconds?: number; + steps?: number; +} export interface UiActionTouch { type: 'touch'; + elementRef: string; event?: string; x?: number; y?: number; } export interface UiActionLongPress { type: 'long-press'; - x: number; - y: number; + elementRef: string; durationMs: number; + x?: number; + y?: number; } export interface UiActionButton { type: 'button'; @@ -382,6 +402,8 @@ export interface UiActionGesture { } export interface UiActionTypeText { type: 'type-text'; + elementRef: string; + textLength?: number; } export interface UiActionKeyPress { type: 'key-press'; @@ -391,16 +413,22 @@ export interface UiActionKeySequence { type: 'key-sequence'; keyCodes: number[]; } +export interface UiActionBatch { + type: 'batch'; + stepCount: number; +} export type UiAction = | UiActionTap | UiActionSwipe + | UiActionDrag | UiActionTouch | UiActionLongPress | UiActionButton | UiActionGesture | UiActionTypeText | UiActionKeyPress - | UiActionKeySequence; + | UiActionKeySequence + | UiActionBatch; export interface SimulatorActionBoot { type: 'boot'; } @@ -491,6 +519,8 @@ export type CaptureResultDomainResult = ToolDomainResultBase & { artifacts: { simulatorId: string; screenshotPath?: string }; capture?: CapturePayload; diagnostics?: BasicDiagnostics; + uiError?: UiAutomationRecoverableError; + waitMatch?: UiWaitMatch; }; export type CoverageResultDomainResult = ToolDomainResultBase & { kind: 'coverage-result'; @@ -629,7 +659,9 @@ export type UiActionResultDomainResult = ToolDomainResultBase & { summary: StatusSummary; action: UiAction; 
artifacts: { simulatorId: string }; + capture?: CapturePayload; diagnostics?: BasicDiagnostics; + uiError?: UiAutomationRecoverableError; }; export type XcodeBridgeCallResultDomainResult = ToolDomainResultBase & { kind: 'xcode-bridge-call-result'; diff --git a/src/types/ui-snapshot.ts b/src/types/ui-snapshot.ts new file mode 100644 index 000000000..aee7cfcab --- /dev/null +++ b/src/types/ui-snapshot.ts @@ -0,0 +1,149 @@ +import type { AccessibilityNode, Frame, Point } from './domain-results.ts'; + +export type RuntimeSnapshotProtocol = 'rs/1'; +export type RuntimeSnapshotCaptureType = 'runtime-snapshot'; + +export type RuntimeActionNameV1 = 'tap' | 'typeText' | 'longPress' | 'touch' | 'swipeWithin'; + +export type RuntimeElementRoleV1 = + | 'application' + | 'button' + | 'cell' + | 'image' + | 'keyboard-key' + | 'list' + | 'menu' + | 'other' + | 'scroll-view' + | 'slider' + | 'switch' + | 'tab' + | 'text' + | 'text-field' + | 'window'; + +export interface RuntimeElementStateV1 { + enabled?: boolean; + focused?: boolean; + selected?: boolean; + visible?: boolean; +} + +export interface RuntimeElementV1 { + ref: string; + role?: RuntimeElementRoleV1; + label?: string; + value?: string; + identifier?: string; + frame: Frame; + state?: RuntimeElementStateV1; + actions: RuntimeActionNameV1[]; +} + +export interface RuntimeActionHintV1 { + action: RuntimeActionNameV1; + elementRef: string; + label?: string; +} + +export interface RuntimeSnapshotV1 { + type: RuntimeSnapshotCaptureType; + protocol: RuntimeSnapshotProtocol; + simulatorId: string; + screenHash: string; + seq: number; + capturedAtMs: number; + expiresAtMs: number; + elements: RuntimeElementV1[]; + actions: RuntimeActionHintV1[]; +} + +export interface RuntimeSnapshotUnchangedV1 { + type: 'runtime-snapshot-unchanged'; + protocol: RuntimeSnapshotProtocol; + simulatorId: string; + screenHash: string; + seq: number; +} + +export interface RuntimeSnapshotMetadata { + path: string; + depth: number; + childCount: 
number; + signature: string; + activationPoint?: Point; + swipeFrame?: Frame; +} + +export interface RuntimeSnapshotElementRecord { + publicElement: RuntimeElementV1; + metadata: RuntimeSnapshotMetadata; + rawNode: AccessibilityNode; +} + +export interface RuntimeSnapshotRecord { + simulatorId: string; + screenHash: string; + seq: number; + capturedAtMs: number; + expiresAtMs: number; + payload: RuntimeSnapshotV1; + elements: RuntimeSnapshotElementRecord[]; + elementsByRef: Map; +} + +export type RuntimeSnapshotLookupStatus = 'available' | 'expired' | 'missing'; + +export interface RuntimeSnapshotLookup { + status: RuntimeSnapshotLookupStatus; + snapshot: RuntimeSnapshotRecord | null; + snapshotAgeMs?: number; +} + +export type UiAutomationRecoverableErrorCode = + | 'SNAPSHOT_MISSING' + | 'SNAPSHOT_EXPIRED' + | 'SNAPSHOT_PARSE_FAILED' + | 'SNAPSHOT_CAPTURE_FAILED' + | 'ELEMENT_REF_NOT_FOUND' + | 'TARGET_NOT_FOUND' + | 'TARGET_AMBIGUOUS' + | 'TARGET_NOT_ACTIONABLE' + | 'WAIT_TIMEOUT' + | 'UI_STATE_CHANGED' + | 'ACTION_FAILED'; + +export interface UiAutomationRecoverableError { + code: UiAutomationRecoverableErrorCode; + message: string; + recoveryHint: string; + elementRef?: string; + candidates?: RuntimeElementV1[]; + snapshotAgeMs?: number; + timeoutMs?: number; +} + +export type UiWaitPredicate = + | 'exists' + | 'gone' + | 'enabled' + | 'focused' + | 'textContains' + | 'settled'; + +export interface UiWaitMatch { + predicate: UiWaitPredicate; + matches: RuntimeElementV1[]; +} + +export type RuntimeElementResolution = + | { + ok: true; + snapshot: RuntimeSnapshotRecord; + element: RuntimeSnapshotElementRecord; + snapshotAgeMs: number; + } + | { + ok: false; + error: UiAutomationRecoverableError; + }; diff --git a/src/utils/__tests__/axe-helpers.test.ts b/src/utils/__tests__/axe-helpers.test.ts index 468a48e45..e2b5465ec 100644 --- a/src/utils/__tests__/axe-helpers.test.ts +++ b/src/utils/__tests__/axe-helpers.test.ts @@ -1,19 +1,39 @@ import { afterEach, 
beforeEach, describe, expect, it } from 'vitest'; import { chmodSync, mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { getBundledAxeEnvironment } from '../axe-helpers.ts'; +import { dirname, join } from 'node:path'; +import { getBundledAxeEnvironment, resolveAxeBinary } from '../axe-helpers.ts'; import { resetResourceRootCacheForTests } from '../../core/resource-root.ts'; +import { __resetConfigStoreForTests } from '../config-store.ts'; + +function writeExecutable(path: string): void { + mkdirSync(dirname(path), { recursive: true }); + writeFileSync(path, ''); + chmodSync(path, 0o755); +} describe('axe-helpers', () => { let originalResourceRoot: string | undefined; let originalDyldFrameworkPath: string | undefined; + let originalAxePath: string | undefined; + let originalLegacyAxePath: string | undefined; + let originalAxeSourcePath: string | undefined; + let originalLegacyAxeSourcePath: string | undefined; let tempDir: string; beforeEach(() => { originalResourceRoot = process.env.XCODEBUILDMCP_RESOURCE_ROOT; originalDyldFrameworkPath = process.env.DYLD_FRAMEWORK_PATH; + originalAxePath = process.env.XCODEBUILDMCP_AXE_PATH; + originalLegacyAxePath = process.env.AXE_PATH; + originalAxeSourcePath = process.env.XCODEBUILDMCP_AXE_SOURCE_PATH; + originalLegacyAxeSourcePath = process.env.AXE_SOURCE_PATH; tempDir = mkdtempSync(join(tmpdir(), 'xbmcp-axe-helpers-')); + delete process.env.XCODEBUILDMCP_AXE_PATH; + delete process.env.AXE_PATH; + delete process.env.XCODEBUILDMCP_AXE_SOURCE_PATH; + delete process.env.AXE_SOURCE_PATH; + __resetConfigStoreForTests(); resetResourceRootCacheForTests(); }); @@ -30,7 +50,32 @@ describe('axe-helpers', () => { process.env.DYLD_FRAMEWORK_PATH = originalDyldFrameworkPath; } + if (originalAxePath === undefined) { + delete process.env.XCODEBUILDMCP_AXE_PATH; + } else { + process.env.XCODEBUILDMCP_AXE_PATH = originalAxePath; + } + + if 
(originalLegacyAxePath === undefined) { + delete process.env.AXE_PATH; + } else { + process.env.AXE_PATH = originalLegacyAxePath; + } + + if (originalAxeSourcePath === undefined) { + delete process.env.XCODEBUILDMCP_AXE_SOURCE_PATH; + } else { + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = originalAxeSourcePath; + } + + if (originalLegacyAxeSourcePath === undefined) { + delete process.env.AXE_SOURCE_PATH; + } else { + process.env.AXE_SOURCE_PATH = originalLegacyAxeSourcePath; + } + rmSync(tempDir, { recursive: true, force: true }); + __resetConfigStoreForTests(); resetResourceRootCacheForTests(); }); @@ -39,8 +84,7 @@ describe('axe-helpers', () => { const axePath = join(resourceRoot, 'bundled', 'axe'); const frameworksDir = join(resourceRoot, 'bundled', 'Frameworks'); mkdirSync(frameworksDir, { recursive: true }); - writeFileSync(axePath, ''); - chmodSync(axePath, 0o755); + writeExecutable(axePath); process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; delete process.env.DYLD_FRAMEWORK_PATH; @@ -55,8 +99,7 @@ describe('axe-helpers', () => { const axePath = join(resourceRoot, 'bundled', 'axe'); const frameworksDir = join(resourceRoot, 'bundled', 'Frameworks'); mkdirSync(frameworksDir, { recursive: true }); - writeFileSync(axePath, ''); - chmodSync(axePath, 0o755); + writeExecutable(axePath); process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; process.env.DYLD_FRAMEWORK_PATH = '/existing/frameworks'; @@ -65,4 +108,47 @@ describe('axe-helpers', () => { DYLD_FRAMEWORK_PATH: `${frameworksDir}:/existing/frameworks`, }); }); + + it('resolves an explicit AXe source checkout before bundled and PATH fallback', () => { + const sourceRoot = join(tempDir, 'AXe'); + const sourceAxePath = join(sourceRoot, '.build', 'arm64-apple-macosx', 'release', 'axe'); + const resourceRoot = join(tempDir, 'portable-root'); + const bundledAxePath = join(resourceRoot, 'bundled', 'axe'); + writeExecutable(sourceAxePath); + writeExecutable(bundledAxePath); + 
process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = sourceRoot; + process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; + + expect(resolveAxeBinary()).toEqual({ path: sourceAxePath, source: 'source' }); + }); + + it('keeps explicit axePath precedence over axeSourcePath', () => { + const configuredAxePath = join(tempDir, 'configured', 'axe'); + writeExecutable(configuredAxePath); + process.env.XCODEBUILDMCP_AXE_PATH = configuredAxePath; + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = join(tempDir, 'missing-source'); + + expect(resolveAxeBinary()).toEqual({ path: configuredAxePath, source: 'env' }); + }); + + it('preserves existing invalid axePath fallback behavior', () => { + const sourceRoot = join(tempDir, 'AXe'); + const sourceAxePath = join(sourceRoot, '.build', 'arm64-apple-macosx', 'release', 'axe'); + writeExecutable(sourceAxePath); + process.env.XCODEBUILDMCP_AXE_PATH = join(tempDir, 'missing', 'axe'); + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = sourceRoot; + + expect(resolveAxeBinary()).toEqual({ path: sourceAxePath, source: 'source' }); + }); + + it('fails loudly for invalid explicit axeSourcePath instead of falling back', () => { + const resourceRoot = join(tempDir, 'portable-root'); + writeExecutable(join(resourceRoot, 'bundled', 'axe')); + process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = join(tempDir, 'missing-source'); + + expect(() => resolveAxeBinary()).toThrow( + 'Configured axeSourcePath does not exist or is not a directory', + ); + }); }); diff --git a/src/utils/__tests__/config-store.test.ts b/src/utils/__tests__/config-store.test.ts index e0f6307bd..8d953de4c 100644 --- a/src/utils/__tests__/config-store.test.ts +++ b/src/utils/__tests__/config-store.test.ts @@ -56,6 +56,7 @@ describe('config-store', () => { XCODEBUILDMCP_UI_DEBUGGER_GUARD_MODE: 'warn', XCODEBUILDMCP_DEBUGGER_BACKEND: 'lldb', XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list', + XCODEBUILDMCP_AXE_SOURCE_PATH: '/Volumes/Developer/AXe', }; 
await initConfigStore({ cwd, fs: createFs(), env }); @@ -71,6 +72,7 @@ describe('config-store', () => { expect(config.uiDebuggerGuardMode).toBe('warn'); expect(config.debuggerBackend).toBe('lldb-cli'); expect(config.filePathRenderStyle).toBe('list'); + expect(config.axeSourcePath).toBe('/Volumes/Developer/AXe'); }); it('prefers overrides over config file values and config over env', async () => { @@ -79,18 +81,25 @@ describe('config-store', () => { 'debug: false', 'dapRequestTimeoutMs: 4000', 'filePathRenderStyle: tree', + 'axeSourcePath: /file/AXe', '', ].join('\n'); const env = { XCODEBUILDMCP_DEBUG: 'true', XCODEBUILDMCP_DAP_REQUEST_TIMEOUT_MS: '999', XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list', + XCODEBUILDMCP_AXE_SOURCE_PATH: '/env/AXe', }; await initConfigStore({ cwd, fs: createFs(yaml), - overrides: { debug: true, dapRequestTimeoutMs: 12345, filePathRenderStyle: 'list' }, + overrides: { + debug: true, + dapRequestTimeoutMs: 12345, + filePathRenderStyle: 'list', + axeSourcePath: '/override/AXe', + }, env, }); @@ -98,15 +107,25 @@ describe('config-store', () => { expect(config.debug).toBe(true); expect(config.dapRequestTimeoutMs).toBe(12345); expect(config.filePathRenderStyle).toBe('list'); + expect(config.axeSourcePath).toBe('/override/AXe'); }); - it('uses filePathRenderStyle from config before env when no override is provided', async () => { - const yaml = ['schemaVersion: 1', 'filePathRenderStyle: tree', ''].join('\n'); - const env = { XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list' }; + it('uses file config before env when no override is provided', async () => { + const yaml = [ + 'schemaVersion: 1', + 'filePathRenderStyle: tree', + 'axeSourcePath: /file/AXe', + '', + ].join('\n'); + const env = { + XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list', + XCODEBUILDMCP_AXE_SOURCE_PATH: '/env/AXe', + }; await initConfigStore({ cwd, fs: createFs(yaml), env }); expect(getConfig().filePathRenderStyle).toBe('tree'); + expect(getConfig().axeSourcePath).toBe('/file/AXe'); 
}); it('reads sentryDisabled from config file', async () => { diff --git a/src/utils/__tests__/project-config.test.ts b/src/utils/__tests__/project-config.test.ts index c72483cd6..130d8724e 100644 --- a/src/utils/__tests__/project-config.test.ts +++ b/src/utils/__tests__/project-config.test.ts @@ -68,6 +68,7 @@ describe('project-config', () => { ' - SCREENSHOT', 'debug: true', 'axePath: "./bin/axe"', + 'axeSourcePath: "../AXe"', 'sessionDefaults:', ' projectPath: "./App.xcodeproj"', ' workspacePath: "./App.xcworkspace"', @@ -89,6 +90,7 @@ describe('project-config', () => { }); expect(result.config.debug).toBe(true); expect(result.config.axePath).toBe(path.join(cwd, 'bin', 'axe')); + expect(result.config.axeSourcePath).toBe(path.join(cwd, '..', 'AXe')); expect(defaults.workspacePath).toBe(path.join(cwd, 'App.xcworkspace')); expect(defaults.projectPath).toBeUndefined(); expect(defaults.simulatorId).toBe('SIM-1'); @@ -154,6 +156,7 @@ describe('project-config', () => { const yaml = [ 'schemaVersion: 1', 'axePath: "file:///repo/bin/axe"', + 'axeSourcePath: "file:///repo/AXe"', 'sessionDefaults:', ' workspacePath: "file:///repo/App.xcworkspace"', ' derivedDataPath: "file:///repo/.derivedData"', @@ -166,6 +169,7 @@ describe('project-config', () => { if (!result.found) throw new Error('expected config to be found'); expect(result.config.axePath).toBe('/repo/bin/axe'); + expect(result.config.axeSourcePath).toBe('/repo/AXe'); const defaults = result.config.sessionDefaults ?? 
{}; expect(defaults.workspacePath).toBe('/repo/App.xcworkspace'); expect(defaults.derivedDataPath).toBe('/repo/.derivedData'); @@ -193,6 +197,7 @@ describe('project-config', () => { const yaml = [ 'schemaVersion: 1', 'axePath: "~/tools/axe"', + 'axeSourcePath: "~/Code/AXe"', 'iosTemplatePath: "~/templates/ios"', '', ].join('\n'); @@ -202,6 +207,7 @@ describe('project-config', () => { if (!result.found) throw new Error('expected config to be found'); expect(result.config.axePath).toBe(path.join(homedir(), 'tools/axe')); + expect(result.config.axeSourcePath).toBe(path.join(homedir(), 'Code/AXe')); expect(result.config.iosTemplatePath).toBe(path.join(homedir(), 'templates/ios')); }); diff --git a/src/utils/__tests__/session-aware-tool-factory.test.ts b/src/utils/__tests__/session-aware-tool-factory.test.ts index b28f87086..0e89f941c 100644 --- a/src/utils/__tests__/session-aware-tool-factory.test.ts +++ b/src/utils/__tests__/session-aware-tool-factory.test.ts @@ -376,6 +376,126 @@ describe('createSessionAwareTool', () => { expect(parsed).toEqual({ API_KEY: 'abc123', DEBUG: 'true', VERBOSE: '0' }); }); + it('only merges session defaults that exist in the schema before strict validation', async () => { + const strictSchema = z.strictObject({ + bundleId: z.string(), + }); + + const strictHandler = createSessionAwareTool>({ + internalSchema: strictSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['bundleId'] }], + }); + + sessionStore.setDefaults({ + scheme: 'App', + projectPath: '/a.xcodeproj', + simulatorId: 'SIM-123', + bundleId: 'com.example.app', + }); + + const result = await invokeAndCollect(strictHandler, {}); + expect(result.isError).toBe(false); + + const parsed = JSON.parse(result.text.replace(/\n/g, '').replace(/^.*?(\{.*\}).*$/, '$1')); + expect(parsed).toEqual({ bundleId: 
'com.example.app' }); + }); + + it('uses filtered session defaults to satisfy required fields on strict schemas', async () => { + const strictSchema = z.strictObject({ + scheme: z.string(), + projectPath: z.string(), + }); + + const strictHandler = createSessionAwareTool>({ + internalSchema: strictSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['scheme', 'projectPath'] }], + }); + + sessionStore.setDefaults({ + scheme: 'App', + projectPath: '/a.xcodeproj', + simulatorId: 'SIM-123', + }); + + const result = await invokeAndCollect(strictHandler, {}); + expect(result.isError).toBe(false); + + const parsed = JSON.parse(result.text.replace(/\n/g, '').replace(/^.*?(\{.*\}).*$/, '$1')); + expect(parsed).toEqual({ scheme: 'App', projectPath: '/a.xcodeproj' }); + }); + + it('rejects explicit unknown args on strict schemas after filtering session defaults', async () => { + const strictSchema = z.strictObject({ + bundleId: z.string(), + }); + + const strictHandler = createSessionAwareTool>({ + internalSchema: strictSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['bundleId'] }], + }); + + sessionStore.setDefaults({ + bundleId: 'com.example.app', + simulatorId: 'SIM-123', + }); + + const result = await invokeAndCollect(strictHandler, { simulatorName: 'iPhone 17' }); + expect(result.isError).toBe(true); + expect(result.text).toContain('Parameter validation failed'); + expect(result.text).toContain('simulatorName'); + }); + + it('applies refinements after filtering unrelated session defaults', async () => { + const refinedSchema = z + .strictObject({ + scheme: z.string(), + projectPath: z.string().optional(), + 
workspacePath: z.string().optional(), + }) + .refine((params) => !!params.projectPath !== !!params.workspacePath, { + message: 'provide exactly one projectPath or workspacePath', + path: ['projectPath'], + }); + + const refinedHandler = createSessionAwareTool>({ + internalSchema: refinedSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['scheme'] }], + }); + + sessionStore.setDefaults({ + scheme: 'App', + projectPath: '/a.xcodeproj', + workspacePath: '/a.xcworkspace', + simulatorId: 'SIM-123', + }); + + const result = await invokeAndCollect(refinedHandler, {}); + expect(result.isError).toBe(true); + expect(result.text).toContain('Parameter validation failed'); + expect(result.text).toContain('provide exactly one projectPath or workspacePath'); + expect(result.text).not.toContain('simulatorId'); + }); + it('rejects array passed as env instead of deep-merging it', async () => { const envSchema = z.object({ scheme: z.string(), diff --git a/src/utils/__tests__/structured-output-envelope.test.ts b/src/utils/__tests__/structured-output-envelope.test.ts index 2dcb4b950..f5c344170 100644 --- a/src/utils/__tests__/structured-output-envelope.test.ts +++ b/src/utils/__tests__/structured-output-envelope.test.ts @@ -1,11 +1,10 @@ import { describe, expect, it } from 'vitest'; import { toStructuredEnvelope } from '../structured-output-envelope.ts'; -import type { NextStep } from '../../types/common.ts'; import type { BuildResultDomainResult, + CaptureResultDomainResult, DeviceListDomainResult, } from '../../types/domain-results.ts'; -import type { StructuredOutputEnvelope } from '../../types/structured-output.ts'; describe('toStructuredEnvelope', () => { it('strips kind, didError, and error from the data payload', () => { @@ -52,345 +51,428 @@ describe('toStructuredEnvelope', () => { }); }); - 
it('omits nextSteps when no serializable steps are provided', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: true, - error: 'Build failed', - }; - const expectedEnvelope = { - schema: 'xcodebuildmcp.output.build-result', - schemaVersion: '1', - didError: true, - error: 'Build failed', - data: null, - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '1', { nextSteps: [] }), - ).toEqual(expectedEnvelope); - }); - - it('does not serialize next steps on error envelopes because the error schema has no nextSteps field', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: true, - error: 'Build failed', - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.error', '1', { - nextSteps: [ + it('compacts runtime snapshots inside the capture payload by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'exists', + matches: [ { - label: 'Retry build', - cliTool: 'build', - workflow: 'project', - params: { scheme: 'CalculatorApp' }, + ref: 'e2', + role: 'button', + label: 'Overview', + identifier: 'app.primaryButton', + frame: { x: 12, y: 81, width: 178, height: 33 }, + actions: ['tap', 'longPress', 'touch'], }, ], - }), - ).toEqual({ - schema: 'xcodebuildmcp.output.error', - schemaVersion: '1', - didError: true, - error: 'Build failed', - data: null, - }); - }); - - it('serializes next steps as rendered CLI command lines by default sorted by priority', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], - }; - const nextSteps: NextStep[] = [ - { - tool: 'launch_app_sim', - cliTool: 'launch-app', - workflow: 'simulator', - label: 'Launch app', - params: { simulatorId: 'SIM-1' }, - priority: 20, - when: 
'success', }, - { - tool: 'boot_sim', - cliTool: 'boot', - workflow: 'simulator', - label: 'Boot the simulator', - params: { simulatorId: 'SIM-1', useLatestOS: true }, - priority: 10, - when: 'success', + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-one', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'Overview', + identifier: 'app.primaryButton', + frame: { x: 12, y: 81, width: 178, height: 33 }, + actions: ['tap', 'longPress', 'touch'], + }, + { + ref: 'e3', + role: 'text', + label: 'Current reading', + frame: { x: 24, y: 140, width: 80, height: 24 }, + state: { visible: true }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Example' }, + { action: 'tap', elementRef: 'e2', label: 'Overview' }, + ], }, - ]; + }; - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { nextSteps }), - ).toEqual({ - schema: 'xcodebuildmcp.output.device-list', + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', schemaVersion: '2', didError: false, error: null, data: { - devices: [], - }, - nextSteps: [ - 'Boot the simulator: xcodebuildmcp simulator boot --simulator-id SIM-1 --use-latest-os', - 'Launch app: xcodebuildmcp simulator launch-app --simulator-id SIM-1', - ], - }); - }); - - it('shell-escapes only JSON next step arguments that need quoting', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], - }; - const nextSteps: NextStep[] = [ - { - tool: 'launch_sim', - cliTool: 'launch', - workflow: 'simulator', - label: 'Launch app', - params: { - simulatorId: 
'SIM-1', - appPath: '/tmp/My App.app', - displayName: "Cam's App", + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + rs: '1', + screenHash: 'screen-one', + seq: 1, + count: 3, + targets: ['e2|tap|button|Overview||app.primaryButton'], + scroll: ['e1|swipe|application|Example||'], + text: ['e3|text|text|Current reading||'], + udid: 'SIMULATOR-1', + }, + waitMatch: { + predicate: 'exists', + matches: ['e2|tap|button|Overview||app.primaryButton'], }, }, - ]; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { nextSteps }), - ).toMatchObject({ - nextSteps: [ - "Launch app: xcodebuildmcp simulator launch --simulator-id SIM-1 --app-path '/tmp/My App.app' --display-name 'Cam'\\''s App'", - ], }); }); - it('serializes CLI next steps when only cliTool is present', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', + it('puts suppressed target evidence in a no-ref evidence array, not text rows', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', didError: false, error: null, - devices: [], - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-suppressed', + seq: 2, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ { - cliTool: 'list', - workflow: 'simulator', - label: 'List simulators', - params: { platform: 'iOS Simulator' }, + ref: 'e1', + role: 'button', + label: 'Add', + frame: { x: 12, y: 81, width: 80, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e2', + role: 'button', + label: 'London, England', + value: 'not saved', + frame: { x: 20, y: 140, width: 200, height: 72 }, + state: { visible: true }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'text', + 
label: 'Search results', + frame: { x: 20, y: 100, width: 120, height: 24 }, + state: { visible: true }, + actions: [], }, ], - }), - ).toMatchObject({ - nextSteps: ["List simulators: xcodebuildmcp simulator list --platform 'iOS Simulator'"], - }); - }); - - it('serializes next steps as MCP tool-call lines for MCP structured content', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Add' }, + { action: 'tap', elementRef: 'e2', label: 'London, England' }, + ], + }, }; expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ - { - tool: 'get_mac_app_path', - cliTool: 'get-app-path', - workflow: 'macos', - label: 'Get app path', - params: { scheme: 'MCPTest' }, - }, - ], - nextStepRuntime: 'mcp', + toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshotSuppressedTargetRefs: ['e2'], }), ).toMatchObject({ - nextSteps: ['Get app path: get_mac_app_path({ scheme: "MCPTest" })'], + data: { + capture: { + targets: ['e1|tap|button|Add||'], + text: ['e3|text|text|Search results||'], + evidence: ['button|London, England|not saved|'], + }, + }, }); }); - it('escapes MCP structured next-step string params as JSON string literals', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', + it('caps compact runtime snapshot rows by category', () => { + const targets = Array.from({ length: 80 }, (_, index) => ({ + ref: `e${index + 1}`, + role: 'button' as const, + label: `Target ${index + 1}`, + frame: { x: 0, y: index, width: 100, height: 40 }, + actions: ['tap' as const], + })); + const scroll = Array.from({ length: 40 }, (_, index) => ({ + ref: `e${index + 81}`, + role: 'scroll-view' as const, + label: `Scroll ${index + 1}`, + frame: { x: 0, y: index, width: 390, height: 600 }, + actions: ['swipeWithin' as const], + })); + const text = Array.from({ length: 
70 }, (_, index) => ({ + ref: `e${index + 121}`, + role: 'text' as const, + label: `Text ${index + 1}`, + frame: { x: 0, y: index, width: 100, height: 20 }, + state: { visible: true }, + actions: ['touch' as const], + })); + const result: CaptureResultDomainResult = { + kind: 'capture-result', didError: false, error: null, - devices: [], + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'large-screen', + seq: 4, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [...targets, ...scroll, ...text], + actions: [], + }, }; - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ - { - tool: 'launch_app_sim', - cliTool: 'launch-app', - workflow: 'simulator', - label: 'Launch app', - params: { - scheme: 'Cam "Debug" App', - bundleId: 'com.example.$APP\\debug', - launchArg: 'line1\nline2', - }, - }, - ], - nextStepRuntime: 'mcp', - }), - ).toMatchObject({ - nextSteps: [ - 'Launch app: launch_app_sim({ scheme: "Cam \\"Debug\\" App", bundleId: "com.example.$APP\\\\debug", launchArg: "line1\\nline2" })', - ], - }); + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2'); + const data = envelope.data as { + capture: { targets: string[]; scroll: string[]; text?: string[] }; + }; + + expect(data.capture.targets).toHaveLength(64); + expect(data.capture.scroll).toHaveLength(32); + expect(data.capture.text).toHaveLength(64); }); - it('preserves request data for normal structured output', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', + it('compacts unchanged runtime snapshot captures by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', didError: false, error: null, - request: { - scheme: 'CalculatorApp', - workspacePath: 'example_projects/iOS_Calculator/CalculatorApp.xcworkspace', + summary: { 
status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-one', + seq: 2, }, - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, }; - expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '2')).toEqual({ - schema: 'xcodebuildmcp.output.build-result', + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', schemaVersion: '2', didError: false, error: null, data: { - request: { - scheme: 'CalculatorApp', - workspacePath: 'example_projects/iOS_Calculator/CalculatorApp.xcworkspace', + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: 'screen-one', + seq: 2, + unchanged: true, + udid: 'SIMULATOR-1', }, - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, }, }); }); - it('preserves CLI next steps while applying minimal structured-output compactness', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', + it('orders compact runtime snapshot targets by usefulness', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', didError: false, error: null, - request: { - scheme: 'CalculatorApp', - workspacePath: 'example_projects/iOS_Calculator/CalculatorApp.xcworkspace', - }, - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, - }; - - expect( - 
toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '2', { - nextSteps: [ + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-two', + seq: 2, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e2', + role: 'button', + label: 'Sheet Grabber', + value: 'Expanded', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 'Settings', + frame: { x: 320, y: 40, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e8', + role: 'text-field', + value: 'Portland', + frame: { x: 20, y: 100, width: 200, height: 40 }, + actions: ['typeText'], + }, { - tool: 'get_mac_app_path', - cliTool: 'get-app-path', - workflow: 'macos', - label: 'Get built app path', - params: { scheme: 'CalculatorApp' }, + ref: 'e9', + role: 'button', + label: 'Clear search', + frame: { x: 230, y: 100, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e82', + role: 'button', + label: 'PRECIP., 78%, Next 24 hours', + identifier: 'weather.precipitationCard', + frame: { x: 20, y: 300, width: 340, height: 140 }, + actions: ['tap'], }, ], - outputStyle: 'minimal', - }), - ).toEqual({ - schema: 'xcodebuildmcp.output.build-result', - schemaVersion: '2', - didError: false, - error: null, - data: { - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, + actions: [], }, - nextSteps: ['Get built app path: xcodebuildmcp macos get-app-path --scheme CalculatorApp'], - }); - }); - - it('uses null data when minimal pruning removes the only data field', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: false, - error: null, - request: { scheme: 'CalculatorApp' }, }; - expect( - 
toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '2', { - outputStyle: 'minimal', - }), - ).toEqual({ - schema: 'xcodebuildmcp.output.build-result', - schemaVersion: '2', - didError: false, - error: null, - data: null, + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2'); + + expect(envelope.data).toMatchObject({ + capture: { + screenHash: 'screen-two', + seq: 2, + targets: [ + 'e82|tap|button|PRECIP., 78%, Next 24 hours||weather.precipitationCard', + 'e8|typeText|text-field||Portland|', + 'e3|tap|button|Settings||', + 'e9|tap|button|Clear search||', + ], + }, }); }); - it('leaves minimal structured output without request frontmatter unchanged', () => { - const result: StructuredOutputEnvelope<{ simulators: [] }> = { - schema: 'xcodebuildmcp.output.simulator-list', - schemaVersion: '1', - didError: false, - error: null, - data: { simulators: [] }, + it('compacts runtime snapshot candidates inside recoverable UI errors by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ + { + ref: 'e8', + role: 'text-field', + value: 'Lisbon', + identifier: 'weather.locationsSheet', + frame: { x: 65, y: 482, width: 272, height: 18 }, + actions: ['tap', 'typeText', 'longPress', 'touch'], + }, + { + ref: 'e11', + role: 'button', + label: 'Lisbon, Portugal', + value: 'saved', + frame: { x: 40, y: 552, width: 89, height: 49 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + }, }; - expect( - toStructuredEnvelope( - { - kind: 'simulator-list', - didError: result.didError, - error: result.error, - simulators: [], + expect(toStructuredEnvelope(result, 
'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + data: { + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ + 'e8|typeText|text-field||Lisbon|weather.locationsSheet', + 'e11|tap|button|Lisbon, Portugal|saved|', + ], }, - result.schema, - result.schemaVersion, - { outputStyle: 'minimal' }, - ), - ).toEqual(result); + }, + }); }); - it('serializes label-only next steps as text lines', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ + it('can keep full runtime snapshots and candidates for verbose callers', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-three', + seq: 3, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ { - label: 'Open Simulator', - params: {}, + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], }, ], + actions: [{ action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }], + }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ + { + ref: 'e1', + role: 'application', + label: 
'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + ], + }, + }; + + expect( + toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshot: 'full', }), - ).toMatchObject({ - nextSteps: ['Open Simulator'], + ).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + data: { + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: result.capture, + uiError: result.uiError, + }, }); }); }); diff --git a/src/utils/axe-helpers.ts b/src/utils/axe-helpers.ts index 3ae6e4ae1..436b5fca5 100644 --- a/src/utils/axe-helpers.ts +++ b/src/utils/axe-helpers.ts @@ -5,14 +5,14 @@ * Prefers bundled AXe when present, but allows env and PATH fallback. */ -import { accessSync, constants, existsSync } from 'fs'; +import { accessSync, constants, existsSync, readdirSync, statSync } from 'fs'; import { delimiter, join, resolve } from 'path'; import type { CommandExecutor } from './execution/index.ts'; import { getDefaultCommandExecutor } from './execution/index.ts'; import { getConfig } from './config-store.ts'; import { getBundledAxePath, getBundledFrameworksDir } from '../core/resource-root.ts'; -export type AxeBinarySource = 'env' | 'bundled' | 'path'; +export type AxeBinarySource = 'env' | 'source' | 'bundled' | 'path'; export type AxeBinary = { path: string; @@ -35,6 +35,50 @@ function resolveAxePathFromConfig(): string | null { return isExecutable(resolved) ? 
resolved : null; } +function isDirectory(path: string): boolean { + try { + return statSync(path).isDirectory(); + } catch { + return false; + } +} + +function getAxeSourceBuildCandidates(sourcePath: string): string[] { + const candidates = [join(sourcePath, '.build', 'release', 'axe')]; + const swiftBuildDir = join(sourcePath, '.build'); + + if (isDirectory(swiftBuildDir)) { + for (const entry of readdirSync(swiftBuildDir, { withFileTypes: true })) { + if (entry.isDirectory() && entry.name.endsWith('-apple-macosx')) { + candidates.push(join(swiftBuildDir, entry.name, 'release', 'axe')); + } + } + } + + return candidates; +} + +function resolveAxePathFromSourceConfig(): string | null { + const value = getConfig().axeSourcePath; + if (!value) return null; + + const sourcePath = resolve(value); + if (!isDirectory(sourcePath)) { + throw new Error(`Configured axeSourcePath does not exist or is not a directory: ${sourcePath}`); + } + + const candidates = getAxeSourceBuildCandidates(sourcePath); + for (const candidate of candidates) { + if (isExecutable(candidate)) { + return candidate; + } + } + + throw new Error( + `Configured axeSourcePath does not contain an executable release AXe build. Expected one of: ${candidates.join(', ')}`, + ); +} + function resolveBundledAxePath(): string | null { const candidates = new Set(); candidates.add(getBundledAxePath()); @@ -66,6 +110,11 @@ export function resolveAxeBinary(): AxeBinary | null { return { path: configPath, source: 'env' }; } + const sourcePath = resolveAxePathFromSourceConfig(); + if (sourcePath) { + return { path: sourcePath, source: 'source' }; + } + const bundledPath = resolveBundledAxePath(); if (bundledPath) { return { path: bundledPath, source: 'bundled' }; @@ -118,6 +167,7 @@ export function areAxeToolsAvailable(): boolean { export const AXE_NOT_AVAILABLE_MESSAGE = 'AXe tool not found. 
UI automation features are not available.\n\n' + 'Install AXe (brew tap cameroncooke/axe && brew install axe) or set XCODEBUILDMCP_AXE_PATH.\n' + + 'For local source validation, set XCODEBUILDMCP_AXE_SOURCE_PATH to an AXe checkout with a release build.\n' + 'Ensure bundled artifacts are included or PATH is configured.'; /** diff --git a/src/utils/config-store.ts b/src/utils/config-store.ts index dafd35759..c7e0aadc5 100644 --- a/src/utils/config-store.ts +++ b/src/utils/config-store.ts @@ -28,6 +28,7 @@ export type RuntimeConfigOverrides = Partial<{ dapLogEvents: boolean; launchJsonWaitMs: number; axePath: string; + axeSourcePath: string; iosTemplatePath: string; iosTemplateVersion: string; macosTemplatePath: string; @@ -54,6 +55,7 @@ export type ResolvedRuntimeConfig = { dapLogEvents: boolean; launchJsonWaitMs: number; axePath?: string; + axeSourcePath?: string; iosTemplatePath?: string; iosTemplateVersion?: string; macosTemplatePath?: string; @@ -227,6 +229,9 @@ function readEnvConfig(env: NodeJS.ProcessEnv): RuntimeConfigOverrides { const axePath = env.XCODEBUILDMCP_AXE_PATH ?? env.AXE_PATH; if (axePath) config.axePath = axePath; + const axeSourcePath = env.XCODEBUILDMCP_AXE_SOURCE_PATH ?? 
env.AXE_SOURCE_PATH; + if (axeSourcePath) config.axeSourcePath = axeSourcePath; + const iosTemplatePath = env.XCODEBUILDMCP_IOS_TEMPLATE_PATH; if (iosTemplatePath) config.iosTemplatePath = iosTemplatePath; @@ -557,6 +562,12 @@ function resolveConfig(opts: { fileConfig: opts.fileConfig, envConfig, }), + axeSourcePath: resolveFromLayers({ + key: 'axeSourcePath', + overrides: opts.overrides, + fileConfig: opts.fileConfig, + envConfig, + }), iosTemplatePath: resolveFromLayers({ key: 'iosTemplatePath', overrides: opts.overrides, diff --git a/src/utils/project-config.ts b/src/utils/project-config.ts index ce291bb9d..155397161 100644 --- a/src/utils/project-config.ts +++ b/src/utils/project-config.ts @@ -206,7 +206,7 @@ function normalizeCustomWorkflows(value: unknown): Record { function resolveRelativeTopLevelPaths(config: ProjectConfig, cwd: string): ProjectConfig { const resolved: ProjectConfig = { ...config }; - const pathKeys = ['axePath', 'iosTemplatePath', 'macosTemplatePath'] as const; + const pathKeys = ['axePath', 'axeSourcePath', 'iosTemplatePath', 'macosTemplatePath'] as const; for (const key of pathKeys) { const value = resolved[key]; diff --git a/src/utils/renderers/__tests__/cli-text-renderer.test.ts b/src/utils/renderers/__tests__/cli-text-renderer.test.ts index 78da2b98f..cc8789a4b 100644 --- a/src/utils/renderers/__tests__/cli-text-renderer.test.ts +++ b/src/utils/renderers/__tests__/cli-text-renderer.test.ts @@ -571,6 +571,552 @@ describe('cli-text-renderer', () => { expect(output).toContain('└ App Path: /tmp/MyApp.app'); }); + it('renders runtime UI snapshots as compact target lists', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 
'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + identifier: 'add-button', + value: 'selected', + frame: { x: 10, y: 20, width: 30, height: 40 }, + state: { enabled: true, visible: true }, + actions: ['tap', 'longPress'], + }, + { + ref: 'e2', + role: 'text', + label: 'Total', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: [], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Add' }], + }, + }, + }, + }); + + expect(output).toContain('📷 Snapshot UI'); + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Add|selected|add-button'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 0 scroll areas.', + ); + expect(output).not.toContain('- Use scroll refs with swipe.'); + expect(output).not.toContain('Accessibility Hierarchy'); + expect(output).not.toContain('```json'); + }); + + it('renders suppressed runtime evidence without callable refs', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { runtimeSnapshot: { suppressedTargetRefs: ['e2'] } }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + frame: { x: 10, y: 20, width: 60, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e2', + role: 'button', + label: 'London, England', + value: 'not saved', + frame: { x: 20, y: 80, width: 200, height: 72 }, + state: { visible: true }, + actions: ['tap'], + }, + ], + actions: [ + { 
action: 'tap', elementRef: 'e1', label: 'Add' }, + { action: 'tap', elementRef: 'e2', label: 'London, England' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Add||'); + expect(output).toContain('Evidence (1) — role|label|value|id'); + expect(output).toContain('button|London, England|not saved|'); + expect(output).not.toContain('e2|tap|button|London, England|not saved|'); + }); + + it('renders unchanged runtime UI snapshots compactly', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 2, + }, + }, + }, + }); + + expect(output).toContain('📷 Snapshot UI'); + expect(output).toContain('Runtime UI snapshot unchanged (screenHash: screen-hash, seq: 2).'); + expect(output).not.toContain('Targets ('); + expect(output).not.toContain('Tips'); + }); + + it('orders useful runtime targets before chrome controls in compact output', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e2', + role: 'button', + label: 'Sheet Grabber', + value: 'Expanded', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 
'Settings', + frame: { x: 320, y: 40, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e8', + role: 'text-field', + value: 'Portland', + frame: { x: 20, y: 100, width: 200, height: 40 }, + actions: ['typeText'], + }, + { + ref: 'e9', + role: 'button', + label: 'Clear search', + frame: { x: 230, y: 100, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: 'Remove', + identifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e82', + role: 'button', + label: 'PRECIP., 78%, Next 24 hours', + identifier: 'weather.precipitationCard', + frame: { x: 20, y: 300, width: 340, height: 140 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e2', label: 'Sheet Grabber' }, + { action: 'tap', elementRef: 'e3', label: 'Settings' }, + { action: 'typeText', elementRef: 'e8' }, + { action: 'tap', elementRef: 'e9', label: 'Clear search' }, + { action: 'tap', elementRef: 'e10', label: 'Remove' }, + { action: 'tap', elementRef: 'e82', label: 'PRECIP., 78%, Next 24 hours' }, + ], + }, + }, + }, + }); + + const precipitationIndex = output.indexOf( + 'e82|tap|button|PRECIP., 78%, Next 24 hours||weather.precipitationCard', + ); + const searchIndex = output.indexOf('e8|typeText|text-field||Portland|'); + const settingsIndex = output.indexOf('e3|tap|button|Settings||'); + const clearSearchIndex = output.indexOf('e9|tap|button|Clear search||'); + const removeIndex = output.indexOf('e10|tap|button|Remove||trash'); + + expect(precipitationIndex).toBeGreaterThanOrEqual(0); + expect(searchIndex).toBeGreaterThan(precipitationIndex); + expect(settingsIndex).toBeGreaterThan(searchIndex); + expect(output).not.toContain('e2|tap|button|Sheet Grabber|Expanded|'); + expect(clearSearchIndex).toBeGreaterThan(settingsIndex); + expect(removeIndex).toBeGreaterThan(settingsIndex); + }); + + it('orders unselected segmented controls before already-selected controls in compact 
output', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e9', + role: 'button', + label: '°F', + value: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: '°C', + value: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e9', label: '°F' }, + { action: 'tap', elementRef: 'e10', label: '°C' }, + ], + }, + }, + }, + }); + + const selectedIndex = output.indexOf('e9|tap|button|°F|selected|'); + const unselectedIndex = output.indexOf('e10|tap|button|°C|not selected|'); + + expect(unselectedIndex).toBeGreaterThanOrEqual(0); + expect(selectedIndex).toBeGreaterThan(unselectedIndex); + }); + + it('does not list static text as a likely runtime target when only low-level actions are present', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Settings', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap', 'longPress', 'touch'], + }, + { + ref: 'e2', + role: 'text', + label: 
'Updated just now', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Settings' }, + { action: 'longPress', elementRef: 'e2', label: 'Updated just now' }, + { action: 'touch', elementRef: 'e2', label: 'Updated just now' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Settings||'); + expect(output).not.toContain('e2|'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 0 scroll areas.', + ); + }); + + it('renders runtime UI snapshot scroll areas separately from likely targets', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'Settings', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'Settings' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e2|tap|button|Settings||'); + expect(output).toContain('Scroll (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|swipe|application|Weather||'); + expect(output).toContain('- Use scroll refs 
with swipe.'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 1 scroll area.', + ); + }); + + it('renders wait_for_ui output with wait-specific text', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { headerTitle: 'Wait for UI' }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'exists', + matches: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Continue' }], + }, + }, + }, + }); + + expect(output).toContain('âš™ī¸ Wait for UI'); + expect(output).toContain('Matched exists (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Continue||'); + expect(output).toContain( + 'Wait completed; runtime UI snapshot refreshed with 1 element, 1 likely target, and 0 scroll areas.', + ); + }); + + it('renders static wait matches with no primary action', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { headerTitle: 'Wait for UI' }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'textContains', + matches: [ + { + ref: 'e11', + role: 'text', + label: 'No 
matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'longPress', elementRef: 'e11', label: 'No matches' }, + { action: 'touch', elementRef: 'e11', label: 'No matches' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Matched textContains (1) — ref|action|role|label|value|id'); + expect(output).toContain('e11|none|text|No matches||'); + expect(output).not.toContain('e11|longPress|text|No matches||'); + }); + + it('renders typed UI action recovery hints', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + result: { + kind: 'ui-action-result', + didError: true, + error: 'Element reference e9 was not found in the current runtime snapshot.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + action: { type: 'tap', elementRef: 'e9' }, + uiError: { + code: 'ELEMENT_REF_NOT_FOUND', + message: 'Element reference e9 was not found in the current runtime snapshot.', + recoveryHint: 'Run snapshot_ui again and retry with a current element reference.', + elementRef: 'e9', + candidates: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + }, + }, + }, + }); + + expect(output).toContain('Recovery'); + expect(output).toContain('Code: ELEMENT_REF_NOT_FOUND'); + expect(output).toContain('Element: e9'); + expect(output).toContain( + 'Hint: Run snapshot_ui again and retry with a current element reference.', + ); + expect(output).toContain('Candidates 
(1):'); + expect(output).toContain('e1|tap|button|Add||'); + expect(output).toContain( + '❌ Element reference e9 was not found in the current runtime snapshot.', + ); + }); + it('renders structured output path artifacts as a tree when requested', () => { const output = renderCliTextTranscript({ filePathRenderStyle: 'tree', diff --git a/src/utils/renderers/cli-text-renderer.ts b/src/utils/renderers/cli-text-renderer.ts index 2afa21fb1..5001f0801 100644 --- a/src/utils/renderers/cli-text-renderer.ts +++ b/src/utils/renderers/cli-text-renderer.ts @@ -73,6 +73,7 @@ interface CliTextProcessorOptions { showTestTiming: boolean; filePathRenderStyle: FilePathRenderStyle; includeHeaderDetails: boolean; + includeNextSteps: boolean; } interface CliTextRendererOptions { @@ -81,6 +82,7 @@ interface CliTextRendererOptions { showTestTiming?: boolean; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } export interface CliTextTranscriptInput { @@ -92,6 +94,7 @@ export interface CliTextTranscriptInput { showTestTiming?: boolean; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } interface XcodebuildParserState { @@ -110,6 +113,7 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps, } = options; const groupedCompilerErrors: CompilerErrorRenderItem[] = []; const groupedWarnings: CompilerWarningRenderItem[] = []; @@ -470,7 +474,9 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen groupedCompilerErrors.length = 0; groupedTestFailures.length = 0; groupedWarnings.length = 0; - const nextStepsBlock = createNextStepsBlock(nextSteps, nextStepsRuntime); + const nextStepsBlock = includeNextSteps + ? 
createNextStepsBlock(nextSteps, nextStepsRuntime) + : null; if (nextStepsBlock && !sawProgressNextSteps) { processItem(nextStepsBlock); } @@ -506,6 +512,7 @@ export function createCliTextRenderer(options: CliTextRendererOptions): Transcri showTestTiming: options.showTestTiming ?? false, filePathRenderStyle: options.filePathRenderStyle ?? 'list', includeHeaderDetails: options.includeHeaderDetails ?? true, + includeNextSteps: options.includeNextSteps ?? true, sink: { clearTransient(): void { reporter.clear(); @@ -531,6 +538,7 @@ export function renderCliTextTranscript(input: CliTextTranscriptInput = {}): str showTestTiming: input.showTestTiming ?? false, filePathRenderStyle: input.filePathRenderStyle ?? 'list', includeHeaderDetails: input.includeHeaderDetails ?? true, + includeNextSteps: input.includeNextSteps ?? true, sink: { clearTransient(): void {}, updateTransient(): void {}, diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index 40a5f6186..34e3276ea 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -5,6 +5,13 @@ import type { TestDiagnostics, ToolDomainResult, } from '../../types/domain-results.ts'; +import type { + RuntimeElementV1, + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, + UiAutomationRecoverableError, + UiWaitMatch, +} from '../../types/ui-snapshot.ts'; import type { RenderHints } from '../../rendering/types.ts'; import type { XcodebuildOperation } from '../../types/domain-fragments.ts'; import type { @@ -160,6 +167,8 @@ type CaptureResultWithVideo = Extract= 24 || /card$/i.test(identifier); +} + +function isAlreadySelectedRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + element.state?.selected === true || normalizedRuntimeSnapshotText(element.value) === 'selected' + ); +} + +function getRuntimeTargetDisplayPriority(element: RuntimeElementV1): number { + if (isLowPriorityRuntimeTarget(element)) { + return 90; + } + if 
(isAlreadySelectedRuntimeTarget(element)) { + return 70; + } + if (isContentRichTapTarget(element)) { + return 0; + } + if (element.actions.includes('typeText')) { + return 10; + } + if (element.actions.includes('tap')) { + return 20; + } + return 50; +} + +function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const priorityDelta = + getRuntimeTargetDisplayPriority(left.element) - + getRuntimeTargetDisplayPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + }) + .map(({ element }) => element); +} + +function getPrimaryRuntimeElementAction(element: RuntimeElementV1, action?: string): string { + if (action) { + return action; + } + if (element.actions.includes('typeText')) { + return 'typeText'; + } + if (element.actions.includes('tap')) { + return 'tap'; + } + if (element.actions.includes('swipeWithin')) { + return 'swipe'; + } + return 'none'; +} + +function formatRuntimeElementLine(element: RuntimeElementV1, action?: string): string { + const primaryAction = getPrimaryRuntimeElementAction(element, action); + return [ + element.ref, + primaryAction, + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function formatSuppressedRuntimeEvidenceLine(element: RuntimeElementV1): string { + return [ + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function getSuppressedRuntimeTargetRefs(hints?: RenderHints): Set { + return new Set(hints?.runtimeSnapshot?.suppressedTargetRefs ?? 
[]); +} + +function hasRuntimeTextEvidence(element: RuntimeElementV1): boolean { + return ( + compactRuntimeSnapshotText(element.label).length > 0 || + compactRuntimeSnapshotText(element.value).length > 0 + ); +} + +function isLikelyRuntimeTarget( + element: RuntimeElementV1, + suppressedTargetRefs: ReadonlySet = new Set(), +): boolean { + return ( + !suppressedTargetRefs.has(element.ref) && + !isHiddenRuntimeTarget(element) && + element.actions.some((action) => action === 'tap' || action === 'typeText') + ); +} + +function isSuppressedRuntimeTextEvidenceElement( + element: RuntimeElementV1, + suppressedTargetRefs: ReadonlySet, +): boolean { + return ( + suppressedTargetRefs.has(element.ref) && + element.state?.visible !== false && + !isHiddenRuntimeTarget(element) && + !isLowPriorityRuntimeTarget(element) && + hasRuntimeTextEvidence(element) + ); +} + +function isScrollableRuntimeArea(element: RuntimeElementV1): boolean { + return element.actions.includes('swipeWithin') && !isLikelyRuntimeTarget(element); +} + +function countLikelyRuntimeTargets( + snapshot: RuntimeSnapshotV1, + suppressedTargetRefs: ReadonlySet = new Set(), +): number { + return snapshot.elements.filter((element) => isLikelyRuntimeTarget(element, suppressedTargetRefs)) + .length; +} + +function countScrollableRuntimeAreas(snapshot: RuntimeSnapshotV1): number { + return snapshot.elements.filter(isScrollableRuntimeArea).length; +} + +function createRuntimeSnapshotTargetsSection( + snapshot: RuntimeSnapshotV1, + suppressedTargetRefs: ReadonlySet = new Set(), +): SectionTextBlock { + const likelyTargets = sortRuntimeTargetsForDisplay( + snapshot.elements.filter((element) => isLikelyRuntimeTarget(element, suppressedTargetRefs)), + ); + const lines = likelyTargets.map((element) => formatRuntimeElementLine(element)); + + return createSection( + `Targets (${likelyTargets.length}) — ref|action|role|label|value|id`, + lines.length > 0 ? 
lines : ['(no likely interaction targets found)'], + ); +} + +function createRuntimeSnapshotEvidenceSection( + snapshot: RuntimeSnapshotV1, + suppressedTargetRefs: ReadonlySet, +): SectionTextBlock | null { + const evidenceElements = snapshot.elements.filter((element) => + isSuppressedRuntimeTextEvidenceElement(element, suppressedTargetRefs), + ); + if (evidenceElements.length === 0) { + return null; + } + + return createSection( + `Evidence (${evidenceElements.length}) — role|label|value|id`, + evidenceElements.map((element) => formatSuppressedRuntimeEvidenceLine(element)), + ); +} + +function createRuntimeSnapshotScrollAreasSection( + snapshot: RuntimeSnapshotV1, +): SectionTextBlock | null { + const scrollAreas = snapshot.elements.filter(isScrollableRuntimeArea); + if (scrollAreas.length === 0) { + return null; + } + + return createSection( + `Scroll (${scrollAreas.length}) — ref|action|role|label|value|id`, + scrollAreas.map((element) => formatRuntimeElementLine(element, 'swipe')), + ); +} + +function createWaitMatchSection(waitMatch: UiWaitMatch): SectionTextBlock { + return createSection( + `Matched ${waitMatch.predicate} (${waitMatch.matches.length}) — ref|action|role|label|value|id`, + waitMatch.matches.length > 0 + ? waitMatch.matches.map((element) => formatRuntimeElementLine(element)) + : ['(no matching elements found)'], + ); +} + +function createUiErrorItems(uiError?: UiAutomationRecoverableError): TextRenderableItem[] { + if (!uiError) { + return []; + } + + const lines = [ + `Code: ${uiError.code}`, + `Message: ${uiError.message}`, + ...(uiError.elementRef ? [`Element: ${uiError.elementRef}`] : []), + ...(typeof uiError.timeoutMs === 'number' ? 
[`Timeout: ${uiError.timeoutMs}ms`] : []), + `Hint: ${uiError.recoveryHint}`, + ]; + + if (uiError.candidates && uiError.candidates.length > 0) { + lines.push( + `Candidates (${uiError.candidates.length}):`, + ...uiError.candidates.map((candidate) => ` ${formatRuntimeElementLine(candidate)}`), + ); + } + + return [createSection('Recovery', lines)]; +} + function createSimulatorActionItems( result: Extract, ): TextRenderableItem[] { @@ -1215,6 +1473,7 @@ function createSimulatorActionItems( function createCaptureResultItems( rawResult: Extract, + hints?: RenderHints, ): TextRenderableItem[] { const result = rawResult as CaptureResultWithVideo; @@ -1254,10 +1513,18 @@ function createCaptureResultItems( return items; } + const capture = result.capture; + const isRuntimeSnapshot = + capture !== undefined && 'type' in capture && capture.type === 'runtime-snapshot'; + const isRuntimeSnapshotUnchanged = + capture !== undefined && 'type' in capture && capture.type === 'runtime-snapshot-unchanged'; const isUiHierarchy = - (result.capture && 'type' in result.capture && result.capture.type === 'ui-hierarchy') || - result.error?.includes('accessibility hierarchy') === true; - const title = isUiHierarchy ? 'Snapshot UI' : 'Screenshot'; + (capture !== undefined && 'type' in capture && capture.type === 'ui-hierarchy') || + isRuntimeSnapshot || + isRuntimeSnapshotUnchanged || + result.error?.includes('accessibility hierarchy') === true || + result.error?.includes('runtime UI snapshot') === true; + const title = hints?.headerTitle ?? (isUiHierarchy ? 
'Snapshot UI' : 'Screenshot'); const items: TextRenderableItem[] = [ createHeader(title, [ ...(result.artifacts.simulatorId @@ -1267,10 +1534,63 @@ function createCaptureResultItems( ]; if (result.didError) { + items.push(...createStandardDiagnosticSections(result.diagnostics)); + items.push(...createUiErrorItems(result.uiError)); + let fallbackError = 'Failed to capture screenshot.'; + if (isRuntimeSnapshot) { + fallbackError = 'Failed to get runtime UI snapshot.'; + } else if (isUiHierarchy) { + fallbackError = 'Failed to get accessibility hierarchy.'; + } + + items.push(createStatus('error', result.error ?? fallbackError)); + return items; + } + + if (isRuntimeSnapshotUnchanged) { + const unchangedCapture = result.capture as RuntimeSnapshotUnchangedV1; items.push( - ...createFailureStatusWithDiagnostics( - result, - isUiHierarchy ? 'Failed to get accessibility hierarchy.' : 'Failed to capture screenshot.', + ...createStandardDiagnosticSections(result.diagnostics), + createStatus( + 'success', + `Runtime UI snapshot unchanged (screenHash: ${unchangedCapture.screenHash}, seq: ${unchangedCapture.seq}).`, + ), + ); + return items; + } + + if (isRuntimeSnapshot) { + const snapshot = result.capture as RuntimeSnapshotV1; + const suppressedTargetRefs = getSuppressedRuntimeTargetRefs(hints); + const likelyTargetCount = countLikelyRuntimeTargets(snapshot, suppressedTargetRefs); + const scrollAreaCount = countScrollableRuntimeAreas(snapshot); + const evidenceSection = createRuntimeSnapshotEvidenceSection(snapshot, suppressedTargetRefs); + const scrollAreasSection = createRuntimeSnapshotScrollAreasSection(snapshot); + if (title === 'Wait for UI' && result.waitMatch) { + items.push(createWaitMatchSection(result.waitMatch)); + } + items.push(createRuntimeSnapshotTargetsSection(snapshot, suppressedTargetRefs)); + if (evidenceSection) { + items.push(evidenceSection); + } + if (scrollAreasSection) { + items.push(scrollAreasSection); + } + items.push( + createSection('Tips', [ + 
'- Use target refs with tap, type_text, long_press, and touch.', + ...(scrollAreaCount > 0 ? ['- Use scroll refs with swipe.'] : []), + '- Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output.', + '- Use wait_for_ui for text/assertions or changing UI.', + ]), + ); + items.push( + ...createStandardDiagnosticSections(result.diagnostics), + createStatus( + 'success', + title === 'Wait for UI' + ? `Wait completed; runtime UI snapshot refreshed with ${pluralize(snapshot.elements.length, 'element')}, ${pluralize(likelyTargetCount, 'likely target')}, and ${pluralize(scrollAreaCount, 'scroll area')}.` + : `Runtime UI snapshot captured with ${pluralize(snapshot.elements.length, 'element')}, ${pluralize(likelyTargetCount, 'likely target')}, and ${pluralize(scrollAreaCount, 'scroll area')}.`, ), ); return items; @@ -2057,7 +2377,7 @@ function createSpecialCaseItems( case 'simulator-action-result': return createSimulatorActionItems(result); case 'capture-result': - return createCaptureResultItems(result); + return createCaptureResultItems(result, hints); case 'process-list': return createProcessListItems(result); case 'coverage-result': @@ -2086,6 +2406,7 @@ function createSpecialCaseItems( const headerTitleMap: Record = { tap: 'Tap', swipe: 'Swipe', + drag: 'Drag', touch: 'Touch', 'long-press': 'Long Press', button: 'Button', @@ -2093,6 +2414,7 @@ function createSpecialCaseItems( 'type-text': 'Type Text', 'key-press': 'Key Press', 'key-sequence': 'Key Sequence', + batch: 'Batch UI Actions', }; const items: TextRenderableItem[] = [ createHeader(headerTitleMap[result.action.type], [ @@ -2100,40 +2422,41 @@ function createSpecialCaseItems( ]), ]; if (result.didError) { - items.push(...createFailureStatusWithDiagnostics(result, 'UI action failed.')); + items.push(...createStandardDiagnosticSections(result.diagnostics)); + items.push(...createUiErrorItems(result.uiError)); + items.push(createStatus('error', result.error ?? 
'UI action failed.')); return items; } let successMessage = 'UI action completed successfully.'; switch (result.action.type) { case 'tap': - successMessage = - typeof result.action.x === 'number' && typeof result.action.y === 'number' - ? `Tap at (${result.action.x}, ${result.action.y}) simulated successfully.` - : result.action.id - ? `Tap on element id "${result.action.id}" simulated successfully.` - : result.action.label - ? `Tap on element label "${result.action.label}" simulated successfully.` - : successMessage; + successMessage = `Tap on elementRef ${result.action.elementRef} simulated successfully.`; break; case 'swipe': { const durationText = typeof result.action.durationSeconds === 'number' ? ` duration=${result.action.durationSeconds}s` : ''; - if (result.action.from && result.action.to) { - successMessage = - `Swipe from (${result.action.from.x}, ${result.action.from.y}) to (${result.action.to.x}, ${result.action.to.y})` + - `${durationText} simulated successfully.`; - } + successMessage = + `Swipe ${result.action.direction} within elementRef ${result.action.withinElementRef}` + + `${durationText} simulated successfully.`; + break; + } + case 'drag': { + const durationText = + typeof result.action.durationSeconds === 'number' + ? ` duration=${result.action.durationSeconds}s` + : ''; + successMessage = + `Drag ${result.action.direction} from elementRef ${result.action.elementRef}` + + `${durationText} simulated successfully.`; break; } case 'touch': - if (typeof result.action.x === 'number' && typeof result.action.y === 'number') { - successMessage = `Touch event (${result.action.event ?? 'touch'}) at (${result.action.x}, ${result.action.y}) executed successfully.`; - } + successMessage = `Touch event (${result.action.event ?? 
'touch'}) on elementRef ${result.action.elementRef} executed successfully.`; break; case 'long-press': - successMessage = `Long press at (${result.action.x}, ${result.action.y}) for ${result.action.durationMs}ms simulated successfully.`; + successMessage = `Long press on elementRef ${result.action.elementRef} for ${result.action.durationMs}ms simulated successfully.`; break; case 'button': successMessage = `Hardware button '${result.action.button}' pressed successfully.`; @@ -2141,15 +2464,26 @@ function createSpecialCaseItems( case 'gesture': successMessage = `Gesture '${result.action.gesture}' executed successfully.`; break; - case 'type-text': - successMessage = 'Text typing simulated successfully.'; + case 'type-text': { + const targetText = result.action.elementRef + ? ` into elementRef ${result.action.elementRef}` + : ''; + const lengthText = + typeof result.action.textLength === 'number' + ? ` (${pluralize(result.action.textLength, 'character')})` + : ''; + successMessage = `Text typed${targetText}${lengthText} successfully.`; break; + } case 'key-press': successMessage = `Key press (code: ${result.action.keyCode}) simulated successfully.`; break; case 'key-sequence': successMessage = `Key sequence [${result.action.keyCodes.join(',')}] executed successfully.`; break; + case 'batch': + successMessage = `Batch UI automation completed successfully (${pluralize(result.action.stepCount, 'step')}).`; + break; } items.push( ...createStandardDiagnosticSections(result.diagnostics), diff --git a/src/utils/responses/__tests__/next-steps-renderer.test.ts b/src/utils/responses/__tests__/next-steps-renderer.test.ts index 4903fb0b0..35b2a255a 100644 --- a/src/utils/responses/__tests__/next-steps-renderer.test.ts +++ b/src/utils/responses/__tests__/next-steps-renderer.test.ts @@ -130,6 +130,27 @@ describe('next-steps-renderer', () => { ); }); + it('should format complex CLI params through json', () => { + const step: NextStep = { + tool: 'batch', + cliTool: 'batch', + 
workflow: 'ui-automation', + label: 'Batch same-screen taps', + params: { + simulatorId: 'ABC123', + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + }; + + const result = renderNextStep(step, 'cli'); + expect(result).toBe( + 'Batch same-screen taps: xcodebuildmcp ui-automation batch --json \'{"simulatorId":"ABC123","steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}\'', + ); + }); + it('should format step for MCP with no params', () => { const step: NextStep = { tool: 'open_sim', @@ -175,6 +196,25 @@ describe('next-steps-renderer', () => { expect(result).toBe('Do something: some_tool({ verbose: true })'); }); + it('should format complex MCP params as JSON instead of object string coercions', () => { + const step: NextStep = { + tool: 'batch', + label: 'Batch same-screen taps', + params: { + simulatorId: 'ABC123', + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + }; + + const result = renderNextStep(step, 'mcp'); + expect(result).toBe( + 'Batch same-screen taps: batch({ simulatorId: "ABC123", steps: [{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}] })', + ); + }); + it('should handle daemon runtime same as MCP', () => { const step: NextStep = { tool: 'open_sim', diff --git a/src/utils/responses/next-step-formatting.ts b/src/utils/responses/next-step-formatting.ts index a149e580c..1031a2a6b 100644 --- a/src/utils/responses/next-step-formatting.ts +++ b/src/utils/responses/next-step-formatting.ts @@ -1,5 +1,5 @@ import type { RuntimeKind } from '../../runtime/types.ts'; -import type { NextStep } from '../../types/common.ts'; +import type { NextStep, NextStepParamValue } from '../../types/common.ts'; import { toKebabCase } from '../../runtime/naming.ts'; import { shellEscapeArg } from '../shell-escape.ts'; @@ -23,6 +23,17 @@ function formatCliArg(value: string): string { : shellEscapeArg(value); } +function 
hasComplexCliParamValue(value: NextStepParamValue): boolean { + return typeof value === 'object' && value !== null; +} + +function formatCliParamValue(value: Exclude): string { + if (typeof value === 'string' || typeof value === 'number') { + return formatCliArg(String(value)); + } + return shellEscapeArg(JSON.stringify(value)); +} + function formatNextStepForCli(step: NextStep): string { const commandName = step.cliTool ?? (step.tool ? toKebabCase(step.tool) : undefined); if (!commandName) { @@ -35,22 +46,28 @@ function formatNextStepForCli(step: NextStep): string { } parts.push(commandName); - for (const [key, value] of Object.entries(step.params ?? {})) { + const params = step.params ?? {}; + if (Object.values(params).some(hasComplexCliParamValue)) { + parts.push('--json', formatCliParamValue(params)); + return parts.join(' '); + } + + for (const [key, value] of Object.entries(params)) { const flagName = toKebabCase(key); if (typeof value === 'boolean') { if (value) { parts.push(`--${flagName}`); } } else { - parts.push(`--${flagName}`, formatCliArg(String(value))); + parts.push(`--${flagName}`, formatCliParamValue(value)); } } return parts.join(' '); } -function formatMcpValue(value: string | number | boolean): string { - if (typeof value === 'string') { +function formatMcpValue(value: NextStepParamValue): string { + if (typeof value === 'string' || (typeof value === 'object' && value !== null)) { return JSON.stringify(value); } return String(value); diff --git a/src/utils/runtime-config-schema.ts b/src/utils/runtime-config-schema.ts index 7f4766bc7..9fb123c83 100644 --- a/src/utils/runtime-config-schema.ts +++ b/src/utils/runtime-config-schema.ts @@ -19,6 +19,7 @@ export const runtimeConfigFileSchema = z dapLogEvents: z.boolean().optional(), launchJsonWaitMs: z.number().int().nonnegative().optional(), axePath: z.string().optional(), + axeSourcePath: z.string().optional(), iosTemplatePath: z.string().optional(), iosTemplateVersion: z.string().optional(), 
macosTemplatePath: z.string().optional(), diff --git a/src/utils/sentry.ts b/src/utils/sentry.ts index c31af55b0..747b8e841 100644 --- a/src/utils/sentry.ts +++ b/src/utils/sentry.ts @@ -31,7 +31,7 @@ export interface SentryRuntimeContext { uiDebuggerGuardMode?: string; xcodeIdeWorkflowEnabled?: boolean; axeAvailable?: boolean; - axeSource?: 'env' | 'bundled' | 'path' | 'unavailable'; + axeSource?: 'env' | 'source' | 'bundled' | 'path' | 'unavailable'; axeVersion?: string; xcodeDeveloperDir?: string; xcodebuildPath?: string; diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts index 919832c42..ddfe003c8 100644 --- a/src/utils/structured-output-envelope.ts +++ b/src/utils/structured-output-envelope.ts @@ -2,6 +2,12 @@ import type { RuntimeKind } from '../runtime/types.ts'; import type { NextStep, OutputStyle } from '../types/common.ts'; import type { ToolDomainResult } from '../types/domain-results.ts'; import type { StructuredOutputEnvelope } from '../types/structured-output.ts'; +import type { + RuntimeActionNameV1, + RuntimeElementV1, + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, +} from '../types/ui-snapshot.ts'; import { serializeNextSteps } from './responses/next-step-formatting.ts'; type DomainResultData = Omit< @@ -9,13 +15,61 @@ type DomainResultData = Omit< 'kind' | 'didError' | 'error' >; +export type RuntimeSnapshotEnvelopeMode = 'compact' | 'full'; + export interface StructuredEnvelopeOptions { nextSteps?: readonly NextStep[]; nextStepRuntime?: RuntimeKind; outputStyle?: OutputStyle; + runtimeSnapshot?: RuntimeSnapshotEnvelopeMode; + runtimeSnapshotSuppressedTargetRefs?: readonly string[]; } +type RuntimeSnapshotCompactCapture = { + type: 'runtime-snapshot'; + rs: '1'; + screenHash: string; + seq: number; + count: number; + targets: string[]; + scroll: string[]; + text?: string[]; + evidence?: string[]; + udid: string; +}; + +type RuntimeSnapshotUnchangedCompactCapture = { + type: 
'runtime-snapshot-unchanged'; + rs: '1'; + screenHash: string; + seq: number; + unchanged: true; + udid: string; +}; + const MINIMAL_DATA_PRUNE_KEYS = ['request'] as const; +const COMPACT_RUNTIME_TARGET_LIMIT = 64; +const COMPACT_RUNTIME_SCROLL_LIMIT = 32; +const COMPACT_RUNTIME_TEXT_LIMIT = 64; +const COMPACT_RUNTIME_EVIDENCE_LIMIT = 64; +const HIDDEN_RUNTIME_TARGET_LABELS = new Set(['sheet grabber']); +const LOW_PRIORITY_RUNTIME_TARGET_LABELS = new Set([ + 'sheet grabber', + 'close', + 'clear search', + 'remove', + 'delete', + 'clear', + 'c', + 'ac', + '±', + '%', + '÷', + '×', + '-', + '+', + '=', +]); function isRecord(value: unknown): value is Record { return typeof value === 'object' && value !== null && !Array.isArray(value); @@ -49,14 +103,331 @@ function applyStructuredOutputStyle( }; } +function compactRuntimeSnapshotText(value: string | undefined): string { + return (value ?? '').replace(/\s+/g, ' ').replace(/\|/g, '/').trim(); +} + +function normalizedRuntimeSnapshotText(value: string | undefined): string { + return compactRuntimeSnapshotText(value).toLocaleLowerCase(); +} + +function isHiddenRuntimeTarget(element: RuntimeElementV1): boolean { + return HIDDEN_RUNTIME_TARGET_LABELS.has(normalizedRuntimeSnapshotText(element.label)); +} + +function isLowPriorityRuntimeTarget(element: RuntimeElementV1): boolean { + return LOW_PRIORITY_RUNTIME_TARGET_LABELS.has(normalizedRuntimeSnapshotText(element.label)); +} + +function isContentRichTapTarget(element: RuntimeElementV1): boolean { + if (!element.actions.includes('tap')) { + return false; + } + + const label = compactRuntimeSnapshotText(element.label); + const identifier = compactRuntimeSnapshotText(element.identifier); + return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); +} + +function isAlreadySelectedRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + element.state?.selected === true || normalizedRuntimeSnapshotText(element.value) === 'selected' + ); +} + 
+function getRuntimeTargetDisplayPriority(element: RuntimeElementV1): number { + if (isLowPriorityRuntimeTarget(element)) { + return 90; + } + if (isAlreadySelectedRuntimeTarget(element)) { + return 70; + } + if (isContentRichTapTarget(element)) { + return 0; + } + if (element.actions.includes('typeText')) { + return 10; + } + if (element.actions.includes('tap')) { + return 20; + } + return 50; +} + +function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const priorityDelta = + getRuntimeTargetDisplayPriority(left.element) - + getRuntimeTargetDisplayPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + }) + .map(({ element }) => element); +} + +function sortRuntimeTextForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const yDelta = left.element.frame.y - right.element.frame.y; + if (yDelta !== 0) { + return yDelta; + } + + const xDelta = left.element.frame.x - right.element.frame.x; + return xDelta === 0 ? left.index - right.index : xDelta; + }) + .map(({ element }) => element); +} + +function compactRuntimeElementRow(element: RuntimeElementV1, action: string): string { + return [ + element.ref, + action, + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function compactSuppressedRuntimeEvidenceRow(element: RuntimeElementV1): string { + return [ + element.role ?? 
'', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNameV1 | 'none' { + if (element.actions.includes('typeText')) { + return 'typeText'; + } + if (element.actions.includes('tap')) { + return 'tap'; + } + if (element.actions.includes('swipeWithin')) { + return 'swipeWithin'; + } + return 'none'; +} + +function hasRuntimeTextEvidence(element: RuntimeElementV1): boolean { + return ( + compactRuntimeSnapshotText(element.label).length > 0 || + compactRuntimeSnapshotText(element.value).length > 0 + ); +} + +function isRuntimeTextSummaryElement(element: RuntimeElementV1): boolean { + return ( + element.role === 'text' && element.state?.visible !== false && hasRuntimeTextEvidence(element) + ); +} + +function isSuppressedRuntimeTextEvidenceElement( + element: RuntimeElementV1, + suppressedTargetRefs: ReadonlySet, +): boolean { + return ( + suppressedTargetRefs.has(element.ref) && + element.state?.visible !== false && + !isHiddenRuntimeTarget(element) && + !isLowPriorityRuntimeTarget(element) && + hasRuntimeTextEvidence(element) + ); +} + +function uniqueRuntimeElements(elements: RuntimeElementV1[]): RuntimeElementV1[] { + const seenRefs = new Set(); + return elements.filter((element) => { + if (seenRefs.has(element.ref)) { + return false; + } + seenRefs.add(element.ref); + return true; + }); +} + +function toRuntimeSnapshotCompactCapture( + snapshot: RuntimeSnapshotV1, + options: { suppressedTargetRefs?: readonly string[] } = {}, +): RuntimeSnapshotCompactCapture { + const suppressedTargetRefs = new Set(options.suppressedTargetRefs ?? 
[]); + const targets = sortRuntimeTargetsForDisplay( + snapshot.elements.filter( + (element) => + !suppressedTargetRefs.has(element.ref) && + !isHiddenRuntimeTarget(element) && + (element.actions.includes('tap') || element.actions.includes('typeText')), + ), + ) + .slice(0, COMPACT_RUNTIME_TARGET_LIMIT) + .map((element) => { + const action = element.actions.includes('typeText') ? 'typeText' : 'tap'; + return compactRuntimeElementRow(element, action); + }); + const scroll = snapshot.elements + .filter( + (element) => + element.actions.includes('swipeWithin') && + !element.actions.includes('tap') && + !element.actions.includes('typeText'), + ) + .slice(0, COMPACT_RUNTIME_SCROLL_LIMIT) + .map((element) => compactRuntimeElementRow(element, 'swipe')); + const suppressedTextEvidence = sortRuntimeTextForDisplay( + snapshot.elements.filter((element) => + isSuppressedRuntimeTextEvidenceElement(element, suppressedTargetRefs), + ), + ); + const ordinaryTextEvidence = sortRuntimeTextForDisplay( + snapshot.elements.filter( + (element) => !suppressedTargetRefs.has(element.ref) && isRuntimeTextSummaryElement(element), + ), + ); + const text = uniqueRuntimeElements(ordinaryTextEvidence) + .slice(0, COMPACT_RUNTIME_TEXT_LIMIT) + .map((element) => compactRuntimeElementRow(element, 'text')); + const evidence = uniqueRuntimeElements(suppressedTextEvidence) + .slice(0, COMPACT_RUNTIME_EVIDENCE_LIMIT) + .map(compactSuppressedRuntimeEvidenceRow); + + return { + type: 'runtime-snapshot', + rs: '1', + screenHash: snapshot.screenHash, + seq: snapshot.seq, + count: snapshot.elements.length, + targets, + scroll, + ...(text.length > 0 ? { text } : {}), + ...(evidence.length > 0 ? 
{ evidence } : {}), + udid: snapshot.simulatorId, + }; +} + +function compactRuntimeElementCandidate(element: RuntimeElementV1): string { + return compactRuntimeElementRow(element, primaryRuntimeElementAction(element)); +} + +function isRuntimeElement(candidate: unknown): candidate is RuntimeElementV1 { + return ( + typeof candidate === 'object' && + candidate !== null && + 'ref' in candidate && + typeof candidate.ref === 'string' && + 'actions' in candidate && + Array.isArray(candidate.actions) + ); +} + +function isRuntimeSnapshotCapture(capture: unknown): capture is RuntimeSnapshotV1 { + return ( + typeof capture === 'object' && + capture !== null && + 'type' in capture && + capture.type === 'runtime-snapshot' && + 'elements' in capture && + Array.isArray(capture.elements) + ); +} + +function isRuntimeSnapshotUnchangedCapture( + capture: unknown, +): capture is RuntimeSnapshotUnchangedV1 { + return ( + typeof capture === 'object' && + capture !== null && + 'type' in capture && + capture.type === 'runtime-snapshot-unchanged' + ); +} + +function toRuntimeSnapshotUnchangedCompactCapture( + capture: RuntimeSnapshotUnchangedV1, +): RuntimeSnapshotUnchangedCompactCapture { + return { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: capture.screenHash, + seq: capture.seq, + unchanged: true, + udid: capture.simulatorId, + }; +} + +function projectRuntimeSnapshotData( + data: TData, + options: StructuredEnvelopeOptions, +): unknown { + if (options.runtimeSnapshot === 'full' || typeof data !== 'object' || data === null) { + return data; + } + + const dataWithCapture = data as TData & { capture?: unknown }; + let projectedData: typeof dataWithCapture = dataWithCapture; + + if (isRuntimeSnapshotCapture(dataWithCapture.capture)) { + projectedData = { + ...dataWithCapture, + capture: toRuntimeSnapshotCompactCapture( + dataWithCapture.capture, + options.runtimeSnapshotSuppressedTargetRefs + ? 
{ suppressedTargetRefs: options.runtimeSnapshotSuppressedTargetRefs } + : {}, + ), + }; + } else if (isRuntimeSnapshotUnchangedCapture(dataWithCapture.capture)) { + projectedData = { + ...dataWithCapture, + capture: toRuntimeSnapshotUnchangedCompactCapture(dataWithCapture.capture), + }; + } + + const dataWithRuntimeRows = projectedData as typeof projectedData & { + uiError?: { candidates?: unknown[] }; + waitMatch?: { matches?: unknown[] }; + }; + const uiError = Array.isArray(dataWithRuntimeRows.uiError?.candidates) + ? { + ...dataWithRuntimeRows.uiError, + candidates: dataWithRuntimeRows.uiError.candidates.map((candidate) => + isRuntimeElement(candidate) ? compactRuntimeElementCandidate(candidate) : candidate, + ), + } + : dataWithRuntimeRows.uiError; + const waitMatch = Array.isArray(dataWithRuntimeRows.waitMatch?.matches) + ? { + ...dataWithRuntimeRows.waitMatch, + matches: dataWithRuntimeRows.waitMatch.matches.map((match) => + isRuntimeElement(match) ? compactRuntimeElementCandidate(match) : match, + ), + } + : dataWithRuntimeRows.waitMatch; + + if (uiError === dataWithRuntimeRows.uiError && waitMatch === dataWithRuntimeRows.waitMatch) { + return projectedData; + } + + return { + ...projectedData, + ...(uiError ? { uiError } : {}), + ...(waitMatch ? { waitMatch } : {}), + }; +} + export function toStructuredEnvelope( result: TResult, schema: string, schemaVersion: string, options: StructuredEnvelopeOptions = {}, -): StructuredOutputEnvelope> { +): StructuredOutputEnvelope { const { nextSteps, nextStepRuntime = 'cli', outputStyle = 'normal' } = options; const { kind: _kind, didError, error, ...data } = result; + const projectedData = projectRuntimeSnapshotData(data as DomainResultData, options); const serializedNextSteps = schema === 'xcodebuildmcp.output.error' ? 
undefined @@ -64,12 +435,12 @@ export function toStructuredEnvelope( runtime: nextStepRuntime, }); - const envelope: StructuredOutputEnvelope> = { + const envelope: StructuredOutputEnvelope = { schema, schemaVersion, didError, error, - data: Object.keys(data).length === 0 ? null : (data as DomainResultData), + data: isRecord(projectedData) && Object.keys(projectedData).length === 0 ? null : projectedData, ...(serializedNextSteps ? { nextSteps: serializedNextSteps } : {}), }; diff --git a/src/utils/tool-registry.ts b/src/utils/tool-registry.ts index afd5910fb..56cca768a 100644 --- a/src/utils/tool-registry.ts +++ b/src/utils/tool-registry.ts @@ -32,6 +32,8 @@ function buildStructuredContent( return undefined; } + const suppressedTargetRefs = structuredOutput.renderHints?.runtimeSnapshot?.suppressedTargetRefs; + return toStructuredEnvelope( structuredOutput.result, structuredOutput.schema, @@ -40,6 +42,9 @@ function buildStructuredContent( nextSteps: session.getNextSteps?.(), nextStepRuntime: 'mcp', outputStyle: 'minimal', + ...(suppressedTargetRefs + ? 
{ runtimeSnapshotSuppressedTargetRefs: suppressedTargetRefs } + : {}), }, ); } diff --git a/src/utils/typed-tool-factory.ts b/src/utils/typed-tool-factory.ts index fe96ae9da..f409cd058 100644 --- a/src/utils/typed-tool-factory.ts +++ b/src/utils/typed-tool-factory.ts @@ -101,6 +101,37 @@ function missingFromMerged( return keys.filter((k) => merged[k] == null); } +function getObjectSchemaKeys(schema: z.ZodType): Set | null { + if (typeof schema !== 'object' || schema === null || !('shape' in schema)) { + return null; + } + + const shape = (schema as { shape?: unknown }).shape; + if (typeof shape !== 'object' || shape === null) { + return null; + } + + return new Set(Object.keys(shape)); +} + +function filterSessionDefaultsForSchema( + defaults: SessionDefaults, + schema: z.ZodType, +): Record { + const schemaKeys = getObjectSchemaKeys(schema); + if (!schemaKeys) { + return defaults; + } + + const filteredDefaults: Record = {}; + for (const [key, value] of Object.entries(defaults)) { + if (schemaKeys.has(key)) { + filteredDefaults[key] = value; + } + } + return filteredDefaults; +} + function formatRequirementError(opts: { message: string; setHint?: string; @@ -201,7 +232,7 @@ function createSessionAwareHandler(opts: { } } - const sessionDefaults = sessionStore.getAll(); + const sessionDefaults = filterSessionDefaultsForSchema(sessionStore.getAll(), internalSchema); const merged = mergeSessionDefaultArgs({ defaults: sessionDefaults, explicitArgs: sanitizedArgs,