From 6e5c24e94d0a2b76dd6f4fe22d45e30283ac8962 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Wed, 13 May 2026 14:17:41 +0100 Subject: [PATCH 01/35] feat(ui-automation): Add rs/1 runtime automation parity Add batch execution, wait predicates, runtime snapshot refs, and screen-hash unchanged responses so agents can drive AXe with fewer process launches and less repeated snapshot output. Tighten action validation, stale-snapshot recovery, compact rendering, and fixture coverage so UI automation flows are easier for agents to use reliably. Co-Authored-By: OpenAI Codex --- CHANGELOG.md | 47 + example_projects/.xcodebuildmcp/config.yaml | 2 - .../Weather/.xcodebuildmcp/config.yaml | 2 +- example_projects/Weather/README.md | 4 +- .../Services/MockWeatherAPIClient.swift | 6 +- .../Views/Overlays/LocationPickerView.swift | 9 +- .../Weather/Views/Overlays/LocationRows.swift | 1 + .../Views/Overlays/SettingsSheetView.swift | 1 + .../Weather/WeatherTests/WeatherTests.swift | 3 + .../iOS/.xcodebuildmcp/config.yaml | 2 +- .../iOS_Calculator/.xcodebuildmcp/config.yaml | 3 +- manifests/tools/batch.yaml | 16 + manifests/tools/key_press.yaml | 2 +- manifests/tools/key_sequence.yaml | 2 +- manifests/tools/long_press.yaml | 4 +- manifests/tools/snapshot_ui.yaml | 22 +- manifests/tools/swipe.yaml | 4 +- manifests/tools/tap.yaml | 4 +- manifests/tools/touch.yaml | 4 +- manifests/tools/type_text.yaml | 4 +- manifests/tools/wait_for_ui.yaml | 28 + manifests/workflows/ui-automation.yaml | 4 +- .../2.schema.json | 383 +++---- .../2.schema.json | 293 +++--- scripts/bundle-axe.sh | 41 +- .../__tests__/register-tool-commands.test.ts | 546 ++++++++++ src/cli/__tests__/schema-to-yargs.test.ts | 14 + src/cli/register-tool-commands.ts | 47 +- src/cli/schema-to-yargs.ts | 20 +- .../structured-output-schema.test.ts | 119 +++ .../__tests__/_keyboard_shortcut.test.ts | 2 + .../_keyboard_shortcut.ts | 2 +- .../simulator/__tests__/boot_sim.test.ts | 43 + .../__tests__/install_app_sim.test.ts | 53 + 
.../__tests__/launch_app_sim.test.ts | 49 +- .../simulator/__tests__/screenshot.test.ts | 109 +- .../simulator/__tests__/stop_app_sim.test.ts | 33 + src/mcp/tools/simulator/boot_sim.ts | 45 +- src/mcp/tools/simulator/install_app_sim.ts | 29 +- src/mcp/tools/simulator/launch_app_sim.ts | 31 +- src/mcp/tools/simulator/stop_app_sim.ts | 31 +- .../ui-automation/__tests__/batch.test.ts | 211 ++++ .../ui-automation/__tests__/button.test.ts | 70 +- .../ui-automation/__tests__/gesture.test.ts | 4 + .../ui-automation/__tests__/key_press.test.ts | 2 + .../__tests__/key_sequence.test.ts | 4 + .../__tests__/long_press.test.ts | 500 +++------ .../__tests__/non_streaming_progress.test.ts | 107 +- .../__tests__/runtime-snapshot.test.ts | 562 +++++++++++ .../__tests__/screenshot.test.ts | 168 +++- .../__tests__/snapshot-ui-state.test.ts | 142 +++ .../__tests__/snapshot_ui.test.ts | 633 +++++++++++- .../ui-automation/__tests__/swipe.test.ts | 626 ++++-------- .../tools/ui-automation/__tests__/tap.test.ts | 947 ++++++------------ .../ui-automation/__tests__/touch.test.ts | 709 +++---------- .../ui-automation/__tests__/type_text.test.ts | 757 +++++++------- .../__tests__/ui-action-test-helpers.ts | 88 ++ .../__tests__/wait_for_ui.test.ts | 708 +++++++++++++ src/mcp/tools/ui-automation/batch.ts | 142 +++ src/mcp/tools/ui-automation/button.ts | 23 +- src/mcp/tools/ui-automation/gesture.ts | 16 +- src/mcp/tools/ui-automation/key_press.ts | 11 +- src/mcp/tools/ui-automation/key_sequence.ts | 15 +- src/mcp/tools/ui-automation/long_press.ts | 50 +- src/mcp/tools/ui-automation/screenshot.ts | 61 +- .../ui-automation/shared/domain-result.ts | 43 +- .../ui-automation/shared/runtime-snapshot.ts | 701 +++++++++++++ .../ui-automation/shared/semantic-tap.ts | 138 +++ .../ui-automation/shared/snapshot-ui-state.ts | 145 ++- .../ui-automation/shared/wait-predicate.ts | 361 +++++++ src/mcp/tools/ui-automation/snapshot_ui.ts | 194 +++- src/mcp/tools/ui-automation/swipe.ts | 83 +- 
src/mcp/tools/ui-automation/tap.ts | 185 ++-- src/mcp/tools/ui-automation/touch.ts | 78 +- src/mcp/tools/ui-automation/type_text.ts | 109 +- src/mcp/tools/ui-automation/wait_for_ui.ts | 365 +++++++ src/rendering/render.ts | 7 +- src/runtime/__tests__/tool-invoker.test.ts | 44 + src/runtime/tool-invoker.ts | 24 +- .../long-press--error-no-simulator.txt | 8 +- .../ui-automation/long-press--success.txt | 6 +- .../ui-automation/snapshot-ui--success.txt | 606 +---------- .../swipe--error-no-simulator.txt | 8 +- .../swipe--error-not-actionable.txt | 11 + .../cli/text/ui-automation/swipe--success.txt | 10 - .../ui-automation/tap--error-no-simulator.txt | 8 +- .../cli/text/ui-automation/tap--success.txt | 6 +- .../touch--error-no-simulator.txt | 8 +- .../cli/text/ui-automation/touch--success.txt | 6 +- .../type-text--error-no-simulator.txt | 8 +- .../type-text--error-not-actionable.txt | 11 + .../text/ui-automation/type-text--success.txt | 6 - .../ui-automation/wait-for-ui--success.txt | 36 + .../long-press--error-no-simulator.json | 16 +- .../ui-automation/long-press--success.json | 11 +- .../ui-automation/snapshot-ui--success.json | 417 +------- .../swipe--error-no-simulator.json | 17 +- .../swipe--error-not-actionable.json | 27 + .../json/ui-automation/swipe--success.json | 33 - .../tap--error-no-simulator.json | 16 +- .../mcp/json/ui-automation/tap--success.json | 11 +- .../touch--error-no-simulator.json | 17 +- .../json/ui-automation/touch--success.json | 13 +- .../type-text--error-no-simulator.json | 17 +- .../type-text--error-not-actionable.json | 27 + .../ui-automation/type-text--success.json | 17 - .../ui-automation/wait-for-ui--success.json | 44 + .../long-press--error-no-simulator.txt | 8 +- .../ui-automation/long-press--success.txt | 6 +- .../ui-automation/snapshot-ui--success.txt | 604 +---------- .../swipe--error-no-simulator.txt | 8 +- .../swipe--error-not-actionable.txt | 9 + .../mcp/text/ui-automation/swipe--success.txt | 8 - 
.../ui-automation/tap--error-no-simulator.txt | 8 +- .../mcp/text/ui-automation/tap--success.txt | 6 +- .../touch--error-no-simulator.txt | 8 +- .../mcp/text/ui-automation/touch--success.txt | 6 +- .../type-text--error-no-simulator.txt | 8 +- .../type-text--error-not-actionable.txt | 9 + .../text/ui-automation/type-text--success.txt | 4 - .../ui-automation/wait-for-ui--success.txt | 34 + .../__tests__/json-normalize.test.ts | 160 +-- src/snapshot-tests/json-normalize.ts | 18 +- .../suites/ui-automation-suite.ts | 98 +- src/types/domain-results.ts | 37 +- src/types/ui-snapshot.ts | 148 +++ .../structured-output-envelope.test.ts | 526 +++++----- .../__tests__/cli-text-renderer.test.ts | 492 +++++++++ src/utils/renderers/cli-text-renderer.ts | 10 +- src/utils/renderers/domain-result-text.ts | 305 +++++- src/utils/structured-output-envelope.ts | 270 ++++- 131 files changed, 9893 insertions(+), 5415 deletions(-) create mode 100644 manifests/tools/batch.yaml create mode 100644 manifests/tools/wait_for_ui.yaml create mode 100644 src/mcp/tools/ui-automation/__tests__/batch.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts create mode 100644 src/mcp/tools/ui-automation/batch.ts create mode 100644 src/mcp/tools/ui-automation/shared/runtime-snapshot.ts create mode 100644 src/mcp/tools/ui-automation/shared/semantic-tap.ts create mode 100644 src/mcp/tools/ui-automation/shared/wait-predicate.ts create mode 100644 src/mcp/tools/ui-automation/wait_for_ui.ts create mode 100644 src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt create mode 100644 
src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--success.txt create mode 100644 src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt create mode 100644 src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-not-actionable.json delete mode 100644 src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json create mode 100644 src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-not-actionable.json delete mode 100644 src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--success.json create mode 100644 src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json create mode 100644 src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt create mode 100644 src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--success.txt create mode 100644 src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt create mode 100644 src/types/ui-snapshot.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index f924cc6f7..66091b316 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,53 @@ ### Added - Added `nextSteps` hint lines to MCP `structuredContent` and CLI `--output json` envelopes so agents can consume follow-up actions without scraping text. CLI JSON renders shell command lines; MCP structured content renders MCP tool-call hints. Structured result schemas that include `nextSteps` now use schema version 2; existing version 1 schema files remain available for current validators. 
+- Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged. +- Added `batch` for executing multiple AXe UI automation steps in one simulator session. +- Added `wait_for_ui` for polling rs/1 runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. + +### Fixed + +- Fixed compact runtime snapshots so top-level app and window refs are not advertised as swipe targets just because a generic descendant overflows their frame. +- Fixed `wait_for_ui` focus waits so elements that do not expose focus state return a typed recoverable error instead of timing out. +- Fixed invalid `touch` calls so structured output no longer reports a fake touch event when neither `down` nor `up` was requested. +- Fixed compact runtime snapshots so standalone `other` elements, such as keyboard suggestions, are not advertised as swipe targets unless they behave like scrollable containers. +- Fixed runtime snapshots so off-screen elements, and clipped elements whose activation point is offscreen, are not advertised as actionable targets. +- Fixed full-screen swipe gestures so app-level scroll refs avoid unsafe screen edges such as the status bar and notch area. +- Clarified runtime snapshot tips so agents know element refs are snapshot-specific and must come from the latest `snapshot_ui` or `wait_for_ui` output, and only show swipe guidance when the snapshot includes a scroll ref. 
+- Made `wait_for_ui` `textContains` matching case-insensitive so assertions survive platform text normalization such as keyboard auto-capitalization. `wait_for_ui` now also treats duplicate exact text matches as successful presence assertions, narrows broad selectors by text before reporting ambiguity, rejects `text` on non-`textContains` predicates instead of silently ignoring it, and keeps recoverable-error candidates compact in structured output. +- Fixed `tap` on SwiftUI switch element refs by using a touch down/up activation instead of AXe's coordinate tap path. +- Fixed selector fallback for AXe duplicate-match diagnostics that include parenthesized match counts. +- Fixed semantic taps and text-field focusing so element refs with duplicate AXe selectors use their resolved snapshot coordinates immediately. +- Fixed bottom-clipped UI automation targets so taps, touches, and long presses use a visible activation point instead of the hidden center of the accessibility frame. +- Fixed app-level horizontal swipes so full-screen refs use a content-area y-coordinate instead of missing horizontal carousels by swiping near the hero area. +- Fixed CLI commands with `simulatorId`-only contracts so `simulatorName` session defaults are resolved to a simulator ID without adding conflicting simulator arguments to tools that already accept `simulatorName`, and fixed simulator lifecycle tools so name-only defaults resolve before simctl operations. +- Fixed `snapshot_ui` and `wait_for_ui` next steps so they use the resolved simulator ID instead of leaking `SIMULATOR_UUID` placeholders. +- Fixed the Weather example app so saved-location rows are not reused as search-result rows after editing locations. +- Fixed the Weather example app's current-location button so it selects the current saved location instead of appearing as a no-op UI automation target. +- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it. 
+- Fixed `type_text` so AXe-unsupported international/accented characters fail before focusing the field, with a clear recoverable error instead of a generic typing failure. +- Fixed `snapshot_ui` next-step guidance so the suggested tap ref prefers useful tappable controls over text fields, sheet grabbers, close buttons, and clear-search buttons. +- Fixed compact runtime snapshot JSON so target ordering matches compact text output and prioritizes useful content targets before low-value sheet chrome. +- Fixed `wait_for_ui` success output so compact text and JSON include the matched elements that satisfied the wait predicate. +- Fixed `wait_for_ui textContains` so duplicate elements with the same matching visible text satisfy presence-style assertions instead of reporting ambiguity. +- Fixed CLI `--style minimal` so final text output suppresses generated next steps for daemon-routed tools as intended. +- Fixed `snapshot_ui` next-step guidance so snapshots with no tappable targets no longer suggest tapping the first non-actionable element. +- Fixed next-step rendering for tools shared across workflows so follow-up commands prefer the workflow that produced the result instead of drifting to another workflow alias. +- Fixed `snapshot_ui` next-step guidance so calculator-style utility and operator buttons no longer outrank more useful digit/content controls. +- Fixed `snapshot_ui` compact text, JSON, and next-step guidance so already-selected segmented controls no longer outrank unselected choices. +- Fixed compact runtime snapshots and next-step guidance so sheet grabbers remain visible as low-priority targets, allowing agents to expand or dismiss sheets without outranking useful content controls. +- Fixed compact wait-match rows so static assertion matches render with `none` instead of exposing low-level long-press/touch actions as if they were primary agent actions. 
+- Fixed compact runtime snapshot ordering and next-step guidance so destructive controls such as Remove/Delete are demoted behind safer content and navigation targets. +- Clarified simulator keyboard shortcut failures when Simulator.app is running without a visible device window. +- Fixed hardware button automation so successful button presses wait briefly for system UI transitions before returning, reducing stale immediate follow-up snapshots. +- Fixed runtime snapshots so modal sheet hosts remain swipeable after the currently visible sheet content fits inside the viewport. +- Fixed `wait_for_ui` validation so unknown JSON fields are rejected instead of silently broadening waits. +- Fixed CLI numeric array flags so comma-separated values such as `--key-codes 23,18,14` are parsed as numbers instead of failing validation. +- Fixed runtime snapshots so unlabeled internal custom-action nodes, such as SpringBoard icon subviews, are no longer advertised as likely tap targets. +- Fixed AXe bundling so downloaded artifacts must report the pinned AXe version, and dirty local AXe builds require an explicit opt-in. +- Fixed runtime snapshot tips so compact output names all target-ref action tools, including `long_press` and `touch`. +- Clarified key press and key sequence tool descriptions so agents know that key codes are AXe HID key codes and that `type_text` is preferred for text entry. +- Clarified `wait_for_ui` timeout recovery hints so agents know selector fields match exact values and should use `textContains` for partial visible text. 
## [2.5.2] diff --git a/example_projects/.xcodebuildmcp/config.yaml b/example_projects/.xcodebuildmcp/config.yaml index 57308faa0..7b569a1d5 100644 --- a/example_projects/.xcodebuildmcp/config.yaml +++ b/example_projects/.xcodebuildmcp/config.yaml @@ -4,13 +4,11 @@ sessionDefaultsProfiles: workspacePath: ./iOS_Calculator/CalculatorApp.xcworkspace scheme: CalculatorApp simulatorName: iPhone 17 Pro - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 simulatorPlatform: iOS Simulator ios-test: projectPath: ./iOS/MCPTest.xcodeproj scheme: MCPTest simulatorName: iPhone 17 Pro - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 simulatorPlatform: iOS Simulator macos-test: projectPath: ./macOS/MCPTest.xcodeproj diff --git a/example_projects/Weather/.xcodebuildmcp/config.yaml b/example_projects/Weather/.xcodebuildmcp/config.yaml index 6663899fa..899ef7cf5 100644 --- a/example_projects/Weather/.xcodebuildmcp/config.yaml +++ b/example_projects/Weather/.xcodebuildmcp/config.yaml @@ -7,7 +7,7 @@ sentryDisabled: false sessionDefaults: projectPath: Weather.xcodeproj scheme: Weather - simulatorName: iPhone 17 Pro + simulatorName: iPhone 17 Pro Max setupPreferences: platforms: - iOS diff --git a/example_projects/Weather/README.md b/example_projects/Weather/README.md index 8becf8103..6879347f4 100644 --- a/example_projects/Weather/README.md +++ b/example_projects/Weather/README.md @@ -13,9 +13,7 @@ Build and run the app with XcodeBuildMCP first: Then relaunch the installed app with the mock API argument: ```bash -../../build/cli.js simulator launch-app \ - --bundle-id com.sentry.weather.Weather \ - --args=--mock-weather-api +../../build/cli.js simulator launch-app --json '{"bundleId":"com.sentry.weather.Weather","launchArgs":["--mock-weather-api"]}' ``` ## JSON fixtures diff --git a/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift b/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift index 6217871d5..730549e5e 100644 --- 
a/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift +++ b/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift @@ -29,8 +29,10 @@ struct MockWeatherAPIClient: WeatherAPIClient, Sendable { guard !trimmed.isEmpty else { return [] } let needle = trimmed.localizedLowercase - return fixtures.searchPool.filter { location in - location.name.localizedLowercase.contains(needle) + var seenLocationIDs = Set() + return (fixtures.locations + fixtures.searchPool).filter { location in + guard seenLocationIDs.insert(location.id).inserted else { return false } + return location.name.localizedLowercase.contains(needle) || location.subtitle.localizedLowercase.contains(needle) || (location.country?.localizedLowercase.contains(needle) ?? false) } diff --git a/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift b/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift index 7b643f61e..765a9f094 100644 --- a/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift +++ b/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift @@ -103,7 +103,7 @@ struct LocationPickerView: View { } private var currentLocationButton: some View { - Button(action: {}) { + Button(action: selectCurrentLocation) { HStack(spacing: 12) { Image(systemName: "location.fill") .font(.system(size: 14)) @@ -145,6 +145,7 @@ struct LocationPickerView: View { onSelect: { select(location) }, onRemove: { remove(location) } ) + .id("saved-\(location.id)-\(isEditing)") } } else if isLoading { ForEach(0..<3, id: \.self) { _ in SearchSkeletonRow() } @@ -160,6 +161,7 @@ struct LocationPickerView: View { onPreview: { preview(location) }, onAdd: { add(location) } ) + .id("search-\(location.id)-\(isSaved(location))-\(justAddedID == location.id)") } } } @@ -229,6 +231,11 @@ struct LocationPickerView: View { justAddedID = location.id } + private func selectCurrentLocation() { + guard let currentLocation = savedLocations.first else 
{ return } + select(currentLocation) + } + private func clearAddedIndicator() async { guard let id = justAddedID else { return } try? await Task.sleep(for: .milliseconds(1_400)) diff --git a/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift b/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift index a6412cfbb..1fd30bbd3 100644 --- a/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift +++ b/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift @@ -96,6 +96,7 @@ struct SearchLocationRow: View { .frame(maxWidth: .infinity, alignment: .leading) } .buttonStyle(.plain) + .accessibilityValue(saved || added ? "saved" : "not saved") VStack(alignment: .trailing, spacing: 3) { Text(WeatherUnitFormatter.temperatureString(location.temperatureC, units: units)) diff --git a/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift b/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift index a571db22e..e118c23b0 100644 --- a/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift +++ b/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift @@ -132,6 +132,7 @@ private struct SegmentRow: View { Button(optionLabel(option)) { selection = option } + .accessibilityValue(selection == option ? "selected" : "not selected") .font(.system(size: 13, weight: .medium)) .foregroundStyle(selection == option ? 
.black : .white) .padding(.horizontal, 14) diff --git a/example_projects/Weather/WeatherTests/WeatherTests.swift b/example_projects/Weather/WeatherTests/WeatherTests.swift index 1a8d8f9bb..c0a2ae8bb 100644 --- a/example_projects/Weather/WeatherTests/WeatherTests.swift +++ b/example_projects/Weather/WeatherTests/WeatherTests.swift @@ -47,6 +47,9 @@ struct WeatherTests { let byCountry = try await service.searchLocations(matching: "gb") #expect(byCountry.map(\.name).contains("London")) + + let savedLocationByName = try await service.searchLocations(matching: "tokyo") + #expect(savedLocationByName.contains { $0.name == "Tokyo" }) } @Test func emptySearchReturnsNoResults() async throws { diff --git a/example_projects/iOS/.xcodebuildmcp/config.yaml b/example_projects/iOS/.xcodebuildmcp/config.yaml index 568d5e4d2..ee106f9ab 100644 --- a/example_projects/iOS/.xcodebuildmcp/config.yaml +++ b/example_projects/iOS/.xcodebuildmcp/config.yaml @@ -3,7 +3,7 @@ enabledWorkflows: ['simulator', 'ui-automation', 'debugging', 'logging'] sessionDefaults: projectPath: ./MCPTest.xcodeproj scheme: MCPTest - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 + simulatorName: iPhone 17 Pro useLatestOS: true platform: iOS Simulator bundleId: io.sentry.MCPTest diff --git a/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml b/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml index b84c72162..44458a91a 100644 --- a/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml +++ b/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml @@ -11,12 +11,11 @@ sessionDefaults: workspacePath: CalculatorApp.xcworkspace scheme: CalculatorApp configuration: Debug - simulatorId: A2C64636-37E9-4B68-B872-E7F0A82A5670 simulatorPlatform: iOS Simulator useLatestOS: true arch: arm64 suppressWarnings: false - derivedDataPath: ./iOS_Calculator/.derivedData + derivedDataPath: ./.build/DerivedData preferXcodebuild: true bundleId: io.sentry.calculatorapp simulatorName: iPhone 17 Pro diff --git 
a/manifests/tools/batch.yaml b/manifests/tools/batch.yaml new file mode 100644 index 000000000..b9209d703 --- /dev/null +++ b/manifests/tools/batch.yaml @@ -0,0 +1,16 @@ +id: batch +module: mcp/tools/ui-automation/batch +names: + mcp: batch + cli: batch +description: Execute multiple AXe UI interaction steps in one simulator session to reduce process launches. +outputSchema: + schema: xcodebuildmcp.output.ui-action-result + version: "2" +routing: + stateful: true +annotations: + title: Batch UI Actions + readOnlyHint: true + destructiveHint: false + openWorldHint: false diff --git a/manifests/tools/key_press.yaml b/manifests/tools/key_press.yaml index 1d2d60a7f..b282b3fb4 100644 --- a/manifests/tools/key_press.yaml +++ b/manifests/tools/key_press.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/key_press names: mcp: key_press cli: key-press -description: Press key by keycode. +description: Press one hardware key using an AXe HID key code. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" diff --git a/manifests/tools/key_sequence.yaml b/manifests/tools/key_sequence.yaml index d313f71a0..9b2cb8bf3 100644 --- a/manifests/tools/key_sequence.yaml +++ b/manifests/tools/key_sequence.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/key_sequence names: mcp: key_sequence cli: key-sequence -description: Press a sequence of keys by their keycodes. +description: Press hardware keys using AXe HID key codes. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" diff --git a/manifests/tools/long_press.yaml b/manifests/tools/long_press.yaml index 0e39ab876..7aee1112e 100644 --- a/manifests/tools/long_press.yaml +++ b/manifests/tools/long_press.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/long_press names: mcp: long_press cli: long-press -description: Long press at coords. +description: Long press a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Long Press readOnlyHint: true diff --git a/manifests/tools/snapshot_ui.yaml b/manifests/tools/snapshot_ui.yaml index 81d659216..5d7622195 100644 --- a/manifests/tools/snapshot_ui.yaml +++ b/manifests/tools/snapshot_ui.yaml @@ -3,28 +3,12 @@ module: mcp/tools/ui-automation/snapshot_ui names: mcp: snapshot_ui cli: snapshot-ui -description: Print view hierarchy with precise view coordinates (x, y, width, height) for visible elements. +description: Capture a semantic rs/1 runtime UI snapshot with stable elementRef targets for UI automation. outputSchema: schema: xcodebuildmcp.output.capture-result version: "2" -nextSteps: - - label: Refresh after layout changes - toolId: snapshot_ui - params: - simulatorId: SIMULATOR_UUID - when: success - - label: Tap on element - toolId: tap - params: - simulatorId: SIMULATOR_UUID - x: 0 - y: 0 - when: success - - label: Take screenshot for verification - toolId: screenshot - params: - simulatorId: SIMULATOR_UUID - when: success +routing: + stateful: true annotations: title: Snapshot UI readOnlyHint: true diff --git a/manifests/tools/swipe.yaml b/manifests/tools/swipe.yaml index e365373ab..6d0c00b03 100644 --- a/manifests/tools/swipe.yaml +++ b/manifests/tools/swipe.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/swipe names: mcp: swipe cli: swipe -description: Swipe between points. 
+description: Swipe within a UI element by withinElementRef and direction from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Swipe readOnlyHint: true diff --git a/manifests/tools/tap.yaml b/manifests/tools/tap.yaml index f2c3ba405..6fdff7fc2 100644 --- a/manifests/tools/tap.yaml +++ b/manifests/tools/tap.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/tap names: mcp: tap cli: tap -description: Tap UI element by accessibility id/label (recommended) or coordinates as fallback. +description: Tap a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Tap readOnlyHint: true diff --git a/manifests/tools/touch.yaml b/manifests/tools/touch.yaml index 4faf99470..3849c5bc8 100644 --- a/manifests/tools/touch.yaml +++ b/manifests/tools/touch.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/touch names: mcp: touch cli: touch -description: Touch down/up at coords. +description: Send touch down/up events to a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Touch readOnlyHint: true diff --git a/manifests/tools/type_text.yaml b/manifests/tools/type_text.yaml index de6a08a9a..86c825668 100644 --- a/manifests/tools/type_text.yaml +++ b/manifests/tools/type_text.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/type_text names: mcp: type_text cli: type-text -description: Type text. +description: Type text into a UI element by elementRef from a current rs/1 runtime snapshot, optionally replacing existing field contents. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" +routing: + stateful: true annotations: title: Type Text readOnlyHint: true diff --git a/manifests/tools/wait_for_ui.yaml b/manifests/tools/wait_for_ui.yaml new file mode 100644 index 000000000..4ae62a0bc --- /dev/null +++ b/manifests/tools/wait_for_ui.yaml @@ -0,0 +1,28 @@ +id: wait_for_ui +module: mcp/tools/ui-automation/wait_for_ui +names: + mcp: wait_for_ui + cli: wait-for-ui +description: Poll rs/1 runtime UI snapshots until a selector-based UI predicate, selector-free textContains predicate, or selector-free settled predicate is satisfied. Select with elementRef, identifier, label, role, or value when a selector is needed. +outputSchema: + schema: xcodebuildmcp.output.capture-result + version: "2" +routing: + stateful: true +nextSteps: + - label: Refresh runtime snapshot + toolId: snapshot_ui + params: + simulatorId: SIMULATOR_UUID + when: success + - label: Wait again + toolId: wait_for_ui + params: + simulatorId: SIMULATOR_UUID + predicate: settled + when: success +annotations: + title: Wait for UI + readOnlyHint: true + destructiveHint: false + openWorldHint: false diff --git a/manifests/workflows/ui-automation.yaml b/manifests/workflows/ui-automation.yaml index c11e5dd72..6d8dd1f26 100644 --- a/manifests/workflows/ui-automation.yaml +++ b/manifests/workflows/ui-automation.yaml @@ -3,6 +3,9 @@ title: UI Automation description: UI automation and accessibility testing tools for iOS simulators. Perform gestures, interactions, screenshots, and UI analysis for automated testing workflows. 
targetPlatforms: [iOS] tools: + - snapshot_ui + - wait_for_ui + - batch - tap - touch - long_press @@ -13,4 +16,3 @@ tools: - key_sequence - type_text - screenshot - - snapshot_ui diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json index 7ff1cd8b4..9bf964c63 100644 --- a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -13,132 +13,206 @@ "type": "object", "additionalProperties": false, "properties": { - "x": { - "type": "number" - }, - "y": { - "type": "number" - }, - "width": { - "type": "number" + "x": { "type": "number" }, + "y": { "type": "number" }, + "width": { "type": "number" }, + "height": { "type": "number" } + }, + "required": ["x", "y", "width", "height"] + }, + "runtimeActionName": { + "enum": ["tap", "typeText", "longPress", "touch", "swipeWithin"] + }, + "runtimeElementRole": { + "enum": [ + "application", + "button", + "cell", + "image", + "keyboard-key", + "list", + "menu", + "other", + "scroll-view", + "slider", + "switch", + "tab", + "text", + "text-field", + "window" + ] + }, + "runtimeElementState": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { "type": "boolean" }, + "focused": { "type": "boolean" }, + "selected": { "type": "boolean" }, + "visible": { "type": "boolean" } + } + }, + "runtimeElement": { + "type": "object", + "additionalProperties": false, + "properties": { + "ref": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "role": { "$ref": "#/$defs/runtimeElementRole" }, + "label": { "type": "string" }, + "value": { "type": "string" }, + "identifier": { "type": "string" }, + "frame": { "$ref": "#/$defs/frame" }, + "state": { "$ref": "#/$defs/runtimeElementState" }, + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionName" } + } + }, + "required": 
["ref", "frame", "actions"] + }, + "runtimeActionHint": { + "type": "object", + "additionalProperties": false, + "properties": { + "action": { "$ref": "#/$defs/runtimeActionName" }, + "elementRef": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "label": { "type": "string" } + }, + "required": ["action", "elementRef"] + }, + "runtimeSnapshot": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot" }, + "protocol": { "const": "rs/1" }, + "simulatorId": { "type": "string" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "capturedAtMs": { "type": "integer", "minimum": 0 }, + "expiresAtMs": { "type": "integer", "minimum": 0 }, + "elements": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeElement" } }, - "height": { - "type": "number" + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionHint" } } }, "required": [ - "x", - "y", - "width", - "height" + "type", + "protocol", + "simulatorId", + "screenHash", + "seq", + "capturedAtMs", + "expiresAtMs", + "elements", + "actions" ] }, - "accessibilityNode": { + "compactRuntimeSnapshot": { "type": "object", - "additionalProperties": true, + "additionalProperties": false, "properties": { - "frame": { - "$ref": "#/$defs/frame" - }, - "type": { - "type": "string" + "type": { "const": "runtime-snapshot" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "count": { "type": "integer", "minimum": 0 }, + "targets": { + "type": "array", + "items": { "type": "string" } }, - "role": { - "type": "string" + "scroll": { + "type": "array", + "items": { "type": "string" } }, - "children": { + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] + }, + "runtimeSnapshotUnchanged": { + "type": "object", + "additionalProperties": false, + 
"properties": { + "type": { "const": "runtime-snapshot-unchanged" }, + "protocol": { "const": "rs/1" }, + "simulatorId": { "type": "string" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "protocol", "simulatorId", "screenHash", "seq"] + }, + "compactRuntimeSnapshotUnchanged": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot-unchanged" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "unchanged": { "const": true }, + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "unchanged", "udid"] + }, + "waitPredicate": { + "enum": ["exists", "gone", "enabled", "focused", "textContains", "settled"] + }, + "waitMatch": { + "type": "object", + "additionalProperties": false, + "properties": { + "predicate": { "$ref": "#/$defs/waitPredicate" }, + "matches": { "type": "array", "items": { - "$ref": "#/$defs/accessibilityNode" + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] } + } + }, + "required": ["predicate", "matches"] + }, + "recoverableUiError": { + "type": "object", + "additionalProperties": false, + "properties": { + "code": { + "enum": [ + "SNAPSHOT_MISSING", + "SNAPSHOT_EXPIRED", + "SNAPSHOT_PARSE_FAILED", + "ELEMENT_REF_NOT_FOUND", + "TARGET_NOT_FOUND", + "TARGET_AMBIGUOUS", + "TARGET_NOT_ACTIONABLE", + "WAIT_TIMEOUT", + "UI_STATE_CHANGED", + "ACTION_FAILED" + ] }, - "enabled": { - "type": "boolean" - }, - "custom_actions": { + "message": { "type": "string" }, + "recoveryHint": { "type": "string" }, + "elementRef": { "type": "string" }, + "candidates": { "type": "array", "items": { - "type": "string" + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] } }, - "AXFrame": { - "type": "string" - }, - "AXUniqueId": { - "type": [ - "string", - "null" - ] - }, - 
"role_description": { - "type": [ - "string", - "null" - ] - }, - "AXLabel": { - "type": [ - "string", - "null" - ] - }, - "content_required": { - "type": "boolean" - }, - "title": { - "type": [ - "string", - "null" - ] - }, - "help": { - "type": [ - "string", - "null" - ] - }, - "AXValue": { - "type": [ - "string", - "null" - ] - }, - "subrole": { - "type": [ - "string", - "null" - ] - }, - "pid": { - "type": "number" - } + "snapshotAgeMs": { "type": "integer", "minimum": 0 }, + "timeoutMs": { "type": "integer", "minimum": 0 } }, - "required": [ - "frame", - "type", - "role", - "children", - "enabled", - "custom_actions" - ] + "required": ["code", "message", "recoveryHint"] } }, "properties": { - "schema": { - "const": "xcodebuildmcp.output.capture-result" - }, - "schemaVersion": { - "const": "2" - }, - "didError": { - "type": "boolean" - }, - "error": { - "type": [ - "string", - "null" - ] - }, + "schema": { "const": "xcodebuildmcp.output.capture-result" }, + "schemaVersion": { "const": "2" }, + "didError": { "type": "boolean" }, + "error": { "type": ["string", "null"] }, "data": { "type": "object", "additionalProperties": false, @@ -150,16 +224,10 @@ "type": "object", "additionalProperties": false, "properties": { - "simulatorId": { - "type": "string" - }, - "screenshotPath": { - "type": "string" - } + "simulatorId": { "type": "string" }, + "screenshotPath": { "type": "string" } }, - "required": [ - "simulatorId" - ] + "required": ["simulatorId"] }, "capture": { "oneOf": [ @@ -167,92 +235,29 @@ "type": "object", "additionalProperties": false, "properties": { - "format": { - "type": "string" - }, - "width": { - "type": "integer", - "minimum": 0 - }, - "height": { - "type": "integer", - "minimum": 0 - } - }, - "required": [ - "format", - "width", - "height" - ] - }, - { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "const": "ui-hierarchy" - }, - "uiHierarchy": { - "type": "array", - "items": { - "$ref": 
"#/$defs/accessibilityNode" - } - } + "format": { "type": "string" }, + "width": { "type": "integer", "minimum": 0 }, + "height": { "type": "integer", "minimum": 0 } }, - "required": [ - "type", - "uiHierarchy" - ] + "required": ["format", "width", "height"] }, - { - "type": "object", - "additionalProperties": false, - "properties": { - "type": { - "const": "video-recording" - }, - "state": { - "enum": [ - "started", - "stopped" - ] - }, - "fps": { - "type": "integer", - "minimum": 1 - }, - "outputFile": { - "type": "string" - }, - "sessionId": { - "type": "string" - } - }, - "required": [ - "type", - "state" - ] - } + { "$ref": "#/$defs/runtimeSnapshot" }, + { "$ref": "#/$defs/compactRuntimeSnapshot" }, + { "$ref": "#/$defs/runtimeSnapshotUnchanged" }, + { "$ref": "#/$defs/compactRuntimeSnapshotUnchanged" } ] }, "diagnostics": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/basicDiagnostics" - } + }, + "uiError": { "$ref": "#/$defs/recoverableUiError" }, + "waitMatch": { "$ref": "#/$defs/waitMatch" } }, - "required": [ - "summary", - "artifacts" - ] + "required": ["summary", "artifacts"] }, "nextSteps": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/nextSteps" } }, - "required": [ - "schema", - "schemaVersion", - "didError", - "error", - "data" - ] + "required": ["schema", "schemaVersion", "didError", "error", "data"] } diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json index 78625e55c..e38ead610 100644 --- a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -8,22 +8,109 @@ "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/errorConsistency" } ], - "properties": { - "schema": { - "const": 
"xcodebuildmcp.output.ui-action-result" - }, - "schemaVersion": { - "const": "2" + "$defs": { + "frame": { + "type": "object", + "additionalProperties": false, + "properties": { + "x": { "type": "number" }, + "y": { "type": "number" }, + "width": { "type": "number" }, + "height": { "type": "number" } + }, + "required": ["x", "y", "width", "height"] }, - "didError": { - "type": "boolean" + "runtimeActionName": { + "enum": ["tap", "typeText", "longPress", "touch", "swipeWithin"] }, - "error": { - "type": [ - "string", - "null" + "runtimeElementRole": { + "enum": [ + "application", + "button", + "cell", + "image", + "keyboard-key", + "list", + "menu", + "other", + "scroll-view", + "slider", + "switch", + "tab", + "text", + "text-field", + "window" ] }, + "runtimeElementState": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { "type": "boolean" }, + "focused": { "type": "boolean" }, + "selected": { "type": "boolean" }, + "visible": { "type": "boolean" } + } + }, + "runtimeElement": { + "type": "object", + "additionalProperties": false, + "properties": { + "ref": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "role": { "$ref": "#/$defs/runtimeElementRole" }, + "label": { "type": "string" }, + "value": { "type": "string" }, + "identifier": { "type": "string" }, + "frame": { "$ref": "#/$defs/frame" }, + "state": { "$ref": "#/$defs/runtimeElementState" }, + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionName" } + } + }, + "required": ["ref", "frame", "actions"] + }, + "direction": { + "enum": ["up", "down", "left", "right"] + }, + "recoverableUiError": { + "type": "object", + "additionalProperties": false, + "properties": { + "code": { + "enum": [ + "SNAPSHOT_MISSING", + "SNAPSHOT_EXPIRED", + "SNAPSHOT_PARSE_FAILED", + "ELEMENT_REF_NOT_FOUND", + "TARGET_NOT_FOUND", + "TARGET_AMBIGUOUS", + "TARGET_NOT_ACTIONABLE", + "WAIT_TIMEOUT", + "UI_STATE_CHANGED", + "ACTION_FAILED" + ] + }, + "message": { "type": 
"string" }, + "recoveryHint": { "type": "string" }, + "elementRef": { "type": "string" }, + "candidates": { + "type": "array", + "items": { + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] + } + }, + "snapshotAgeMs": { "type": "integer", "minimum": 0 }, + "timeoutMs": { "type": "integer", "minimum": 0 } + }, + "required": ["code", "message", "recoveryHint"] + } + }, + "properties": { + "schema": { "const": "xcodebuildmcp.output.ui-action-result" }, + "schemaVersion": { "const": "2" }, + "didError": { "type": "boolean" }, + "error": { "type": ["string", "null"] }, "data": { "type": "object", "additionalProperties": false, @@ -37,174 +124,99 @@ "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "tap" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - }, - "id": { - "type": "string" - }, - "label": { - "type": "string" - } + "type": { "const": "tap" }, + "elementRef": { "type": "string" } }, - "required": [ - "type" - ] + "required": ["type", "elementRef"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "swipe" - }, - "from": { - "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/point" - }, - "to": { - "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/point" - }, - "durationSeconds": { - "type": "number", - "minimum": 0 - } + "type": { "const": "swipe" }, + "withinElementRef": { "type": "string" }, + "direction": { "$ref": "#/$defs/direction" }, + "durationSeconds": { "type": "number", "minimum": 0 } }, - "required": [ - "type" - ] + "required": ["type", "withinElementRef", "direction"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "touch" - }, - "event": { - "type": "string" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - } + "type": { "const": "touch" }, + "elementRef": { "type": "string" }, + 
"event": { "type": "string" } }, - "required": [ - "type" - ] + "required": ["type", "elementRef"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "long-press" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - }, - "durationMs": { - "type": "integer", - "minimum": 0 - } + "type": { "const": "long-press" }, + "elementRef": { "type": "string" }, + "durationMs": { "type": "integer", "minimum": 0 } }, - "required": [ - "type", - "x", - "y", - "durationMs" - ] + "required": ["type", "elementRef", "durationMs"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "button" - }, - "button": { - "type": "string" - } + "type": { "const": "button" }, + "button": { "type": "string" } }, - "required": [ - "type", - "button" - ] + "required": ["type", "button"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "gesture" - }, - "gesture": { - "type": "string" - } + "type": { "const": "gesture" }, + "gesture": { "type": "string" } }, - "required": [ - "type", - "gesture" - ] + "required": ["type", "gesture"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "type-text" - } + "type": { "const": "type-text" }, + "elementRef": { "type": "string" }, + "textLength": { "type": "integer", "minimum": 0 } }, - "required": [ - "type" - ] + "required": ["type", "elementRef"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "key-press" - }, - "keyCode": { - "type": "integer", - "minimum": 0 - } + "type": { "const": "key-press" }, + "keyCode": { "type": "integer", "minimum": 0 } }, - "required": [ - "type", - "keyCode" - ] + "required": ["type", "keyCode"] }, { "type": "object", "additionalProperties": false, "properties": { - "type": { - "const": "key-sequence" - }, + "type": { "const": "key-sequence" }, "keyCodes": { "type": "array", - "items": { - "type": 
"integer", - "minimum": 0 - } + "items": { "type": "integer", "minimum": 0 } } }, - "required": [ - "type", - "keyCodes" - ] + "required": ["type", "keyCodes"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "batch" }, + "stepCount": { "type": "integer", "minimum": 1 } + }, + "required": ["type", "stepCount"] } ] }, @@ -212,33 +224,20 @@ "type": "object", "additionalProperties": false, "properties": { - "simulatorId": { - "type": "string" - } + "simulatorId": { "type": "string" } }, - "required": [ - "simulatorId" - ] + "required": ["simulatorId"] }, "diagnostics": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/basicDiagnostics" - } + }, + "uiError": { "$ref": "#/$defs/recoverableUiError" } }, - "required": [ - "summary", - "action", - "artifacts" - ] + "required": ["summary", "action", "artifacts"] }, "nextSteps": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/nextSteps" } }, - "required": [ - "schema", - "schemaVersion", - "didError", - "error", - "data" - ] + "required": ["schema", "schemaVersion", "didError", "error", "data"] } diff --git a/scripts/bundle-axe.sh b/scripts/bundle-axe.sh index eb15c664b..c692c32a8 100755 --- a/scripts/bundle-axe.sh +++ b/scripts/bundle-axe.sh @@ -94,10 +94,17 @@ else echo "đŸ“Ĩ Downloading latest AXe release from GitHub..." 
- AXE_RELEASE_BASE_URL="https://github.com/cameroncooke/AXe/releases/download/v${PINNED_AXE_VERSION}" - AXE_HOMEBREW_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-homebrew-v${PINNED_AXE_VERSION}.tar.gz" - AXE_UNIVERSAL_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-v${PINNED_AXE_VERSION}-universal.tar.gz" - AXE_LEGACY_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-v${PINNED_AXE_VERSION}.tar.gz" + if [[ "$PINNED_AXE_VERSION" == staging-* ]]; then + AXE_RELEASE_TAG="$PINNED_AXE_VERSION" + AXE_ASSET_VERSION="$PINNED_AXE_VERSION" + else + AXE_RELEASE_TAG="v${PINNED_AXE_VERSION}" + AXE_ASSET_VERSION="v${PINNED_AXE_VERSION}" + fi + AXE_RELEASE_BASE_URL="https://github.com/cameroncooke/AXe/releases/download/${AXE_RELEASE_TAG}" + AXE_HOMEBREW_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-homebrew-${AXE_ASSET_VERSION}.tar.gz" + AXE_UNIVERSAL_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-${AXE_ASSET_VERSION}-universal.tar.gz" + AXE_LEGACY_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-${AXE_ASSET_VERSION}.tar.gz" # Create temp directory mkdir -p "$AXE_TEMP_DIR" @@ -258,7 +265,8 @@ if [ "$OS_NAME" = "Darwin" ]; then ad_hoc_sign_bundled_axe_assets fi - if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ]; then + if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ] || [ "$AXE_ARCHIVE_FLAVOR" = "local-signed" ]; then + ad_hoc_sign_bundled_axe_assets echo "â„šī¸ ${AXE_ARCHIVE_FLAVOR} AXe archive detected; using ad-hoc signatures for local runtime compatibility" else echo "🔏 Verifying AXe signatures..." 
@@ -284,7 +292,7 @@ if [ "$OS_NAME" = "Darwin" ]; then done < <(find "$BUNDLED_DIR/Frameworks" -name "*.framework" -type d) fi - if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ]; then + if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ] || [ "$AXE_ARCHIVE_FLAVOR" = "local-signed" ]; then echo "â„šī¸ Skipping Gatekeeper assessment for ${AXE_ARCHIVE_FLAVOR} AXe archive" else echo "đŸ›Ąī¸ Assessing AXe with Gatekeeper..." @@ -316,6 +324,27 @@ else echo "âš ī¸ Skipping AXe binary verification on non-macOS (detected $OS_NAME)" AXE_VERSION="unknown (verification skipped)" fi +validate_axe_version_metadata() { + if [ "$AXE_VERSION" = "unknown (verification skipped)" ]; then + return + fi + + if [[ "$AXE_VERSION" == *dirty* ]] && [ "${AXE_ALLOW_DIRTY_LOCAL:-0}" != "1" ]; then + echo "❌ Bundled AXe reports a dirty version: $AXE_VERSION" + echo " Rebuild AXe from a clean checkout or set AXE_ALLOW_DIRTY_LOCAL=1 for explicit local testing." 
+ exit 1 + fi + + if [ "$USE_LOCAL_AXE" = false ]; then + if [ "$AXE_VERSION" != "$PINNED_AXE_VERSION" ] && [ "$AXE_VERSION" != "v$PINNED_AXE_VERSION" ]; then + echo "❌ Bundled AXe version '$AXE_VERSION' does not match pinned version '$PINNED_AXE_VERSION'" + exit 1 + fi + fi +} + +validate_axe_version_metadata + echo "📋 AXe version: $AXE_VERSION" # Clean up temp directory if it was used diff --git a/src/cli/__tests__/register-tool-commands.test.ts b/src/cli/__tests__/register-tool-commands.test.ts index bd604ddd7..1b78378ef 100644 --- a/src/cli/__tests__/register-tool-commands.test.ts +++ b/src/cli/__tests__/register-tool-commands.test.ts @@ -6,6 +6,7 @@ import type { ToolHandlerContext } from '../../rendering/types.ts'; import { DefaultToolInvoker } from '../../runtime/tool-invoker.ts'; import type { ResolvedRuntimeConfig } from '../../utils/config-store.ts'; import { registerToolCommands } from '../register-tool-commands.ts'; +import * as simulatorResolver from '../../utils/simulator-resolver.ts'; function createTool(overrides: Partial = {}): ToolDefinition { return { @@ -260,6 +261,90 @@ describe('registerToolCommands', () => { stdoutWrite.mockRestore(); }); + it('resolves configured simulatorName for CLI tools that require simulatorId', async () => { + const resolveSimulatorNameToId = vi + .spyOn(simulatorResolver, 'resolveSimulatorNameToId') + .mockResolvedValue({ + success: true, + simulatorId: 'SIM-RESOLVED', + simulatorName: 'iPhone 17 Pro', + }); + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + simulatorId: z.string().describe('Simulator ID'), + }, + mcpSchema: { + simulatorId: z.string().describe('Simulator ID'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: { + simulatorName: 'iPhone 17 Pro', + }, + 
sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect(app.parseAsync(['simulator', 'run-tool'])).resolves.toBeDefined(); + + expect(resolveSimulatorNameToId).toHaveBeenCalledWith(expect.any(Function), 'iPhone 17 Pro'); + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + simulatorId: 'SIM-RESOLVED', + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + + it('does not synthesize simulatorId for tools that already accept simulatorName', async () => { + const resolveSimulatorNameToId = vi.spyOn(simulatorResolver, 'resolveSimulatorNameToId'); + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + simulatorId: z.string().optional().describe('Simulator ID'), + simulatorName: z.string().optional().describe('Simulator name'), + }, + mcpSchema: { + simulatorId: z.string().optional().describe('Simulator ID'), + simulatorName: z.string().optional().describe('Simulator name'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: { + simulatorName: 'iPhone 17 Pro', + }, + sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect(app.parseAsync(['simulator', 'run-tool'])).resolves.toBeDefined(); + + expect(resolveSimulatorNameToId).not.toHaveBeenCalled(); + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + simulatorName: 'iPhone 17 Pro', + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + it('keeps the normal missing-argument error when no hydrated default exists', async () => { const consoleError = vi.spyOn(console, 'error').mockImplementation(() => {}); @@ -458,6 +543,97 @@ describe('registerToolCommands', () => { stdoutWrite.mockRestore(); }); + it('parses comma-separated numeric array args', async 
() => { + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + workspacePath: z.string().describe('Workspace path'), + keyCodes: z.array(z.number()).describe('Key codes'), + }, + mcpSchema: { + workspacePath: z.string().describe('Workspace path'), + keyCodes: z.array(z.number()).describe('Key codes'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: undefined, + sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect( + app.parseAsync([ + 'simulator', + 'run-tool', + '--workspace-path', + 'App.xcworkspace', + '--key-codes', + '23,18,14', + ]), + ).resolves.toBeDefined(); + + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + workspacePath: 'App.xcworkspace', + keyCodes: [23, 18, 14], + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + + it('honors --style minimal by hiding next steps', async () => { + vi.spyOn(DefaultToolInvoker.prototype, 'invokeDirect').mockImplementation( + async (_tool, _args, opts) => { + opts.renderSession?.setStructuredOutput?.({ + schema: 'xcodebuildmcp.output.app-path', + schemaVersion: '1', + result: { + kind: 'app-path', + didError: false, + error: null, + artifacts: { appPath: '/tmp/MyApp.app' }, + }, + }); + opts.renderSession?.setNextSteps?.( + [ + { + label: 'Run again', + tool: 'run_tool', + workflow: 'simulator', + cliTool: 'run-tool', + }, + ], + 'cli', + ); + }, + ); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool(); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--style', 'minimal']), + ).resolves.toBeDefined(); + 
+ const output = stdoutChunks.join(''); + expect(output).toContain('Get App Path'); + expect(output).not.toContain('Next steps:'); + expect(output).not.toContain('Run again'); + }); + it('applies --file-path-render-style to text output without forwarding it to tool args', async () => { vi.spyOn(DefaultToolInvoker.prototype, 'invokeDirect').mockImplementation( async (tool, args, opts) => { @@ -683,6 +859,376 @@ describe('registerToolCommands', () => { ); }); + it('writes compact rs/1 capture JSON for runtime snapshots by default', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'San Francisco', + value: 'selected', + identifier: 'weather.locationButton', + frame: { x: 12, y: 81.33, width: 178, height: 33.33 }, + actions: ['tap', 'longPress', 'touch'], + }, + { + ref: 'e3', + role: 'button', + label: 'Sheet Grabber', + value: 'Half screen', + frame: { x: 150, y: 10, width: 80, height: 20 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'San Francisco' }, + { action: 'tap', elementRef: 'e3', label: 'Sheet 
Grabber' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + expect(stdoutChunks.join('')).toBe( + `${JSON.stringify( + { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + rs: '1', + screenHash: 'screen-hash', + seq: 1, + count: 3, + targets: ['e2|tap|button|San Francisco|selected|weather.locationButton'], + scroll: ['e1|swipe|application|Weather||'], + udid: 'SIMULATOR-1', + }, + }, + }, + null, + 2, + )}\n`, + ); + }); + + it('orders destructive controls after useful targets in compact JSON', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Remove', + identifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e2', + role: 'button', + label: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 140, width: 300, height: 80 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Remove' }, + { action: 'tap', elementRef: 'e2', label: 
'Portland, 1:24 PM ¡ Light Rain' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { + data: { capture: { targets: string[] } }; + }; + expect(output.data.capture.targets).toEqual([ + 'e2|tap|button|Portland, 1:24 PM ¡ Light Rain||', + 'e1|tap|button|Remove||trash', + ]); + }); + + it('orders unselected segmented controls before already-selected controls in compact JSON', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e9', + role: 'button', + label: '°F', + value: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: '°C', + value: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e9', label: '°F' }, + { action: 'tap', elementRef: 'e10', label: '°C' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = 
JSON.parse(stdoutChunks.join('')) as { + data: { capture: { targets: string[] } }; + }; + expect(output.data.capture.targets).toEqual([ + 'e10|tap|button|°C|not selected|', + 'e9|tap|button|°F|selected|', + ]); + }); + + it('writes compact wait matches with no primary action for static text', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'textContains', + matches: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'longPress', elementRef: 'e11', label: 'No matches' }, + { action: 'touch', elementRef: 'e11', label: 'No matches' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { + data: { waitMatch: { matches: string[] } }; + }; + expect(output.data.waitMatch.matches).toEqual(['e11|none|text|No matches||']); + }); + + it('writes the full runtime snapshot envelope for verbose JSON 
output', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + ], + actions: [{ action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json', '--verbose']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { schema: string; data: unknown }; + expect(output.schema).toBe('xcodebuildmcp.output.capture-result'); + expect(output.data).toEqual( + expect.objectContaining({ + capture: expect.objectContaining({ + type: 'runtime-snapshot', + elements: [expect.objectContaining({ ref: 'e1', actions: ['swipeWithin'] })], + }), + }), + ); + }); + it('writes one NDJSON line per domain fragment for jsonl output and omits the final envelope', async () => { mockInvokeDirectThroughHandler(); const stdoutChunks: string[] = []; diff --git a/src/cli/__tests__/schema-to-yargs.test.ts b/src/cli/__tests__/schema-to-yargs.test.ts index 014868d80..371191415 100644 --- a/src/cli/__tests__/schema-to-yargs.test.ts +++ b/src/cli/__tests__/schema-to-yargs.test.ts 
@@ -25,4 +25,18 @@ describe('schemaToYargsOptions', () => { expect(options.get('workspace-path')?.demandOption).toBe(false); }); + + it('coerces comma-separated numeric array flags', () => { + const options = schemaToYargsOptions({ + keyCodes: z.array(z.number()), + }); + + const coerce = options.get('key-codes')?.coerce; + + expect(typeof coerce).toBe('function'); + expect(coerce?.('23,18,14')).toEqual([23, 18, 14]); + expect(coerce?.('23, 18, 14')).toEqual([23, 18, 14]); + expect(coerce?.(['23', '18,14'])).toEqual([23, 18, 14]); + expect(coerce?.('23,')).toEqual([23, Number.NaN]); + }); }); diff --git a/src/cli/register-tool-commands.ts b/src/cli/register-tool-commands.ts index da4f66c7d..16fe678f5 100644 --- a/src/cli/register-tool-commands.ts +++ b/src/cli/register-tool-commands.ts @@ -17,6 +17,7 @@ import { getCliSessionDefaultsForTool, isKnownCliSessionDefaultsProfile, mergeCliSessionDefaults, + resolveCliSessionDefaults, } from './session-defaults.ts'; import { createRenderSession } from '../rendering/render.ts'; import { toStructuredEnvelope } from '../utils/structured-output-envelope.ts'; @@ -26,6 +27,8 @@ import { STRUCTURED_ERROR_SCHEMA_VERSION, } from '../utils/structured-error.ts'; import { toCliJsonlEvent } from './jsonl-event.ts'; +import { resolveSimulatorNameToId } from '../utils/simulator-resolver.ts'; +import { getDefaultCommandExecutor } from '../utils/execution/index.ts'; export interface RegisterToolCommandsOptions { workspaceRoot: string; @@ -96,7 +99,7 @@ function createBufferedHandlerContext( function writeJsonOutput( handlerContext: ToolHandlerContext, session: ReturnType, - outputStyle: OutputStyle, + options: { outputStyle: OutputStyle; verbose?: boolean }, ): boolean { const { structuredOutput } = handlerContext; const envelope = structuredOutput @@ -106,7 +109,9 @@ function writeJsonOutput( structuredOutput.schemaVersion, { nextSteps: session.getNextSteps?.(), - outputStyle, + nextStepRuntime: session.getNextStepsRuntime?.(), + 
outputStyle: options.outputStyle, + runtimeSnapshot: options.verbose ? 'full' : 'compact', }, ) : toStructuredEnvelope( @@ -117,7 +122,7 @@ function writeJsonOutput( }).result, STRUCTURED_ERROR_SCHEMA, STRUCTURED_ERROR_SCHEMA_VERSION, - { outputStyle }, + { outputStyle: options.outputStyle }, ); process.stdout.write(JSON.stringify(envelope, null, 2) + '\n'); @@ -251,12 +256,18 @@ function registerToolSubcommand( describe: 'Output format', }); + subYargs.option('verbose', { + type: 'boolean', + default: false, + describe: 'Render verbose output data when supported', + }); + // Group options for cleaner help display if (toolArgNames.length > 0) { subYargs.group(toolArgNames, 'Tool Arguments:'); } subYargs.group(['profile'], 'Session Defaults:'); - subYargs.group(['json', 'output'], 'Output Options:'); + subYargs.group(['json', 'output', 'verbose'], 'Output Options:'); // Add note about unsupported keys if any if (unsupportedKeys.length > 0) { @@ -287,7 +298,9 @@ function registerToolSubcommand( const outputStyle: OutputStyle = argv.style === 'minimal' ? 
'minimal' : 'normal'; const socketPath = argv.socket as string; const logLevel = argv['log-level'] as string | undefined; + const style = argv.style as string | undefined; const filePathRenderStyle = argv.filePathRenderStyle as FilePathRenderStyle | undefined; + const verboseOutput = argv.verbose === true; if ( profileOverride && @@ -322,6 +335,7 @@ function registerToolSubcommand( 'logLevel', 'file-path-render-style', 'filePathRenderStyle', + 'verbose', '_', '$0', ]); @@ -335,6 +349,10 @@ function registerToolSubcommand( // Merge: flag args first, then JSON overrides const explicitArgs = { ...toolParams, ...jsonArgs }; + const rawDefaults = resolveCliSessionDefaults({ + runtimeConfig: opts.runtimeConfig, + profileOverride, + }); const args = mergeCliSessionDefaults({ defaults: getCliSessionDefaultsForTool({ tool, @@ -344,6 +362,24 @@ function registerToolSubcommand( explicitArgs, }); + if ( + args.simulatorId === undefined && + tool.cliSchema.simulatorId !== undefined && + tool.cliSchema.simulatorName === undefined && + typeof rawDefaults.simulatorName === 'string' + ) { + const resolvedSimulator = await resolveSimulatorNameToId( + getDefaultCommandExecutor(), + rawDefaults.simulatorName, + ); + if (!resolvedSimulator.success) { + console.error(`Error: ${resolvedSimulator.error}`); + process.exitCode = 1; + return; + } + args.simulatorId = resolvedSimulator.simulatorId; + } + const missingRequiredFlags = requiredFlagNames.filter((flagName) => { const camelKey = convertArgvToToolParams({ [flagName]: true }); const [toolKey] = Object.keys(camelKey); @@ -373,6 +409,7 @@ function registerToolSubcommand( runtime: 'cli', outputStyle, filePathRenderStyle, + includeNextSteps: style !== 'minimal', }); const writeJsonlFragment = outputFormat === 'jsonl' @@ -406,7 +443,7 @@ function registerToolSubcommand( } if (outputFormat === 'json') { - if (writeJsonOutput(handlerContext, session, outputStyle)) { + if (writeJsonOutput(handlerContext, session, { outputStyle, verbose: 
verboseOutput })) { process.exitCode = 1; } return; diff --git a/src/cli/schema-to-yargs.ts b/src/cli/schema-to-yargs.ts index 175068097..e68fe8cbf 100644 --- a/src/cli/schema-to-yargs.ts +++ b/src/cli/schema-to-yargs.ts @@ -7,6 +7,16 @@ export interface YargsOptionConfig extends Options { type: 'string' | 'number' | 'boolean' | 'array'; } +function coerceNumberArray(value: unknown): number[] { + const values = Array.isArray(value) ? value : [value]; + return values.flatMap((entry) => + String(entry) + .split(',') + .map((item) => item.trim()) + .map((item) => (item === '' ? Number.NaN : Number(item))), + ); +} + export interface ZodToYargsOptionOptions { hasHydratedDefault?: boolean; } @@ -195,9 +205,17 @@ export function zodToYargsOption( const element = getArrayElement(unwrapped); if (element) { const elemTypeName = getZodTypeName(unwrap(element)); - if (elemTypeName === 'string' || elemTypeName === 'number') { + if (elemTypeName === 'string') { return { type: 'array', describe: description, demandOption: false }; } + if (elemTypeName === 'number') { + return { + type: 'array', + describe: description, + demandOption: false, + coerce: coerceNumberArray, + }; + } } // Complex array types - use --json fallback return null; diff --git a/src/core/__tests__/structured-output-schema.test.ts b/src/core/__tests__/structured-output-schema.test.ts index 006f38257..54bc6e97c 100644 --- a/src/core/__tests__/structured-output-schema.test.ts +++ b/src/core/__tests__/structured-output-schema.test.ts @@ -383,6 +383,125 @@ describe('structured output schema bundling', () => { ).toBe(true); }); + it('accepts ui automation v2 runtime snapshots and semantic action errors', () => { + const ajv = new Ajv2020({ allErrors: true, strict: true, validateSchema: true }); + const captureValidate = ajv.compile( + getMcpOutputSchema({ schema: 'xcodebuildmcp.output.capture-result', version: '2' }), + ); + const actionValidate = ajv.compile( + getMcpOutputSchema({ schema: 
'xcodebuildmcp.output.ui-action-result', version: '2' }), + ); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIM-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 100, height: 40 }, + state: { enabled: true, selected: true, visible: true }, + actions: ['tap'], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Continue' }], + }, + }, + }), + ).toBe(true); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIM-1', + screenHash: 'screen-hash', + seq: 2, + }, + }, + }), + ).toBe(true); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: 'screen-hash', + seq: 2, + unchanged: true, + udid: 'SIM-1', + }, + }, + }), + ).toBe(true); + + expect( + actionValidate({ + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + didError: true, + error: 'Element ref was not found in the current snapshot.', + data: { + summary: { status: 'FAILED' }, + action: { type: 'tap', elementRef: 'e404' }, + artifacts: { simulatorId: 'SIM-1' }, + uiError: { + code: 'ELEMENT_REF_NOT_FOUND', + message: 'Element ref was not found in the current snapshot.', + recoveryHint: 'Run 
snapshot_ui again and retry with a current elementRef.', + elementRef: 'e404', + snapshotAgeMs: 1_000, + }, + }, + }), + ).toBe(true); + + expect( + actionValidate({ + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + action: { type: 'batch', stepCount: 2 }, + artifacts: { simulatorId: 'SIM-1' }, + }, + }), + ).toBe(true); + }); + it('accepts xcode bridge call-result artifacts', () => { const schema = getMcpOutputSchema({ schema: 'xcodebuildmcp.output.xcode-bridge-call-result', diff --git a/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts b/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts index b2325eed9..f9c677b82 100644 --- a/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts +++ b/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts @@ -183,6 +183,8 @@ describe('sendKeyboardShortcut', () => { expect(result.success).toBe(false); if (!result.success) { expect(result.error).toContain('iPhone 15 Pro'); + expect(result.error).toContain('without a device window'); + expect(result.error).toContain('retry the keyboard shortcut'); } expect(calls).toHaveLength(3); }); diff --git a/src/mcp/tools/simulator-management/_keyboard_shortcut.ts b/src/mcp/tools/simulator-management/_keyboard_shortcut.ts index a4ea377e3..1eb5c67ae 100644 --- a/src/mcp/tools/simulator-management/_keyboard_shortcut.ts +++ b/src/mcp/tools/simulator-management/_keyboard_shortcut.ts @@ -123,7 +123,7 @@ export async function sendKeyboardShortcut( if (focusResult.output.trim() === 'NO_WINDOW') { return { success: false, - error: `No Simulator window found for "${device.name}". Is the simulator window visible?`, + error: `No visible Simulator window found for "${device.name}". 
Simulator.app may be running without a device window; open the simulator device window manually, then retry the keyboard shortcut.`, }; } diff --git a/src/mcp/tools/simulator/__tests__/boot_sim.test.ts b/src/mcp/tools/simulator/__tests__/boot_sim.test.ts index 5e40e5af5..e7064d5db 100644 --- a/src/mcp/tools/simulator/__tests__/boot_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/boot_sim.test.ts @@ -8,6 +8,12 @@ import { sessionStore } from '../../../../utils/session-store.ts'; import { schema, handler, boot_simLogic } from '../boot_sim.ts'; import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + describe('boot_sim tool', () => { beforeEach(() => { sessionStore.clear(); @@ -105,6 +111,43 @@ describe('boot_sim tool', () => { expect(result.isError).toBe(true); }); + it('should resolve simulatorName before booting', async () => { + const calls: Array<{ + command: string[]; + description?: string; + allowStderr?: boolean; + }> = []; + const mockExecutor = async ( + command: string[], + description?: string, + allowStderr?: boolean, + ) => { + calls.push({ command, description, allowStderr }); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ + success: true, + output: 'Simulator booted successfully', + }); + }; + + const result = await runLogic(() => + boot_simLogic({ simulatorName: 'iPhone 17' }, mockExecutor), + ); + + expect(result.isError).toBeFalsy(); + expect(result.nextStepParams).toEqual({ + open_sim: {}, + install_app_sim: { simulatorId: 'resolved-uuid', appPath: 'PATH_TO_YOUR_APP' }, + launch_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'YOUR_APP_BUNDLE_ID' }, + }); + expect(calls.map((call) => call.command)).toEqual([ + ['xcrun', 'simctl', 'list', 
'devices', 'available', '-j'], + ['xcrun', 'simctl', 'boot', 'resolved-uuid'], + ]); + }); + it('should verify command generation with mock executor', async () => { const calls: Array<{ command: string[]; diff --git a/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts index 21d892997..dcbc11b2b 100644 --- a/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts @@ -11,6 +11,12 @@ import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { schema, handler, install_app_simLogic } from '../install_app_sim.ts'; import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + describe('install_app_sim tool', () => { beforeEach(() => { sessionStore.clear(); @@ -100,6 +106,53 @@ describe('install_app_sim tool', () => { ]); }); + it('should resolve simulatorName before installing', async () => { + const executorCalls: Array> = []; + const mockExecutor: CommandExecutor = (...args) => { + executorCalls.push(args); + const command = args[0]; + if (command.includes('list')) { + return Promise.resolve( + createMockCommandResponse({ success: true, output: availableSimulatorsJson }), + ); + } + if (command[0] === 'defaults') { + return Promise.resolve( + createMockCommandResponse({ success: true, output: 'io.sentry.myapp' }), + ); + } + return Promise.resolve( + createMockCommandResponse({ success: true, output: 'App installed' }), + ); + }; + + const mockFileSystem = createMockFileSystemExecutor({ + existsSync: () => true, + }); + + const result = await runLogic(() => + install_app_simLogic( + { + simulatorName: 'iPhone 17', + appPath: '/path/to/app.app', + }, + mockExecutor, + mockFileSystem, + ), + ); + + expect(result.isError).toBeFalsy(); + 
expect(result.nextStepParams).toEqual({ + open_sim: {}, + launch_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'io.sentry.myapp' }, + }); + expect(executorCalls.map((call) => call[0])).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'install', 'resolved-uuid', '/path/to/app.app'], + ['defaults', 'read', '/path/to/app.app/Info', 'CFBundleIdentifier'], + ]); + }); + it('should generate command with different simulator identifier', async () => { const executorCalls: Array> = []; const mockExecutor: CommandExecutor = (...args) => { diff --git a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts index d3408553d..73bbe4b65 100644 --- a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts @@ -1,11 +1,20 @@ import { describe, it, expect, beforeEach } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor } from '../../../../test-utils/mock-executors.ts'; +import { + createMockCommandResponse, + createMockExecutor, +} from '../../../../test-utils/mock-executors.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; import { schema, handler, launch_app_simLogic, type SimulatorLauncher } from '../launch_app_sim.ts'; import type { LaunchWithLoggingResult } from '../../../../utils/simulator-steps.ts'; import { runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + function createMockLauncher(overrides?: Partial): SimulatorLauncher { return async (_uuid, _bundleId, _executor, _opts?) 
=> ({ success: true, @@ -143,6 +152,44 @@ describe('launch_app_sim tool', () => { expect(capturedEnv).toEqual({ STAGING_ENABLED: '1' }); }); + it('should resolve simulatorName before checking install and launching', async () => { + const executorCalls: string[][] = []; + const installCheckExecutor = async (command: string[]) => { + executorCalls.push(command); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ success: true, output: '/path/to/app/container' }); + }; + let launchedUuid: string | undefined; + const trackingLauncher: SimulatorLauncher = async (uuid, _bundleId, _executor, _opts?) => { + launchedUuid = uuid; + return { success: true, processId: 12345, logFilePath: '/tmp/test.log' }; + }; + + const result = await runLogic(() => + launch_app_simLogic( + { + simulatorName: 'iPhone 17', + bundleId: 'io.sentry.testapp', + }, + installCheckExecutor, + trackingLauncher, + ), + ); + + expect(result.isError).toBeFalsy(); + expect(launchedUuid).toBe('resolved-uuid'); + expect(executorCalls).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'get_app_container', 'resolved-uuid', 'io.sentry.testapp', 'app'], + ]); + expect(result.nextStepParams).toEqual({ + open_sim: {}, + stop_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'io.sentry.testapp' }, + }); + }); + it('should display friendly name when simulatorName is provided alongside resolved simulatorId', async () => { const installCheckExecutor = async () => ({ success: true, diff --git a/src/mcp/tools/simulator/__tests__/screenshot.test.ts b/src/mcp/tools/simulator/__tests__/screenshot.test.ts index 4432513e0..cdb390bb2 100644 --- a/src/mcp/tools/simulator/__tests__/screenshot.test.ts +++ b/src/mcp/tools/simulator/__tests__/screenshot.test.ts @@ -13,6 +13,15 @@ import { schema, handler, screenshotLogic } from '../../ui-automation/screenshot import { allText, 
runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; describe('screenshot plugin', () => { + const bootedDeviceListJson = JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { udid: 'test-uuid', name: 'iPhone 15 Pro', state: 'Booted' }, + { udid: 'another-uuid', name: 'iPhone 15', state: 'Booted' }, + ], + }, + }); + beforeEach(() => { sessionStore.clear(); }); @@ -89,7 +98,11 @@ describe('screenshot plugin', () => { expect(capturedCommands).toHaveLength(5); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); + + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -98,10 +111,6 @@ describe('screenshot plugin', () => { '/tmp/screenshot_mock-uuid-123.png', ]); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); - expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -168,7 +177,11 @@ describe('screenshot plugin', () => { expect(capturedCommands).toHaveLength(5); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); + + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -177,10 +190,6 @@ describe('screenshot plugin', () => { '/tmp/screenshot_different-uuid-456.png', ]); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); - expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -234,21 +243,21 @@ describe('screenshot plugin', () => { ), ); - // Should execute all commands in sequence: screenshot, list devices, orientation detection, optimization, dimensions + // Should execute all 
commands in sequence: list devices, screenshot, orientation detection, optimization, dimensions expect(capturedCommands).toHaveLength(5); - const firstCommand = capturedCommands[0]; - expect(firstCommand).toHaveLength(6); - expect(firstCommand[0]).toBe('xcrun'); - expect(firstCommand[1]).toBe('simctl'); - expect(firstCommand[2]).toBe('io'); - expect(firstCommand[3]).toBe('test-uuid'); - expect(firstCommand[4]).toBe('screenshot'); - expect(firstCommand[5]).toMatch(/\/.*\/screenshot_.*\.png/); + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); + const screenshotCommand = capturedCommands[1]; + expect(screenshotCommand).toHaveLength(6); + expect(screenshotCommand[0]).toBe('xcrun'); + expect(screenshotCommand[1]).toBe('simctl'); + expect(screenshotCommand[2]).toBe('io'); + expect(screenshotCommand[3]).toBe('test-uuid'); + expect(screenshotCommand[4]).toBe('screenshot'); + expect(screenshotCommand[5]).toMatch(/\/.*\/screenshot_.*\.png/); expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -267,7 +276,9 @@ describe('screenshot plugin', () => { const mockImageBuffer = Buffer.from('fake-image-data'); const mockExecutor = createCommandMatchingMockExecutor({ - 'xcrun simctl': { success: true, output: 'Screenshot saved' }, + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, sips: { success: true, output: 'Image optimized' }, }); @@ -320,11 +331,21 @@ describe('screenshot plugin', () => { }); it('should handle command failure', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Command failed', - }); + const 
mockExecutor: CommandExecutor = async (command) => { + const cmdStr = command.join(' '); + if (cmdStr.includes('simctl list devices')) { + return { + success: true, + output: bootedDeviceListJson, + error: undefined, + process: mockProcess, + }; + } + if (cmdStr.includes('simctl io')) { + return { success: false, output: '', error: 'Command failed', process: mockProcess }; + } + return { success: true, output: '', error: undefined, process: mockProcess }; + }; const mockPathDeps = { tmpdir: () => '/tmp', @@ -354,10 +375,11 @@ describe('screenshot plugin', () => { }); it('should handle file read failure', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, + const mockExecutor = createCommandMatchingMockExecutor({ + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, + sips: { success: true, output: 'Image optimized' }, }); const mockFileSystemExecutor = createMockFileSystemExecutor({ @@ -446,18 +468,18 @@ describe('screenshot plugin', () => { expect(capturedArgs).toHaveLength(5); - expect(capturedArgs[0]).toEqual([ + expect(capturedArgs[0][0][0]).toBe('xcrun'); + expect(capturedArgs[0][0][1]).toBe('simctl'); + expect(capturedArgs[0][0][2]).toBe('list'); + expect(capturedArgs[0][1]).toBe('[Screenshot]: list devices'); + expect(capturedArgs[0][2]).toBe(false); + + expect(capturedArgs[1]).toEqual([ ['xcrun', 'simctl', 'io', 'test-uuid', 'screenshot', '/tmp/screenshot_mock-uuid-123.png'], '[Screenshot]: screenshot', false, ]); - expect(capturedArgs[1][0][0]).toBe('xcrun'); - expect(capturedArgs[1][0][1]).toBe('simctl'); - expect(capturedArgs[1][0][2]).toBe('list'); - expect(capturedArgs[1][1]).toBe('[Screenshot]: list devices'); - expect(capturedArgs[1][2]).toBe(false); - expect(capturedArgs[2][0][0]).toBe('swift'); expect(capturedArgs[2][0][1]).toBe('-e'); 
expect(capturedArgs[2][1]).toBe('[Screenshot]: detect orientation'); @@ -578,10 +600,11 @@ describe('screenshot plugin', () => { }); it('should handle file read error with fileSystemExecutor', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, + const mockExecutor = createCommandMatchingMockExecutor({ + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, + sips: { success: true, output: 'Image optimized' }, }); const mockFileSystemExecutor = createMockFileSystemExecutor({ diff --git a/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts index 8dc13ef9a..6673d4a8f 100644 --- a/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts @@ -22,6 +22,12 @@ import * as path from 'node:path'; import type { ChildProcess } from 'node:child_process'; import { setRuntimeInstanceForTests } from '../../../../utils/runtime-instance.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + function createTrackedChild(options?: { pid?: number; killImplementation?: (signal?: NodeJS.Signals | number) => boolean; @@ -191,6 +197,33 @@ describe('stop_app_sim tool', () => { expect(text).not.toContain('Tracked OSLog sessions cleaned up'); }); + it('should resolve simulatorName before stopping', async () => { + const calls: string[][] = []; + const mockExecutor: CommandExecutor = async (command) => { + calls.push(command); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ success: true, output: '' }); + }; + + const result = await runLogic(() => + stop_app_simLogic( + 
{ + simulatorName: 'iPhone 17', + bundleId: 'io.sentry.App', + }, + mockExecutor, + ), + ); + + expect(result.isError).toBeFalsy(); + expect(calls).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'terminate', 'resolved-uuid', 'io.sentry.App'], + ]); + }); + it('should display friendly name when simulatorName is provided alongside resolved simulatorId', async () => { const mockExecutor = createMockExecutor({ success: true, output: '' }); diff --git a/src/mcp/tools/simulator/boot_sim.ts b/src/mcp/tools/simulator/boot_sim.ts index dce3d3f34..d0cf77767 100644 --- a/src/mcp/tools/simulator/boot_sim.ts +++ b/src/mcp/tools/simulator/boot_sim.ts @@ -11,6 +11,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { createBasicDiagnostics } from '../../../utils/diagnostics.ts'; @@ -30,11 +31,12 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), }); type BootSimParams = z.infer; +type ResolvedBootSimParams = BootSimParams & { simulatorId: string }; type BootSimResult = SimulatorActionResultDomainResult; const publicSchemaObject = z.strictObject( @@ -45,7 +47,7 @@ const publicSchemaObject = z.strictObject( ); function createBootSimResult(params: { - simulatorId: string; + simulatorId?: string; didError: boolean; error?: string; diagnosticMessage?: string; @@ -63,9 +65,13 @@ function createBootSimResult(params: { ...(params.diagnosticMessage ? { diagnostics: createBasicDiagnostics({ errors: [params.diagnosticMessage] }) } : {}), - artifacts: { - simulatorId: params.simulatorId, - }, + ...(params.simulatorId + ? 
{ + artifacts: { + simulatorId: params.simulatorId, + }, + } + : {}), }; } @@ -79,7 +85,7 @@ function setStructuredOutput(ctx: ToolHandlerContext, result: BootSimResult): vo export function createBootSimExecutor( executor: CommandExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { try { const result = await executor( @@ -118,11 +124,28 @@ export async function boot_simLogic( params: BootSimParams, executor: CommandExecutor, ): Promise { - log('info', `Starting xcrun simctl boot request for simulator ${params.simulatorId}`); - const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = createBootSimResult({ + didError: true, + error: 'Boot simulator operation failed.', + diagnosticMessage: `Failed to resolve simulator: ${simulatorResult.error ?? 'No simulator UUID returned'}`, + }); + setStructuredOutput(ctx, result); + log('error', `Error during boot simulator operation: ${result.error ?? 
'Unknown error'}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedBootSimParams = { ...params, simulatorId: simulatorResult.uuid }; + log('info', `Starting xcrun simctl boot request for simulator ${resolvedParams.simulatorId}`); + const executeBootSim = createBootSimExecutor(executor); - const result = await executeBootSim(params); + const result = await executeBootSim(resolvedParams); setStructuredOutput(ctx, result); if (result.didError) { @@ -132,8 +155,8 @@ export async function boot_simLogic( ctx.nextStepParams = { open_sim: {}, - install_app_sim: { simulatorId: params.simulatorId, appPath: 'PATH_TO_YOUR_APP' }, - launch_app_sim: { simulatorId: params.simulatorId, bundleId: 'YOUR_APP_BUNDLE_ID' }, + install_app_sim: { simulatorId: resolvedParams.simulatorId, appPath: 'PATH_TO_YOUR_APP' }, + launch_app_sim: { simulatorId: resolvedParams.simulatorId, bundleId: 'YOUR_APP_BUNDLE_ID' }, }; } diff --git a/src/mcp/tools/simulator/install_app_sim.ts b/src/mcp/tools/simulator/install_app_sim.ts index 9faa8ba6a..029696d21 100644 --- a/src/mcp/tools/simulator/install_app_sim.ts +++ b/src/mcp/tools/simulator/install_app_sim.ts @@ -11,6 +11,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { installAppOnSimulator } from '../../../utils/simulator-steps.ts'; import { @@ -36,12 +37,13 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), appPath: z.string(), }); type InstallAppSimParams = z.infer; +type ResolvedInstallAppSimParams = InstallAppSimParams & { simulatorId: string }; const publicSchemaObject = z.strictObject( baseSchemaObject.omit({ @@ -56,8 +58,27 @@ export async 
function install_app_simLogic( fileSystem?: FileSystemExecutor, ): Promise { const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildInstallFailure( + { appPath: params.appPath }, + `Failed to resolve simulator: ${simulatorResult.error ?? 'No simulator UUID returned'}`, + ); + setInstallResultStructuredOutput(ctx, result); + log('error', `Error during install app in simulator operation: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedInstallAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; const executeInstallAppSim = createInstallAppSimExecutor(executor, fileSystem); - const result = await executeInstallAppSim(params); + const result = await executeInstallAppSim(resolvedParams); setInstallResultStructuredOutput(ctx, result); @@ -73,7 +94,7 @@ export async function install_app_simLogic( ctx.nextStepParams = { open_sim: {}, launch_app_sim: { - simulatorId: params.simulatorId, + simulatorId: resolvedParams.simulatorId, bundleId: bundleId || 'YOUR_APP_BUNDLE_ID', }, }; @@ -103,7 +124,7 @@ async function extractBundleId( export function createInstallAppSimExecutor( executor: CommandExecutor, fileSystem?: FileSystemExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { const artifacts = { simulatorId: params.simulatorId, appPath: params.appPath }; diff --git a/src/mcp/tools/simulator/launch_app_sim.ts b/src/mcp/tools/simulator/launch_app_sim.ts index 065958b4b..0299bc75b 100644 --- a/src/mcp/tools/simulator/launch_app_sim.ts +++ b/src/mcp/tools/simulator/launch_app_sim.ts @@ -14,6 +14,7 @@ import { launchSimulatorAppWithLogging, type LaunchWithLoggingResult, } from '../../../utils/simulator-steps.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { 
toErrorMessage } from '../../../utils/errors.ts'; import { buildLaunchFailure, @@ -49,7 +50,7 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), bundleId: z.string(), launchArgs: z.array(z.string()).optional(), @@ -57,6 +58,7 @@ const internalSchemaObject = z.object({ }); export type LaunchAppSimParams = z.infer; +type ResolvedLaunchAppSimParams = LaunchAppSimParams & { simulatorId: string }; type LaunchAppSimResult = LaunchResultDomainResult; export type SimulatorLauncher = typeof launchSimulatorAppWithLogging; @@ -67,8 +69,27 @@ export async function launch_app_simLogic( launcher: SimulatorLauncher = launchSimulatorAppWithLogging, ): Promise { const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildLaunchFailure( + { bundleId: params.bundleId }, + `Failed to resolve simulator: ${simulatorResult.error ?? 
'No simulator UUID returned'}`, + ); + setLaunchResultStructuredOutput(ctx, result); + log('error', `Error during launch app in simulator operation: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedLaunchAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; const executeLaunchAppSim = createLaunchAppSimExecutor(executor, launcher); - const result = await executeLaunchAppSim(params); + const result = await executeLaunchAppSim(resolvedParams); setLaunchResultStructuredOutput(ctx, result); @@ -82,12 +103,12 @@ export async function launch_app_simLogic( ctx.nextStepParams = { open_sim: {}, - stop_app_sim: { simulatorId: params.simulatorId, bundleId: params.bundleId }, + stop_app_sim: { simulatorId: resolvedParams.simulatorId, bundleId: params.bundleId }, }; } function buildSuccessArtifacts( - params: LaunchAppSimParams, + params: ResolvedLaunchAppSimParams, launchResult: LaunchWithLoggingResult, ): LaunchResultArtifacts { return { @@ -102,7 +123,7 @@ function buildSuccessArtifacts( export function createLaunchAppSimExecutor( executor: CommandExecutor, launcher: SimulatorLauncher = launchSimulatorAppWithLogging, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { log('info', `Starting xcrun simctl launch request for simulator ${params.simulatorId}`); diff --git a/src/mcp/tools/simulator/stop_app_sim.ts b/src/mcp/tools/simulator/stop_app_sim.ts index 15c57c43c..0f2f06bf7 100644 --- a/src/mcp/tools/simulator/stop_app_sim.ts +++ b/src/mcp/tools/simulator/stop_app_sim.ts @@ -10,6 +10,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { stopSimulatorLaunchOsLogSessionsForApp } from '../../../utils/log-capture/index.ts'; import { @@ -35,17 +36,18 
@@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), bundleId: z.string(), }); export type StopAppSimParams = z.infer; +type ResolvedStopAppSimParams = StopAppSimParams & { simulatorId: string }; type StopAppSimResult = StopResultDomainResult; export function createStopAppSimExecutor( executor: CommandExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { const simulatorId = params.simulatorId; const artifacts = { simulatorId, bundleId: params.bundleId }; @@ -92,13 +94,32 @@ export async function stop_app_simLogic( params: StopAppSimParams, executor: CommandExecutor, ): Promise { - const simulatorId = params.simulatorId; + const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildStopFailure( + { bundleId: params.bundleId }, + `Failed to resolve simulator: ${simulatorResult.error ?? 
'No simulator UUID returned'}`, + ); + setStopResultStructuredOutput(ctx, result); + log('error', `Error stopping app in simulator: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedStopAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; + const simulatorId = resolvedParams.simulatorId; log('info', `Stopping app ${params.bundleId} in simulator ${simulatorId}`); - const ctx = getHandlerContext(); const executeStopAppSim = createStopAppSimExecutor(executor); - const result = await executeStopAppSim(params); + const result = await executeStopAppSim(resolvedParams); setStopResultStructuredOutput(ctx, result); if (result.didError) { diff --git a/src/mcp/tools/ui-automation/__tests__/batch.test.ts b/src/mcp/tools/ui-automation/__tests__/batch.test.ts new file mode 100644 index 000000000..1bd022c27 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/batch.test.ts @@ -0,0 +1,211 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import * as z from 'zod'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; +import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { batchLogic, createBatchExecutor, handler, schema } from '../batch.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runBatch( + params: Parameters[0], + executor = createTrackingExecutor().executor, + axeHelpers = createMockAxeHelpers(), +): Promise { + const { ctx, run } = 
createMockToolHandlerContext(); + await run(() => batchLogic(params, executor, axeHelpers)); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} + +describe('Batch UI Automation Tool', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes batch steps and AXe batch options', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('steps'); + expect(schema).toHaveProperty('axCache'); + expect(schema).toHaveProperty('tapStyle'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ steps: ['tap --id login'] }).success).toBe(true); + expect( + schemaObject.safeParse({ + steps: ['tap --id login', 'type user@example.com'], + axCache: 'perBatch', + typeSubmission: 'chunked', + typeChunkSize: 8, + tapStyle: 'automatic', + continueOnError: true, + waitTimeout: 2, + pollInterval: 0.25, + }).success, + ).toBe(true); + expect(schemaObject.safeParse({ steps: [] }).success).toBe(false); + expect(schemaObject.safeParse({ steps: [''] }).success).toBe(false); + expect(schemaObject.safeParse({ steps: ['tap --id login'], pollInterval: 0 }).success).toBe( + false, + ); + }); + }); + + describe('Command Generation', () => { + it('builds repeated AXe --step arguments', async () => { + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { + simulatorId, + steps: ['tap --id username-field', 'type user@example.com'], + }, + executor, + ); + + expect(result).toMatchObject({ + didError: false, + action: { type: 'batch', stepCount: 2 }, + }); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'batch', + '--step', + 'tap --id username-field', + '--step', + 'type user@example.com', + '--udid', + simulatorId, + ], + ]); + }); + + it('passes AXe batch options through unchanged', async () => { + const { 
calls, executor } = createTrackingExecutor(); + + await runBatch( + { + simulatorId, + steps: ['tap --id login'], + axCache: 'perStep', + typeSubmission: 'composite', + typeChunkSize: 4, + tapStyle: 'physical', + continueOnError: true, + waitTimeout: 3, + pollInterval: 0.5, + }, + executor, + ); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'batch', + '--step', + 'tap --id login', + '--ax-cache', + 'perStep', + '--type-submission', + 'composite', + '--type-chunk-size', + '4', + '--tap-style', + 'physical', + '--continue-on-error', + '--wait-timeout', + '3', + '--poll-interval', + '0.5', + '--udid', + simulatorId, + ]); + }); + }); + + describe('Runtime snapshot invalidation', () => { + it('clears the cached runtime snapshot after a successful batch', async () => { + recordSnapshot([createNode()]); + + const result = await runBatch({ simulatorId, steps: ['tap --id login'] }); + + expect(result.didError).toBe(false); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + + it('clears the cached runtime snapshot when AXe runs and reports batch failure', async () => { + recordSnapshot([createNode()]); + + const result = await runBatch( + { simulatorId, steps: ['type Secret123'] }, + createFailingExecutor('step failed: type Secret123'), + ); + + expect(result.didError).toBe(true); + expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + + it('preserves the cached runtime snapshot when AXe is unavailable before execution', async () => { + recordSnapshot([createNode()]); + const { executor } = createTrackingExecutor(); + + const result = await runBatch( + { simulatorId, steps: ['tap --id login'] }, + executor, + createMockAxeHelpers({ getAxePathReturn: null }), + ); + + expect(result.didError).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('preserves the cached runtime snapshot when the debugger guard blocks before AXe runs', async () => { + 
recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + const debuggerManager = new DebuggerManager(); + vi.spyOn(debuggerManager, 'findSessionForSimulator').mockReturnValue({ + id: 'debug-session-1', + backend: 'dap', + simulatorId, + pid: 1234, + createdAt: 0, + lastUsedAt: 0, + }); + vi.spyOn(debuggerManager, 'getExecutionState').mockResolvedValue({ + status: 'stopped', + reason: 'breakpoint', + }); + const executeBatch = createBatchExecutor(executor, createMockAxeHelpers(), debuggerManager); + + const result = await executeBatch({ simulatorId, steps: ['tap --id login'] }); + + expect(result.didError).toBe(true); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + }); + + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { steps: ['tap --id login'] }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/button.test.ts b/src/mcp/tools/ui-automation/__tests__/button.test.ts index d83851720..0d672889c 100644 --- a/src/mcp/tools/ui-automation/__tests__/button.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/button.test.ts @@ -1,11 +1,11 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import * as z from 'zod'; import { createMockExecutor, createNoopExecutor, createMockCommandResponse, } from '../../../../test-utils/mock-executors.ts'; -import { schema, handler, buttonLogic } from '../button.ts'; +import { schema, handler, buttonLogic, createButtonExecutor } from '../button.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { 
allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; @@ -23,6 +23,8 @@ describe('Button Plugin', () => { expect(schemaObj.safeParse({ buttonType: 'home', duration: 2.5 }).success).toBe(true); expect(schemaObj.safeParse({ buttonType: 'invalid-button' }).success).toBe(false); expect(schemaObj.safeParse({ buttonType: 'home', duration: -1 }).success).toBe(false); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 10.1 }).success).toBe(false); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', @@ -60,6 +62,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -97,6 +101,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -135,6 +141,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -171,6 +179,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -184,6 +194,46 @@ describe('Button Plugin', () => { }); }); + describe('Executor Behavior', () => { + it('waits briefly after successful button presses so system UI transitions can settle', async () => { + vi.useFakeTimers(); + try { + const mockExecutor = createMockExecutor({ + success: true, + output: 'button press completed', + error: undefined, + process: { pid: 12345 }, + }); + + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + const executeButton = createButtonExecutor(mockExecutor, mockAxeHelpers, undefined, 500); + let settled = false; + const resultPromise = executeButton({ + simulatorId: '12345678-1234-4234-8234-123456789012', + buttonType: 'home', + }).then((result) => { + settled = true; + return result; + }); + + await vi.advanceTimersByTimeAsync(499); + expect(settled).toBe(false); + + 
await vi.advanceTimersByTimeAsync(1); + const result = await resultPromise; + + expect(settled).toBe(true); + expect(result.didError).toBe(false); + } finally { + vi.useRealTimers(); + } + }); + }); + describe('Handler Behavior (Complete Literal Returns)', () => { it('should surface session default requirement when simulatorId is missing', async () => { const result = await callHandler(handler, { buttonType: 'home' }); @@ -235,7 +285,7 @@ describe('Button Plugin', () => { expect(result.isError).toBe(true); expect(allText(result)).toContain('Parameter validation failed'); - expect(allText(result)).toContain('Duration must be non-negative'); + expect(allText(result)).toContain('Duration must be greater than 0 seconds'); }); it('should return success for valid button press', async () => { @@ -259,6 +309,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -288,6 +340,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -309,6 +363,8 @@ describe('Button Plugin', () => { }, createNoopExecutor(), mockAxeHelpers, + undefined, + 0, ), ); @@ -337,6 +393,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -364,6 +422,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -391,6 +451,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -418,6 +480,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); diff --git a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts index a010bcd32..172cdf512 100644 --- a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts @@ -37,7 +37,11 @@ describe('Gesture Plugin', () => { ).toBe(true); expect(schemaObj.safeParse({ preset: 'invalid-preset' 
}).success).toBe(false); expect(schemaObj.safeParse({ preset: 'scroll-up', screenWidth: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', screenWidth: 2001 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', screenHeight: 3001 }).success).toBe(false); expect(schemaObj.safeParse({ preset: 'scroll-up', duration: -1 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', duration: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', delta: 201 }).success).toBe(false); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts index b9d8be0ec..bd7f28160 100644 --- a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts @@ -36,6 +36,8 @@ describe('Key Press Tool', () => { expect(schemaObj.safeParse({ keyCode: 'invalid' }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: -1 }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: 256 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCode: 40, duration: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCode: 40, duration: 10.1 }).success).toBe(false); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts b/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts index 9e71d84a6..576469e1a 100644 --- a/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts @@ -29,6 +29,10 @@ describe('Key Sequence Tool', () => { expect(schemaObj.safeParse({ keyCodes: [-1] }).success).toBe(false); expect(schemaObj.safeParse({ keyCodes: [256] }).success).toBe(false); expect(schemaObj.safeParse({ keyCodes: 
[40], delay: -0.1 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCodes: [40], delay: 5.1 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCodes: Array.from({ length: 101 }, () => 40) }).success).toBe( + false, + ); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts index d6be95a7e..35a641078 100644 --- a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts @@ -1,457 +1,191 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, long_pressLogic } from '../long_press.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runLongPress( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => long_pressLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as 
UiActionResultDomainResult; +} describe('Long Press Plugin', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); - describe('Export Field Validation (Literal)', () => { - it('should have handler function', () => { + describe('Schema Validation', () => { + it('exposes elementRef and duration without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('duration'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: 1500, - }).success, - ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 100.5, - y: 200, - duration: 1500, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200.5, - duration: 1500, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: 0, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: -100, - }).success, - ).toBe(false); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); - }); - }); - - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await callHandler(handler, { x: 100, y: 200, duration: 1500 }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); - - it('should surface validation errors once 
simulator default exists', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { x: 100, y: 200, duration: 0 }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('duration: Duration of the long press in milliseconds'); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 1500 }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 0 }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 10_001 }).success).toBe(false); + expect(schemaObject.safeParse({ duration: 1500 }).success).toBe(false); }); }); describe('Command Generation', () => { - it('should generate correct axe command for basic long press', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('long presses the referenced element center and converts milliseconds to AXe seconds', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - trackingExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1500 }, + executor, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(result).toMatchObject({ + didError: false, + action: { type: 'long-press', elementRef: 'e1', durationMs: 1500 }, + }); + 
expect(calls).toHaveLength(1); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '100', + '60', '-y', - '200', + '40', '--down', '--up', '--delay', '1.5', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for long press with different coordinates', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 50, - y: 75, - duration: 2000, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'touch', - '-x', - '50', - '-y', - '75', - '--down', - '--up', - '--delay', - '2', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('uses the switch activation point for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), ]); - }); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command for short duration long press', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; + await runLongPress({ simulatorId, elementRef: 'e1', duration: 1000 }, executor); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: 
'12345678-1234-4234-8234-123456789012', - x: 300, - y: 400, - duration: 500, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '300', + '307', '-y', - '400', + '903', '--down', '--up', '--delay', - '0.5', + '1', '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); - - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 250, - duration: 3000, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', - 'touch', - '-x', - '150', - '-y', - '250', - '--down', - '--up', - '--delay', - '3', - '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should return success for valid long press execution', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'long press completed', - error: '', - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, 
- }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Long press at (100, 200) for 1500ms simulated successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, - process: mockProcess, - }); - - const mockAxeHelpers = { - getAxePath: () => null, // Mock axe not found - getBundledAxeEnvironment: () => ({}), - }; + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - process: mockProcess, - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - 
long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e404', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate long press at (100, 200).'); - expect(text).toContain('axe command failed'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle SystemError from command execution', async () => { - const mockExecutor = () => { - throw new Error('ENOENT: no such file or directory'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ role: 'AXApplication', type: 'Application' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle unexpected Error objects', async () => { - const mockExecutor = () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, 
- y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', duration: 1500 }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected string errors', async () => { - const mockExecutor = () => { - throw 'String error'; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode()]); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1500 }, + createFailingExecutor('long press failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts index 43244a351..914a4f0df 100644 --- a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts @@ -1,9 +1,10 @@ import { describe, expect, it } from 'vitest'; import { + createCommandMatchingMockExecutor, 
createMockExecutor, createMockFileSystemExecutor, } from '../../../../test-utils/mock-executors.ts'; -import { runToolLogic } from '../../../../test-utils/test-helpers.ts'; +import { createMockToolHandlerContext, runToolLogic } from '../../../../test-utils/test-helpers.ts'; import { buttonLogic } from '../button.ts'; import { gestureLogic } from '../gesture.ts'; import { key_pressLogic } from '../key_press.ts'; @@ -15,6 +16,8 @@ import { swipeLogic } from '../swipe.ts'; import { tapLogic } from '../tap.ts'; import { touchLogic } from '../touch.ts'; import { type_textLogic } from '../type_text.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; +import { createNode, recordSnapshot } from './ui-action-test-helpers.ts'; const simulatorId = '12345678-1234-4234-8234-123456789012'; @@ -71,60 +74,76 @@ describe('ui automation non-streaming tools', () => { }, { name: 'long_press', - run: () => - long_pressLogic( - { simulatorId, x: 100, y: 200, duration: 1500 }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return long_pressLogic( + { simulatorId, elementRef: 'e1', duration: 1500 }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Long press at (100, 200) for 1500ms simulated successfully.', + ); + }, }, { name: 'swipe', - run: () => - swipeLogic( - { simulatorId, x1: 10, y1: 20, x2: 30, y2: 40 }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + return swipeLogic( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Swipe from (10, 20) to (30, 40) simulated successfully.', + ); + }, }, { name: 'tap', - run: () => - tapLogic( - { simulatorId, x: 100, y: 200 }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return tapLogic( + { simulatorId, elementRef: 'e1' 
}, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Tap at (100, 200) simulated successfully.', + ); + }, }, { name: 'touch', - run: () => - touchLogic( - { simulatorId, x: 100, y: 200, down: true }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return touchLogic( + { simulatorId, elementRef: 'e1', down: true }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Touch event (touch down) at (100, 200) executed successfully.', + ); + }, }, { name: 'type_text', - run: () => - type_textLogic( - { simulatorId, text: 'Hello' }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + return type_textLogic( + { simulatorId, elementRef: 'e1', text: 'Hello' }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Text typing simulated successfully.', + ); + }, + expectedText: 'Text typed into elementRef e1 (5 characters) successfully.', }, ]; for (const testCase of cases) { const { result } = await runToolLogic(testCase.run); expect(result.events, `${testCase.name} should not emit progress events`).toEqual([]); - expect(result.text()).toContain(testCase.expectedText); + expect(result.isError()).toBe(false); + if (testCase.expectedText) { + expect(result.text()).toContain(testCase.expectedText); + } else { + expect(result.text().trim().length).toBeGreaterThan(0); + } } }); @@ -132,7 +151,19 @@ describe('ui automation non-streaming tools', () => { const { result } = await runToolLogic(() => screenshotLogic( { simulatorId, returnFormat: 'path' }, - createMockExecutor({ success: true, output: 'Screenshot saved' }), + createCommandMatchingMockExecutor({ + 'xcrun simctl list devices -j': { + output: JSON.stringify({ + devices: { + 'iOS 26.0': [{ udid: simulatorId, name: 'iPhone 17', state: 'Booted' }], + }, + }), + }, + 'xcrun simctl io': { output: 'Screenshot saved' }, + 'swift -e': { output: 
'368,800' }, + 'sips -Z': { output: 'optimized' }, + 'sips -g pixelWidth': { output: 'pixelWidth: 368\npixelHeight: 800' }, + }), createMockFileSystemExecutor(), { tmpdir: () => '/tmp', join: (...paths) => paths.join('/') }, { v4: () => 'test-uuid' }, @@ -143,8 +174,9 @@ describe('ui automation non-streaming tools', () => { expect(result.text()).toContain('Screenshot captured'); }); - it('returns snapshot_ui text from structured output without progress events', async () => { - const { result } = await runToolLogic(() => + it('returns snapshot_ui structured output without emitting progress events', async () => { + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic( { simulatorId, @@ -159,8 +191,17 @@ describe('ui automation non-streaming tools', () => { ); expect(result.events).toEqual([]); - expect(result.text()).toContain('Accessibility hierarchy retrieved successfully.'); - expect(result.text()).toContain('Accessibility Hierarchy'); - expect(result.text()).toContain('"type" : "Button"'); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + elements: [expect.objectContaining({ ref: 'e1' })], + }), + ); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts new file mode 100644 index 000000000..e8bb69054 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -0,0 +1,562 @@ +import { describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import { + createRuntimeSnapshotRecord, + extractAccessibilityHierarchy, + getPrimaryRuntimeElement, + getRuntimeElementActivationPoint, + getRuntimeElementSwipePoints, + RuntimeSnapshotParseError, +} from '../shared/runtime-snapshot.ts'; + +const simulatorId = '12345678-1234-4234-8234-123456789012'; + +function createNode(overrides: Partial = {}): AccessibilityNode { + return { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + ...overrides, + }; +} + +describe('runtime snapshot normalization', () => { + it('flattens AX hierarchy into RuntimeSnapshotV1 public elements', () => { + const child = createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Email', + AXValue: 'cam@example.com', + AXUniqueId: 'email-field', + AXSelected: true, + frame: { x: 20, y: 80, width: 220, height: 44 }, + }); + const root = createNode({ + type: 'Window', + role: 'AXWindow', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [child], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + }), + ); + 
expect(snapshot.payload.elements.map((element) => element.ref)).toEqual(['e1', 'e2']); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + ref: 'e2', + role: 'text-field', + label: 'Email', + value: 'cam@example.com', + identifier: 'email-field', + frame: { x: 20, y: 80, width: 220, height: 44 }, + state: { enabled: true, selected: true, visible: true }, + actions: expect.arrayContaining(['tap', 'typeText', 'longPress', 'touch']), + }), + ); + expect(snapshot.payload.screenHash).toMatch(/^[a-z0-9]+$/); + expect(snapshot.payload.seq).toBe(0); + expect(snapshot.payload.actions).toContainEqual({ + action: 'typeText', + elementRef: 'e2', + label: 'Email', + }); + expect(snapshot.elements[1]?.rawNode).toBe(child); + expect('rawNode' in snapshot.payload.elements[1]!).toBe(false); + expect(snapshot.elementsByRef.get('e2')?.rawNode).toBe(child); + }); + + it('derives deterministic screen hashes from normalized UI content', () => { + const uiHierarchy = [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Cancel' })]; + + const first = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy, nowMs: 1_000 }); + const second = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy, nowMs: 2_000 }); + const changed = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Done' })], + nowMs: 1_000, + }); + + expect(first.payload.screenHash).toBe(second.payload.screenHash); + expect(first.payload.screenHash).not.toBe(changed.payload.screenHash); + }); + + it('parses AXe describe-ui response envelopes', () => { + const responseText = JSON.stringify({ + elements: [createNode({ AXLabel: 'Continue' })], + }); + + const hierarchy = extractAccessibilityHierarchy(responseText); + + expect(hierarchy).toHaveLength(1); + expect(hierarchy[0]?.AXLabel).toBe('Continue'); + }); + + it('throws typed parse errors for invalid describe-ui responses', () => { + expect(() => 
extractAccessibilityHierarchy('not json')).toThrow(RuntimeSnapshotParseError); + expect(() => extractAccessibilityHierarchy(JSON.stringify({ value: [] }))).toThrow( + RuntimeSnapshotParseError, + ); + }); + + it('selects the primary element for semantic next steps', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXLabel: 'Continue' })], + nowMs: 1_000, + }); + + expect(getPrimaryRuntimeElement(snapshot.payload, 'tap')?.label).toBe('Continue'); + expect(getPrimaryRuntimeElement(snapshot.payload, 'typeText')).toBe( + snapshot.payload.elements[0], + ); + }); + + it('does not infer swipeWithin on top-level applications with overflowing descendants', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Updated just now', + frame: { x: 140, y: 1200, width: 120, height: 20 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'application', + label: 'Weather', + actions: [], + }), + ); + expect(snapshot.payload.actions).not.toContainEqual({ + action: 'swipeWithin', + elementRef: 'e1', + label: 'Weather', + }); + }); + + it('does not infer swipeWithin on top-level windows with overflowing descendants', () => { + const root = createNode({ + type: 'Window', + role: 'AXWindow', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Updated just now', + frame: { x: 140, y: 1200, width: 120, height: 20 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + 
expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'window', + label: 'Weather', + actions: [], + }), + ); + }); + + it('does not infer swipeWithin when descendants fit inside the container', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Visible label', + frame: { x: 20, y: 200, width: 120, height: 20 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).toEqual([]); + }); + + it('keeps sheet hosts swipeable when the current visible sheet content fits', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Expanded', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + frame: { x: 36, y: 603, width: 330, height: 28 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'application', + label: 'Weather', + actions: ['swipeWithin'], + }), + ); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'down')).toEqual({ + ok: true, + from: { x: 201, y: 372 }, + to: { x: 201, y: 677 }, + }); + }); + + it('removes actions from elements outside the viewport', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Switch', + role: 
'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + frame: { x: 40, y: 890, width: 300, height: 30 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'switch', + label: 'Reduce transparency', + value: '0', + state: expect.objectContaining({ visible: false }), + actions: [], + }), + ); + }); + + it('removes point-based actions from clipped elements with offscreen activation points', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Lisbon', + frame: { x: 20, y: 839.33, width: 362, height: 89 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'button', + label: 'Lisbon', + state: expect.objectContaining({ visible: true }), + actions: [], + }), + ); + }); + + it('uses an upper activation point for bottom-clipped visible targets', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + frame: { x: 324.87, y: 786.62, width: 49.93, height: 85.46 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]?.actions).toContain('tap'); + expect(getRuntimeElementActivationPoint(snapshot.elements[1]!)).toEqual({ x: 350, y: 795 }); + }); + + it('does not mark unlabeled custom-action internals as tap targets', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + 
createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXUniqueId: undefined, + identifier: undefined, + frame: { x: 30, y: 450, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + createNode({ + type: 'Other', + role: 'AXGroup', + AXUniqueId: 'label-view', + frame: { x: 30, y: 500, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + createNode({ + type: 'Other', + role: 'AXGroup', + AXUniqueId: 'named-custom-target', + frame: { x: 30, y: 550, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['tap']), + }), + ); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'other', + identifier: 'label-view', + actions: expect.not.arrayContaining(['tap']), + }), + ); + expect(snapshot.payload.elements[2]).toEqual( + expect.objectContaining({ + role: 'other', + identifier: 'named-custom-target', + actions: expect.arrayContaining(['tap']), + }), + ); + }); + + it('does not mark standalone other elements as swipeable', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Suggested', + frame: { x: 30, y: 450, width: 80, height: 32 }, + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Suggested', + actions: expect.not.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('does not infer swipeWithin on small other wrappers with overflowing descendants', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + frame: { x: 0, y: 0, width: 80, height: 80 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + 
frame: { x: 10, y: 100, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('infers swipeWithin on other containers with overflowing descendants', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Scrollable panel', + frame: { x: 0, y: 0, width: 200, height: 200 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + frame: { x: 10, y: 260, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Scrollable panel', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('derives trailing activation points for wide switch rows', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementActivationPoint(snapshot.elements[0]!)).toEqual({ x: 307, y: 903 }); + }); + + it('keeps full-screen swipe points away from unsafe viewport edges', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'down')).toEqual({ + ok: true, + from: { x: 201, y: 131 }, + to: { x: 201, y: 743 }, + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'left')).toEqual({ + ok: true, + from: { x: 342, y: 524 }, + to: { x: 60, y: 524 }, + }); + }); + + it('rejects unsafe 
swipe point derivation', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 1, height: 1 }, + }), + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 2, height: 100 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toMatchObject({ + ok: false, + message: expect.stringContaining('too small'), + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[1]!, 'right')).toMatchObject({ + ok: false, + message: expect.stringContaining('non-degenerate'), + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts b/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts index c7e62669f..c3017dcc7 100644 --- a/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts @@ -1,7 +1,6 @@ import { describe, it, expect, beforeEach } from 'vitest'; import * as z from 'zod'; import { - createMockExecutor, createMockFileSystemExecutor, mockProcess, } from '../../../../test-utils/mock-executors.ts'; @@ -16,6 +15,43 @@ import { } from '../screenshot.ts'; import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +function isDeviceListCommand(command: string[]): boolean { + return command.join(' ') === 'xcrun simctl list devices -j'; +} + +function bootedDeviceListJson(simulatorId: string): string { + return JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { + udid: simulatorId, + name: 'iPhone 15 Pro', + state: 'Booted', + }, + ], + }, + }); +} + +function createBootedScreenshotMockExecutor(simulatorId: string) { + return async (command: string[]) => { + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson(simulatorId), + error: undefined, + process: mockProcess, + }; + } 
+ return { + success: true, + output: 'Screenshot saved', + error: undefined, + process: mockProcess, + }; + }; +} + describe('Screenshot Plugin', () => { beforeEach(() => { sessionStore.clear(); @@ -68,6 +104,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('12345678-1234-4234-8234-123456789012'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -93,8 +137,7 @@ describe('Screenshot Plugin', () => { ), ); - // Should capture the screenshot command first - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -108,6 +151,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -133,7 +184,7 @@ describe('Screenshot Plugin', () => { ), ); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -147,6 +198,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('98765432-1098-7654-3210-987654321098'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -175,7 +234,7 @@ describe('Screenshot Plugin', () => { ), ); - expect(capturedCommands[0]).toEqual([ + 
expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -189,6 +248,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('12345678-1234-4234-8234-123456789012'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -215,24 +282,22 @@ describe('Screenshot Plugin', () => { ); // Verify the command structure but not the exact UUID since it's generated - expect(capturedCommands[0].slice(0, 5)).toEqual([ + expect(capturedCommands[1].slice(0, 5)).toEqual([ 'xcrun', 'simctl', 'io', '12345678-1234-4234-8234-123456789012', 'screenshot', ]); - expect(capturedCommands[0][5]).toMatch(/^\/tmp\/screenshot_[a-f0-9-]+\.png$/); + expect(capturedCommands[1][5]).toMatch(/^\/tmp\/screenshot_[a-f0-9-]+\.png$/); }); }); describe('Handler Behavior (Complete Literal Returns)', () => { it('should handle file reading errors', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Screenshot saved', - error: undefined, - }); + const mockExecutor = createBootedScreenshotMockExecutor( + '12345678-1234-4234-8234-123456789012', + ); const mockFileSystemExecutor = createMockFileSystemExecutor({ readFile: async () => { @@ -260,11 +325,9 @@ describe('Screenshot Plugin', () => { it('should handle file cleanup errors gracefully', async () => { const mockImageBuffer = Buffer.from('fake-image-data', 'utf8'); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Screenshot saved', - error: undefined, - }); + const mockExecutor = createBootedScreenshotMockExecutor( + '12345678-1234-4234-8234-123456789012', + ); const mockFileSystemExecutor = createMockFileSystemExecutor({ readFile: async () => mockImageBuffer.toString('utf8'), @@ -366,6 +429,45 @@ describe('Screenshot Plugin', () 
=> { ).toBe(true); }); + it('fails before screenshot capture when the simulator is shutdown', async () => { + const capturedCommands: string[][] = []; + const mockExecutor = async (command: string[]) => { + capturedCommands.push(command); + return { + success: true, + output: JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { + udid: '12345678-1234-4234-8234-123456789012', + name: 'iPhone 15 Pro', + state: 'Shutdown', + }, + ], + }, + }), + error: undefined, + process: mockProcess, + }; + }; + + const result = await runLogic(() => + screenshotLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + }, + mockExecutor, + createMockFileSystemExecutor(), + ), + ); + + expect(result.isError).toBe(true); + const text = allText(result); + expect(text).toContain('Failed to capture screenshot.'); + expect(text).toContain('is Shutdown'); + expect(capturedCommands).toEqual([['xcrun', 'simctl', 'list', 'devices', '-j']]); + }); + it('should handle SystemError from command execution', async () => { const mockExecutor = async () => { throw new SystemError('System error occurred'); @@ -614,20 +716,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -689,20 +791,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: 
mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -756,20 +858,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -819,20 +921,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts new file mode 100644 index 000000000..c03ed6219 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts @@ -0,0 +1,142 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { + 
__resetRuntimeSnapshotStoreForTests, + clearRuntimeSnapshot, + getRuntimeSnapshot, + getRuntimeSnapshotLookup, + getSnapshotUiWarning, + recordRuntimeSnapshot, + resolveElementRef, +} from '../shared/snapshot-ui-state.ts'; + +const simulatorId = '12345678-1234-4234-8234-123456789012'; + +const node: AccessibilityNode = { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + AXLabel: 'Continue', +}; + +describe('runtime snapshot store', () => { + beforeEach(() => { + __resetRuntimeSnapshotStoreForTests(); + }); + + it('stores runtime snapshots by simulator id', () => { + const nowMs = Date.now(); + const snapshot = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs }); + + recordRuntimeSnapshot(snapshot); + + expect(getRuntimeSnapshot(simulatorId, nowMs + 1_000)).toBe(snapshot); + expect(getRuntimeSnapshotLookup(simulatorId, nowMs + 1_000)).toEqual({ + status: 'available', + snapshot, + snapshotAgeMs: 1_000, + }); + expect(getSnapshotUiWarning(simulatorId)).toBeNull(); + }); + + it('assigns monotonic snapshot sequences when recording snapshots', () => { + const first = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs: 1_000 }); + const second = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs: 2_000 }); + + recordRuntimeSnapshot(first); + clearRuntimeSnapshot(simulatorId); + recordRuntimeSnapshot(second); + + expect(first.seq).toBe(1); + expect(first.payload.seq).toBe(1); + expect(second.seq).toBe(2); + expect(second.payload.seq).toBe(2); + expect(getRuntimeSnapshot(simulatorId, 2_000)).toBe(second); + }); + + it('expires stale snapshots and clears them from the store', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + expect(getRuntimeSnapshotLookup(simulatorId, 62_000)).toEqual({ + status: 'expired', + 
snapshot: null, + snapshotAgeMs: 61_000, + }); + expect(getRuntimeSnapshot(simulatorId, 62_000)).toBeNull(); + }); + + it('clears snapshots explicitly', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + clearRuntimeSnapshot(simulatorId); + + expect(getRuntimeSnapshotLookup(simulatorId)).toEqual({ status: 'missing', snapshot: null }); + }); + + it('resolves actionable element refs', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + expect(resolveElementRef(simulatorId, 'e1', 'tap', 2_000)).toEqual({ + ok: true, + snapshot, + element: snapshot.elements[0], + snapshotAgeMs: 1_000, + }); + }); + + it('returns typed recoverable errors for missing, expired, not-found, and not-actionable refs', () => { + expect(resolveElementRef(simulatorId, 'e1', 'tap', 1_000)).toEqual({ + ok: false, + error: expect.objectContaining({ code: 'SNAPSHOT_MISSING' }), + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + expect(resolveElementRef(simulatorId, 'e1', 'tap', 62_000)).toEqual({ + ok: false, + error: expect.objectContaining({ code: 'SNAPSHOT_EXPIRED', snapshotAgeMs: 61_000 }), + }); + + recordRuntimeSnapshot(snapshot); + expect(resolveElementRef(simulatorId, 'e404', 'tap', 2_000)).toEqual({ + ok: false, + error: expect.objectContaining({ + code: 'ELEMENT_REF_NOT_FOUND', + elementRef: 'e404', + snapshotAgeMs: 1_000, + }), + }); + + expect(resolveElementRef(simulatorId, 'e1', 'typeText', 2_000)).toEqual({ + ok: false, + error: expect.objectContaining({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + snapshotAgeMs: 1_000, + }), + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts 
b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index 42a816db3..4abea4060 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -4,7 +4,16 @@ import { createMockExecutor, createNoopExecutor } from '../../../../test-utils/m import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { schema, handler, snapshot_uiLogic } from '../snapshot_ui.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +import { + allText, + callHandler, + createMockToolHandlerContext, + runLogic, +} from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; describe('Snapshot UI Plugin', () => { describe('Export Field Validation (Literal)', () => { @@ -16,6 +25,7 @@ describe('Snapshot UI Plugin', () => { const schemaObject = z.object(schema); expect(schemaObject.safeParse({}).success).toBe(true); + expect(schemaObject.safeParse({ sinceScreenHash: 'screen-hash' }).success).toBe(true); const withSimId = schemaObject.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', @@ -69,7 +79,9 @@ describe('Snapshot UI Plugin', () => { return mockExecutor(...args); }; - const result = await runLogic(() => + __resetRuntimeSnapshotStoreForTests(); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic( { simulatorId: '12345678-1234-4234-8234-123456789012', @@ -86,18 +98,613 @@ describe('Snapshot UI Plugin', () => { { env: {} }, ]); - expect(result.isError).toBeFalsy(); - const text = allText(result); - expect(text).toContain('Accessibility hierarchy retrieved successfully.'); - expect(text).toContain('Accessibility Hierarchy'); - expect(text).toContain('"type" : "Button"'); - expect(text).toContain('"width" : 50'); - 
expect(text).toContain('Use frame coordinates for tap/swipe'); - expect(result.nextStepParams).toEqual({ - snapshot_ui: { simulatorId: '12345678-1234-4234-8234-123456789012' }, - tap: { simulatorId: '12345678-1234-4234-8234-123456789012', x: 0, y: 0 }, - screenshot: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + expect(result.isError()).toBe(false); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + expect(ctx.structuredOutput?.result.kind).toBe('capture-result'); + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash: expect.any(String), + seq: 1, + elements: [ + expect.objectContaining({ + ref: 'e1', + role: 'button', + frame: { x: 100, y: 200, width: 50, height: 30 }, + state: { enabled: true, visible: true }, + actions: expect.arrayContaining(['tap']), + }), + ], + }), + ); + expect( + capture && 'type' in capture && capture.type === 'runtime-snapshot' ? capture.actions : [], + ).toContainEqual({ action: 'tap', elementRef: 'e1' }); + expect( + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 'rawNode' in capture.elements[0]! + : true, + ).toBe(false); + const storedSnapshot = getRuntimeSnapshot('12345678-1234-4234-8234-123456789012'); + expect(storedSnapshot?.payload).toBe(capture); + const elementRef = + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 
capture.elements[0]?.ref + : undefined; + expect(ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', + }, + }, + { + label: 'Tap an elementRef', + tool: 'tap', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef, + }, + }, + { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + ]); + }); + + it('should return unchanged capture when sinceScreenHash matches the current screen hash', async () => { + const uiHierarchy = + '{"elements": [{"type": "Button", "frame": {"x": 100, "y": 200, "width": 50, "height": 30}}]}'; + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const first = createMockToolHandlerContext(); + await first.run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + const firstCapture = + first.ctx.structuredOutput?.result.kind === 'capture-result' + ? first.ctx.structuredOutput.result.capture + : undefined; + const screenHash = + firstCapture && 'screenHash' in firstCapture ? 
firstCapture.screenHash : undefined; + expect(screenHash).toEqual(expect.any(String)); + + const second = createMockToolHandlerContext(); + await second.run(() => + snapshot_uiLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + sinceScreenHash: screenHash, + }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + second.ctx.structuredOutput?.result.kind === 'capture-result' + ? second.ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual({ + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash, + seq: 2, }); + expect(getRuntimeSnapshot('12345678-1234-4234-8234-123456789012')?.seq).toBe(2); + expect(second.ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); + }); + + it('should return full runtime snapshot when sinceScreenHash differs from the current screen hash', async () => { + const uiHierarchy = + '{"elements": [{"type": "Button", "frame": {"x": 100, "y": 200, "width": 50, "height": 30}}]}'; + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + sinceScreenHash: 'different-screen-hash', + }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash: expect.any(String), + seq: 1, + elements: [expect.objectContaining({ ref: 'e1' })], + }), + ); + }); + + it('should omit tap next-step guidance when no tap targets exist', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Loading weather...', + frame: { x: 20, y: 100, width: 200, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', + }, + }, + { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + ]); + }); + + it('should prefer a non-text-field tap target in next steps', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 40, width: 200, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Submit', + frame: { x: 20, y: 100, width: 100, height: 44 }, + }, + ], + }); + const 
mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should prefer a useful digit over calculator utility controls for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'C', + frame: { x: 20, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Âą', + frame: { x: 100, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '%', + frame: { x: 180, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '7', + frame: { x: 20, y: 120, width: 70, height: 70 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should prefer an unselected segmented choice over 
an already-selected choice for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: '°F', + AXValue: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '°C', + AXValue: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should skip low-value controls for tap next-step guidance when another tap target exists', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 150, y: 10, width: 80, height: 20 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 300, y: 40, width: 60, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Clear search', + frame: { x: 30, y: 90, width: 120, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Berlin, Germany', + frame: { x: 20, y: 150, width: 320, height: 80 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + 
+ __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should not prefer destructive controls for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + AXIdentifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 140, width: 300, height: 80 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should not suggest the sheet grabber as a tap next step', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 150, y: 10, width: 80, height: 20 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 300, y: 40, width: 60, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + 
process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should prefer content-rich targets for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland', + AXIdentifier: 'weather.locationButton', + frame: { x: 20, y: 40, width: 160, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + AXIdentifier: 'weather.settingsButton', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'PRECIP., 78%, Next 24 hours', + AXIdentifier: 'weather.precipitationCard', + frame: { x: 20, y: 260, width: 340, height: 140 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e3', + }); + }); + + it('should clear runtime snapshot store when AXe output cannot be parsed', async () => { + 
__resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + + const invalidJsonExecutor = createMockExecutor({ + success: true, + output: 'not json', + error: undefined, + process: { pid: 12345 }, + }); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic({ simulatorId }, invalidJsonExecutor, mockAxeHelpers)); + + expect(result.isError()).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.uiError + : undefined, + ).toEqual( + expect.objectContaining({ + code: 'SNAPSHOT_PARSE_FAILED', + }), + ); }); it('should handle DependencyError when axe is not available', async () => { diff --git a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts index 5fe3fb62c..c85265a46 100644 --- a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts @@ -1,228 +1,156 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; -import { SystemError } from '../../../../utils/errors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; - -import { schema, handler, type AxeHelpers, swipeLogic, type SwipeParams } from '../swipe.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; - -function createMockAxeHelpers(): AxeHelpers { - return { - getAxePath: () => '/mocked/axe/path', - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; -} - -function createMockAxeHelpersWithNullPath(): AxeHelpers { - return { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { schema, handler, swipeLogic } from '../swipe.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; 
+ +async function runSwipe( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => swipeLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Swipe Tool', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes withinElementRef and direction without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('withinElementRef'); + expect(schema).toHaveProperty('direction'); + expect(schema).not.toHaveProperty('x1'); + expect(schema).not.toHaveProperty('y1'); + expect(schema).not.toHaveProperty('x2'); + expect(schema).not.toHaveProperty('y2'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - + expect(schemaObject.safeParse({ withinElementRef: 'e1', direction: 'up' }).success).toBe( + true, + ); + expect( + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'diagonal' }).success, + ).toBe(false); + expect(schemaObject.safeParse({ direction: 'up' }).success).toBe(false); + expect(schemaObject.safeParse({ withinElementRef: 'e1' }).success).toBe(false); expect( schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, + withinElementRef: 'e1', + direction: 'down', + duration: 1.5, + distance: 10, + preDelay: 0.5, + postDelay: 0.25, }).success, ).toBe(true); - expect( - schemaObject.safeParse({ - x1: 100.5, - y1: 200, - x2: 300, - y2: 400, - }).success, + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', duration: 0 }).success, ).toBe(false); - expect( - schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: -1, - }).success, + 
schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', distance: 0 }).success, ).toBe(false); - expect( - schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: 1.5, - delta: 10, - preDelay: 0.5, - postDelay: 0.2, - }).success, - ).toBe(true); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', preDelay: 10.1 }) + .success, + ).toBe(false); }); }); describe('Command Generation', () => { - it('should generate correct axe command for basic swipe', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/mocked/axe/path', - 'swipe', - '--start-x', - '100', - '--start-y', - '200', - '--end-x', - '300', - '--end-y', - '400', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('derives safe upward swipe points within the referenced element', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), ]); - }); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command for swipe with duration', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: 
true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 50, - y1: 75, - x2: 250, - y2: 350, - duration: 1.5, - }, - trackingExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(capturedCommand).toEqual([ + expect(result).toMatchObject({ + didError: false, + action: { type: 'swipe', withinElementRef: 'e1', direction: 'up' }, + }); + expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', - '50', + '100', '--start-y', - '75', + '340', '--end-x', - '250', + '100', '--end-y', - '350', - '--duration', - '1.5', + '60', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for swipe with all optional parameters', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 0, - y1: 0, - x2: 500, - y2: 800, - duration: 2.0, - delta: 10, - preDelay: 0.5, - postDelay: 0.3, - }, - trackingExecutor, - mockAxeHelpers, - ), + it('preserves optional AXe swipe flags', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runSwipe( + { + simulatorId, + withinElementRef: 'e1', + direction: 'right', + duration: 2, + distance: 10, + preDelay: 0.5, + postDelay: 0.25, + }, + executor, ); - 
expect(capturedCommand).toEqual([ + expect(result.action).toMatchObject({ + type: 'swipe', + withinElementRef: 'e1', + direction: 'right', + durationSeconds: 2, + }); + expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', - '0', + '30', '--start-y', - '0', + '200', '--end-x', - '500', + '170', '--end-y', - '800', + '200', '--duration', '2', '--delta', @@ -230,286 +158,148 @@ describe('Swipe Tool', () => { '--pre-delay', '0.5', '--post-delay', - '0.3', - '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); - - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - swipeLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - x1: 150, - y1: 250, - x2: 400, - y2: 600, - delta: 5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', - 'swipe', - '--start-x', - '150', - '--start-y', - '250', - '--end-x', - '400', - '--end-y', - '600', - '--delta', - '5', + '0.25', '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + simulatorId, ]); }); }); - describe('Handler Behavior', () => { - it('should return error for missing simulatorId via handler', async () => { - const result = await callHandler(handler, { x1: 100, y1: 200, x2: 300, y2: 400 }); - - expect(result.isError).toBe(true); - expect(result.content[0].type).toBe('text'); - expect(allText(result)).toContain('Missing required session defaults'); - expect(allText(result)).toContain('simulatorId is required'); - expect(allText(result)).toContain('session-set-defaults'); - }); + 
describe('Resolution failures', () => { + it('returns TARGET_NOT_ACTIONABLE without calling AXe when the frame is too small', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 1, height: 1 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - it('should return validation error for missing x1 once simulator default exists', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, + ); - const result = await callHandler(handler, { - y1: 200, - x2: 300, - y2: 400, + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); - - expect(result.isError).toBe(true); - expect(result.content[0].type).toBe('text'); - expect(allText(result)).toContain('Parameter validation failed'); - expect(allText(result)).toContain('x1: Invalid input: expected number, received undefined'); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); - it('should return success for valid swipe execution', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', - }); - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + it('returns TARGET_NOT_ACTIONABLE without calling AXe when derived swipe points are degenerate', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 2, height: 100 }, + }), + ]); + const 
{ calls, executor } = createTrackingExecutor(); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Swipe from (100, 200) to (300, 400) simulated successfully.', + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'right' }, + executor, ); - }); - it('should return success for swipe with duration', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(calls).toEqual([]); + }); + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: 1.5, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Swipe from (100, 200) to (300, 400) duration=1.5s simulated successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot( + [createNode({ type: 'ScrollView', role: 'AXScrollArea' })], + Date.now() - 61_000, + ); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = 
createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e404', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate swipe.'); - expect(text).toContain('axe command failed'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle SystemError from command execution', async () => { - // Override the executor to throw SystemError for this test - const systemErrorExecutor = async () => { - throw new SystemError('System error occurred'); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - 
swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - systemErrorExecutor, - mockAxeHelpers, - ), + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ type: 'Button', role: 'AXButton' })]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: System error occurred'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle unexpected Error objects', async () => { - // Override the executor to throw an unexpected Error for this test - const unexpectedErrorExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - unexpectedErrorExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { withinElementRef: 'e1', direction: 'up' }); expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: Unexpected error'); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected string errors', async () => { - // Override the executor to 
throw a string error for this test - const stringErrorExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - stringErrorExecutor, - mockAxeHelpers, - ), + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + createFailingExecutor('swipe failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/tap.test.ts b/src/mcp/tools/ui-automation/__tests__/tap.test.ts index 062933648..0bb82a9ab 100644 --- a/src/mcp/tools/ui-automation/__tests__/tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/tap.test.ts @@ -1,198 +1,79 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; - -import { schema, handler, type AxeHelpers, tapLogic } from '../tap.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from 
'../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; - -function createMockAxeHelpers(): AxeHelpers { - return { - getAxePath: () => '/mocked/axe/path', - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; -} - -function createMockAxeHelpersWithNullPath(): AxeHelpers { - return { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { schema, handler, tapLogic } from '../tap.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createSequencedExecutor, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runTap( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => tapLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Tap Plugin', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes elementRef-only targeting fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); + expect(schema).not.toHaveProperty('id'); + expect(schema).not.toHaveProperty('label'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - - expect(schemaObject.safeParse({ x: 100, y: 200 }).success).toBe(true); - - 
expect(schemaObject.safeParse({ id: 'loginButton' }).success).toBe(true); - - expect(schemaObject.safeParse({ label: 'Log in' }).success).toBe(true); - - expect(schemaObject.safeParse({ x: 100, y: 200, id: 'loginButton' }).success).toBe(true); - + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(true); + expect(schemaObject.safeParse({}).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: '' }).success).toBe(false); expect( - schemaObject.safeParse({ x: 100, y: 200, id: 'loginButton', label: 'Log in' }).success, + schemaObject.safeParse({ elementRef: 'e1', preDelay: 0.5, postDelay: 1 }).success, ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - preDelay: 0.5, - postDelay: 1, - }).success, - ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 3.14, - y: 200, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 3.14, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - preDelay: -1, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - postDelay: -1, - }).success, - ).toBe(false); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', preDelay: 10.1 }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', postDelay: 10.1 }).success).toBe(false); }); }); describe('Command Generation', () => { - let callHistory: Array<{ - command: string[]; - logPrefix?: string; - useShell?: boolean; - opts?: { env?: Record; cwd?: string }; - }>; - - beforeEach(() => { - callHistory = []; - }); + it('uses AXe id targeting when the referenced element has an identifier', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { calls, 
executor } = createTrackingExecutor(); - it('should generate correct axe command with minimal parameters', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ - '/mocked/axe/path', - 'tap', - '-x', - '100', - '-y', - '200', - '--udid', - '12345678-1234-4234-8234-123456789012', - ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); - }); - - it('should generate correct axe command with element id target', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - id: 'loginButton', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ + expect(result).toMatchObject({ didError: false, action: { type: 'tap', elementRef: 'e1' } }); + expect(calls).toHaveLength(1); + expect(calls[0]).toEqual({ 
command: [ '/mocked/axe/path', 'tap', '--id', - 'loginButton', + 'continue-button', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], logPrefix: '[AXe]: tap', useShell: false, @@ -200,515 +81,355 @@ describe('Tap Plugin', () => { }); }); - it('should generate correct axe command with element label target', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); + it('clears the cached runtime snapshot after a successful tap', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { executor } = createTrackingExecutor(); - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - label: 'Log in', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ - '/mocked/axe/path', - 'tap', - '--label', - 'Log in', - '--udid', - '12345678-1234-4234-8234-123456789012', - ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + expect(result.didError).toBe(false); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); - it('should prefer coordinates over id/label when both are provided', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, 
useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 120, - y: 240, - id: 'loginButton', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('includes element type when tapping a referenced element with a shared identifier', async () => { + recordSnapshot([ + createNode({ + type: 'Group', + role: 'AXGroup', + AXUniqueId: 'shared-action', + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXUniqueId: 'shared-action', + AXLabel: 'Continue', + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '120', - '-y', - '240', + '--id', + 'shared-action', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ]); }); - it('should generate correct axe command with pre-delay', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 300, - preDelay: 0.5, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + it('uses coordinates immediately 
when the snapshot already has duplicate selector matches', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + AXUniqueId: 'trash', + AXLabel: 'Remove', + }), + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 300, y: 400, width: 50, height: 80 }, + AXUniqueId: 'trash', + AXLabel: 'Remove', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '325', '-y', '440', '--udid', simulatorId], + ]); + }); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('falls back to the resolved center when selector tap is ambiguous', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'shared-action', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Multiple accessibility elements matched selector' }, + { success: true, output: 'tapped by coordinate' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '150', - '-y', - '300', - '--pre-delay', - '0.5', + '--id', + 'shared-action', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - it('should generate correct axe command with post-delay', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap 
completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 250, - y: 400, - postDelay: 1.0, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('falls back to the resolved center when selector tap reports a parenthesized match count', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'weather.locationsSheet', + AXLabel: 'Clear search', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { + success: false, + error: + "Multiple (2) accessibility elements matched --id 'weather.locationsSheet'. 
No tap performed.", + }, + { success: true, output: 'tapped by coordinate' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '250', - '-y', - '400', - '--post-delay', - '1', + '--id', + 'weather.locationsSheet', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - it('should generate correct axe command with both delays', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 350, - y: 500, - preDelay: 0.3, - postDelay: 0.7, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('falls back to the resolved center when selector tap reports no match', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: undefined, + AXIdentifier: undefined, + AXLabel: 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { + success: false, + error: + "No accessibility element matched --label 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°'. 
No tap performed.", + }, + { success: true, output: 'tapped by coordinate' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '350', - '-y', - '500', - '--pre-delay', - '0.3', - '--post-delay', - '0.7', + '--label', + 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - }); - describe('Plugin Handler Validation', () => { - it('should require simulatorId session default when not provided', async () => { - const result = await callHandler(handler, { - x: 100, - y: 200, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); + it('does not fall back for unrelated failures that mention multiple', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'shared-action', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Failed after multiple retry attempts' }, + { success: true, output: 'should not run' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(true); + expect(calls).toHaveLength(1); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'tap', + '--id', + 'shared-action', + '--element-type', + 'Button', + '--udid', + simulatorId, + ]); }); - it('should return validation error for missing x 
coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { - y: 200, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: X coordinate is required when y is provided.'); + it('falls back to the referenced element center when no identifier exists', async () => { + recordSnapshot([ + createNode({ frame: { x: 10, y: 20, width: 100, height: 40 }, AXLabel: undefined }), + ]); + const { calls, executor } = createTrackingExecutor(); + + await runTap({ simulatorId, elementRef: 'e1', preDelay: 0.25, postDelay: 0.5 }, executor); + + expect(calls).toHaveLength(1); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'tap', + '-x', + '60', + '-y', + '40', + '--pre-delay', + '0.25', + '--post-delay', + '0.5', + '--udid', + simulatorId, + ]); }); - it('should return validation error for missing y coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { - x: 100, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('y: Y coordinate is required when x is provided.'); + it('uses a touch down/up activation for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + AXLabel: 'Reduce transparency', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'touch', + '-x', + '307', + '-y', + '903', + '--down', + '--up', + '--udid', + simulatorId, + ]); }); + }); - it('should return 
validation error when both id and label are provided without coordinates', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await callHandler(handler, { - id: 'loginButton', - label: 'Log in', - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('id: Provide either id or label, not both.'); - }); + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - it('should return validation error for non-integer x coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - const result = await callHandler(handler, { - x: 3.14, - y: 200, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: X coordinate must be an integer'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return validation error for non-integer y coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const result = await callHandler(handler, { - x: 100, - y: 3.14, - }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('y: Y coordinate must be an 
integer'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should return validation error for negative preDelay', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const result = await callHandler(handler, { - x: 100, - y: 200, - preDelay: -1, - }); + const result = await runTap({ simulatorId, elementRef: 'e404' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('preDelay: Pre-delay must be non-negative'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should return validation error for negative postDelay', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ enabled: false })]); + const { calls, executor } = createTrackingExecutor(); - const result = await callHandler(handler, { - x: 100, - y: 200, - postDelay: -1, - }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('postDelay: Post-delay must be non-negative'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); }); - describe('Handler Behavior (Complete Literal 
Returns)', () => { - it('should return DependencyError when axe binary is not found', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - error: undefined, - }); - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - preDelay: 0.5, - postDelay: 1.0, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default before validation', async () => { + const result = await callHandler(handler, { elementRef: 'e1' }); expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle DependencyError when axe binary not found (second test)', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Coordinates out of bounds', - }); + it('returns UI_STATE_CHANGED when identifier-based AXe tap fails after ref resolution', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTap( + { simulatorId, elementRef: 'e1' }, + createFailingExecutor('element not found'), ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (third test)', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'System error occurred', + 
expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'UI_STATE_CHANGED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); - it('should handle DependencyError when axe binary not found (fourth test)', async () => { - const mockExecutor = async () => { - throw new Error('ENOENT: no such file or directory'); - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + it('returns ACTION_FAILED when coordinate-based AXe tap fails after ref resolution', async () => { + recordSnapshot([createNode({ AXLabel: undefined })]); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (fifth test)', async () => { - const mockExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (sixth test)', async () => { - const mockExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = 
createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTap( + { simulatorId, elementRef: 'e1' }, + createFailingExecutor('tap failed'), ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/touch.test.ts b/src/mcp/tools/ui-automation/__tests__/touch.test.ts index 2e89f730a..bb440fd61 100644 --- a/src/mcp/tools/ui-automation/__tests__/touch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/touch.test.ts @@ -1,657 +1,238 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, touchLogic } from '../touch.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runTouch( + params: Parameters[0], + 
executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => touchLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} describe('Touch Plugin', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes elementRef and touch flags without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('down'); + expect(schema).toHaveProperty('up'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); - it('should validate schema fields with safeParse', () => { - const schemaObj = z.object(schema); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - down: true, - }).success, - ).toBe(true); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - up: true, - }).success, - ).toBe(true); - - expect( - schemaObj.safeParse({ - x: 100.5, - y: 200, - down: true, - }).success, - ).toBe(false); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200.5, - down: true, - }).success, - ).toBe(false); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - down: true, - delay: -1, - }).success, - ).toBe(false); - - const withSimId = schemaObj.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', up: true }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true, 
delay: -1 }).success).toBe( + false, + ); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true, delay: 10.1 }).success).toBe( + false, + ); + expect(schemaObject.safeParse({ down: true }).success).toBe(false); }); }); - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await callHandler(handler, { - x: 100, - y: 200, - down: true, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); + describe('Command Generation', () => { + it('touches down at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - it('should surface parameter validation errors when defaults exist', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - const result = await callHandler(handler, { - y: 200, - down: true, + expect(result).toMatchObject({ + didError: false, + action: { type: 'touch', elementRef: 'e1', event: 'touch down' }, }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: Invalid input: expected number, received undefined'); - }); - }); - - describe('Command Generation', () => { - it('should generate correct axe command for touch down', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const 
mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '100', + '60', '-y', - '200', + '40', '--down', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for touch up', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 250, - up: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'touch', - '-x', - '150', - '-y', - '250', - '--up', - '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); + it('touches up at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command for touch down+up', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() 
=> - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 300, - y: 400, - down: true, - up: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + await runTouch({ simulatorId, elementRef: 'e1', up: true }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '300', + '60', '-y', - '400', - '--down', + '40', '--up', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for touch with delay', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 50, - y: 75, - down: true, - up: true, - delay: 1.5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + it('touches down and up with delay at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); + + await runTouch({ simulatorId, elementRef: 'e1', down: true, up: true, delay: 1.5 }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '50', + '60', '-y', - '75', + '40', '--down', '--up', '--delay', '1.5', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: 
true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - x: 0, - y: 0, - up: true, - delay: 0.5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + it('uses the switch activation point for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', + await runTouch({ simulatorId, elementRef: 'e1', down: true, up: true }, executor); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '0', + '307', '-y', - '0', + '903', + '--down', '--up', - '--delay', - '0.5', '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + simulatorId, ]); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should handle axe dependency error', async () => { - const mockExecutor = createMockExecutor({ success: true }); - const mockAxeHelpers = { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Resolution failures', () => { + it('keeps down/up validation before snapshot resolution', async () => { + const { calls, executor } = createTrackingExecutor(); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); + const result = await runTouch({ simulatorId, elementRef: 'e1' }, executor); - it('should successfully perform touch down', async () => { - const 
mockExecutor = createMockExecutor({ success: true, output: 'Touch down completed' }); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.error).toBe('At least one of "down" or "up" must be true'); + expect(result.action).toEqual({ type: 'touch', elementRef: 'e1' }); + expect(result.uiError).toBeUndefined(); + expect(calls).toEqual([]); }); - it('should successfully perform touch up', async () => { - const mockExecutor = createMockExecutor({ success: true, output: 'Touch up completed' }); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch up) at (100, 200) executed successfully.', - ); - }); + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - it('should return error when neither down nor up is specified', async () => { - const mockExecutor = createMockExecutor({ success: true }); - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBe(true); - 
expect(allText(result)).toContain('At least one of "down" or "up" must be true'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return success for touch down event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should return success for touch up event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e404', down: true }, executor); - 
expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch up) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should return success for touch down+up event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ role: 'AXApplication', type: 'Application' })]); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down+up) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ success: true }); - - const mockAxeHelpers = { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('rejects delay unless both down and up are true before AXe runs', async () => { + 
const result = await callHandler(handler, { simulatorId, elementRef: 'e1', down: true, delay: 1 }); expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), + expect(result.content[0].text).toContain( + 'Delay can only be used when both down and up are true', ); - - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to execute touch event.'); - expect(text).toContain('axe command failed'); }); - it('should handle SystemError from command execution', async () => { - const mockExecutor = async () => { - throw new Error('System error occurred'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', down: true }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected Error objects', async () => { - const mockExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = { - getAxePath: () => 
'/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - }); + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode()]); - it('should handle unexpected string errors', async () => { - const mockExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTouch( + { simulatorId, elementRef: 'e1', down: true }, + createFailingExecutor('touch failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts index 18f481bd5..e7a01fc4c 100644 --- a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts @@ -1,481 +1,454 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { - createMockExecutor, - createNoopExecutor, - mockProcess, -} from '../../../../test-utils/mock-executors.ts'; +import type { 
UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, type_textLogic } from '../type_text.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic, callHandler } from '../../../../test-utils/test-helpers.ts'; - -// Mock axe helpers for dependency injection -function createMockAxeHelpers( - overrides: { - getAxePathReturn?: string | null; - getBundledAxeEnvironmentReturn?: Record; - } = {}, -) { - return { - getAxePath: () => - overrides.getAxePathReturn !== undefined ? overrides.getAxePathReturn : '/usr/local/bin/axe', - getBundledAxeEnvironment: () => overrides.getBundledAxeEnvironmentReturn ?? {}, - }; -} - -// Mock executor that tracks rejections for testing -function createRejectingExecutor(error: any) { - return async () => { - throw error; - }; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runTypeText( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => type_textLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Type Text Tool', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('requires elementRef and text', () => { expect(typeof handler).toBe('function'); - }); + 
expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('text'); + expect(schema).toHaveProperty('replaceExisting'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - + expect(schemaObject.safeParse({ elementRef: 'e1', text: 'Hello World' }).success).toBe(true); expect( - schemaObject.safeParse({ - text: 'Hello World', - }).success, + schemaObject.safeParse({ elementRef: 'e1', text: 'Hello World', replaceExisting: true }) + .success, ).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', text: '' }).success).toBe(false); + expect(schemaObject.safeParse({ text: 'Hello World' }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(false); + }); + }); - expect( - schemaObject.safeParse({ - text: '', - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - text: 123, - }).success, - ).toBe(false); + describe('Command Generation', () => { + it('focuses the referenced text field by identifier, then types text', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Email', + AXUniqueId: 'email-field', + }), + ]); + const { calls, executor } = createTrackingExecutor(); - expect(schemaObject.safeParse({}).success).toBe(false); + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'user@example.com' }, + executor, + ); - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', + expect(result).toMatchObject({ + didError: false, + action: { type: 'type-text', elementRef: 'e1', textLength: 16 }, }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'email-field', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 
'type', 'user@example.com', '--udid', simulatorId], + ]); }); - }); - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await callHandler(handler, { text: 'Hello' }); + it('types all AXe-supported US keyboard punctuation characters', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + const text = 'Az09 !@#$%^&*()_+-={}[]|\\:";\'<>?,./`~'; - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text }, executor); - it('should surface validation errors when defaults exist', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + expect(result).toMatchObject({ + didError: false, + action: { type: 'type-text', elementRef: 'e1', textLength: text.length }, + }); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--label', + 'Search', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', text, '--udid', simulatorId], + ]); + }); - const result = await callHandler(handler, {}); + it('rejects unsupported AXe typing characters before focusing or typing', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + const text = 'Tokyo Reykjavík 42'; - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('text: Invalid input: expected string, received 
undefined'); - }); - }); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text }, executor); - describe('Command Generation', () => { - it('should generate correct axe command for basic text typing', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + message: expect.stringContaining('US keyboard characters'), + elementRef: 'e1', + recoveryHint: expect.stringContaining('US keyboard'), }); + expect(result.action).toEqual({ + type: 'type-text', + elementRef: 'e1', + textLength: text.length, + }); + expect(calls).toEqual([]); + expect(JSON.stringify(result)).not.toContain('Tokyo'); + expect(JSON.stringify(result)).not.toContain('Reykjavík'); + }); - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'Hello World', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('includes text field type when focusing a referenced field with a shared identifier', async () => { + recordSnapshot([ + createNode({ + type: 'Group', + role: 'AXGroup', + AXUniqueId: 'locationSearchField', + children: [ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXUniqueId: 'locationSearchField', + AXLabel: 'Search for a city', + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTypeText({ simulatorId, elementRef: 'e2', text: 'London' }, executor); + + 
expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'locationSearchField', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for text with special characters', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('focuses by coordinates immediately when the snapshot already has duplicate selector matches', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'locationSearchField', + AXLabel: 'Search', + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 40, y: 200, width: 180, height: 40 }, + AXUniqueId: 'locationSearchField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'user@example.com', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e2', text: 'London' }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'user@example.com', - '--udid', - '12345678-1234-4234-8234-123456789012', + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '130', '-y', '220', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should 
generate correct axe command for text with numbers and symbols', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Password123!@#', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + it('falls back to the resolved center when selector focus is ambiguous', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'locationSearchField', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Multiple 2 accessibility elements matched selector' }, + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ]); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'Password123!@#', - '--udid', - '12345678-1234-4234-8234-123456789012', + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'London' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'locationSearchField', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for long text', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - 
output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('falls back to the resolved center when selector focus reports no match', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: undefined, + AXIdentifier: undefined, + AXLabel: 'Search for a city', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { + success: false, + error: "No accessibility element matched --label 'Search for a city'. No tap performed.", + }, + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ]); - const longText = - 'This is a very long text that needs to be typed into the simulator for testing purposes.'; - - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: longText, - }, - trackingExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Portland' }, + executor, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - longText, - '--udid', - '12345678-1234-4234-8234-123456789012', + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--label', + 'Search for a city', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'Portland', '--udid', simulatorId], ]); }); - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - 
error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/path/to/bundled/axe', - getBundledAxeEnvironmentReturn: { AXE_PATH: '/some/path' }, - }); + it('selects existing text before typing when replaceExisting is true', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXValue: 'Tokyo', + AXLabel: undefined, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - type_textLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - text: 'Test message', - }, - trackingExecutor, - mockAxeHelpers, - ), + await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Portland', replaceExisting: true }, + executor, ); - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', - 'type', - 'Test message', - '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--value', + 'Tokyo', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + [ + '/mocked/axe/path', + 'key-combo', + '--modifiers', + '227', + '--key', + '4', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'Portland', '--udid', simulatorId], ]); }); - }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should handle axe dependency error', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: null, - }); + it('focuses the referenced text field by center when no identifier exists', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXLabel: undefined, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', 
- }, - createNoopExecutor(), - mockAxeHelpers, - ), - ); + await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(calls.map((call) => call.command)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'Hello', '--udid', simulatorId], + ]); }); + }); - it('should successfully type text', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Text typed successfully', - error: undefined, - }); + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain('Text typing simulated successfully.'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return success for valid text typing', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Text typed successfully', - error: undefined, - }); - - 
const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain('Text typing simulated successfully.'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe binary not found', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: null, - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - createNoopExecutor(), - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e404', text: 'Hello' }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle AxeError from command execution', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Text field not found', - }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ type: 'Button', role: 'AXButton' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() 
=> - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate text typing.'); - expect(text).toContain('Text field not found'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle SystemError from command execution', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createRejectingExecutor(new Error('ENOENT: no such file or directory')); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', text: 'Hello' }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected Error objects', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('returns ACTION_FAILED when focusing the resolved field fails', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'focus failed' }, + ]); - const mockExecutor = createRejectingExecutor(new Error('Unexpected 
error')); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Secret123' }, + executor, ); - expect(result.isError).toBe(true); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(calls).toHaveLength(1); + expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(result.action).toEqual({ type: 'type-text', elementRef: 'e1', textLength: 9 }); }); - it('should handle unexpected string errors', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('returns ACTION_FAILED when typing fails after focus succeeds', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: 'focused' }, + { success: false, error: 'typing failed' }, + ]); - const mockExecutor = createRejectingExecutor('String error'); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Secret123' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(calls).toHaveLength(2); + 
expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(result.action).toEqual({ type: 'type-text', elementRef: 'e1', textLength: 9 }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts new file mode 100644 index 000000000..b5ab08e07 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts @@ -0,0 +1,88 @@ +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import type { CommandExecOptions, CommandExecutor } from '../../../../utils/execution/index.ts'; +import { mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { AxeHelpers } from '../shared/axe-command.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; + +export const simulatorId = '12345678-1234-4234-8234-123456789012'; + +export interface CapturedCommandCall { + command: string[]; + logPrefix?: string; + useShell?: boolean; + opts?: CommandExecOptions; +} + +export function createMockAxeHelpers( + overrides: { + getAxePathReturn?: string | null; + getBundledAxeEnvironmentReturn?: Record; + } = {}, +): AxeHelpers { + return { + getAxePath: () => + overrides.getAxePathReturn !== undefined ? overrides.getAxePathReturn : '/mocked/axe/path', + getBundledAxeEnvironment: () => + overrides.getBundledAxeEnvironmentReturn ?? 
{ SOME_ENV: 'value' }, + }; +} + +export function createTrackingExecutor(): { + calls: CapturedCommandCall[]; + executor: CommandExecutor; +} { + const calls: CapturedCommandCall[] = []; + const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { + calls.push({ command, logPrefix, useShell, opts }); + return { success: true, output: 'ok', error: undefined, process: mockProcess }; + }; + + return { calls, executor }; +} + +export function createFailingExecutor(error: string): CommandExecutor { + return async () => ({ success: false, output: '', error, process: mockProcess }); +} + +export function createSequencedExecutor( + results: Array<{ success: boolean; output?: string; error?: string }>, +): { + calls: CapturedCommandCall[]; + executor: CommandExecutor; +} { + const calls: CapturedCommandCall[] = []; + let index = 0; + const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { + calls.push({ command, logPrefix, useShell, opts }); + const result = results[index] ?? results.at(-1) ?? { success: true }; + index += 1; + return { + success: result.success, + output: result.output ?? 
'', + error: result.error, + process: mockProcess, + }; + }; + + return { calls, executor }; +} + +export function createNode(overrides: Partial = {}): AccessibilityNode { + return { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + AXLabel: 'Continue', + ...overrides, + }; +} + +export function recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} diff --git a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts new file mode 100644 index 000000000..13e9fcac2 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts @@ -0,0 +1,708 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import * as z from 'zod'; +import type { CaptureResultDomainResult } from '../../../../types/domain-results.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { handler, schema, wait_for_uiLogic } from '../wait_for_ui.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +function hierarchyJson(nodes: Array>): string { + return JSON.stringify({ elements: nodes }); +} + +function createTiming(startMs = 0): { + timing: { now: () => number; sleep: (durationMs: number) => Promise }; + getNow: () => number; +} { + let nowMs = startMs; + return { + timing: { + now: () => nowMs, + sleep: async (durationMs) => { + nowMs += durationMs; + }, + }, + 
getNow: () => nowMs, + }; +} + +async function runWaitForUi( + params: Parameters[0], + executor: CommandExecutor, + timing = createTiming().timing, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => wait_for_uiLogic(params, executor, createMockAxeHelpers(), undefined, timing)); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as CaptureResultDomainResult; +} + +function firstRuntimeLabel(result: CaptureResultDomainResult): string | undefined { + return result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' + ? result.capture.elements[0]?.label + : undefined; +} + +describe('Wait for UI Plugin', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes public selector fields without simulatorId in the public schema', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('predicate'); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('identifier'); + expect(schema).toHaveProperty('label'); + expect(schema).toHaveProperty('role'); + expect(schema).toHaveProperty('value'); + expect(schema).toHaveProperty('text'); + expect(schema).not.toHaveProperty('simulatorId'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ predicate: 'settled' }).success).toBe(true); + expect( + schemaObject.safeParse({ predicate: 'exists', identifier: 'continue-button' }).success, + ).toBe(true); + expect( + schemaObject.safeParse({ predicate: 'gone', label: 'Loading', role: 'text' }).success, + ).toBe(true); + expect(schemaObject.safeParse({ predicate: 'textContains', text: 'Ready' }).success).toBe( + true, + ); + }); + + it('requires simulatorId session default before validation', async () => { + const result = await callHandler(handler, { predicate: 'settled' }); + + 
expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + + it('requires textContains text through handler validation', async () => { + const result = await callHandler(handler, { + simulatorId, + predicate: 'textContains', + identifier: 'status', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('textContains waits require text'); + }); + + it('rejects whitespace-only text through handler validation', async () => { + const result = await callHandler(handler, { + simulatorId, + predicate: 'textContains', + identifier: 'status', + text: ' ', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('textContains waits require text'); + }); + + it('rejects text on non-textContains predicates instead of ignoring it', async () => { + const result = await callHandler(handler, { + simulatorId, + predicate: 'gone', + role: 'text', + text: 'Loading', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('text is only supported for textContains waits'); + }); + + it('rejects unknown fields instead of silently broadening wait selectors', async () => { + const result = await callHandler(handler, { + simulatorId, + predicate: 'textContains', + text: 'Portland', + selector: { role: 'button' }, + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Unrecognized key: "selector"'); + }); + }); + + it('uses the resolved simulatorId in next-step params', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + const { result, run } = createMockToolHandlerContext(); + + await run(() => + wait_for_uiLogic( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + createMockAxeHelpers(), + 
undefined, + createTiming().timing, + ), + ); + + expect(result.nextStepParams).toEqual({ + snapshot_ui: { simulatorId }, + wait_for_ui: { simulatorId, predicate: 'settled' }, + }); + }); + + it('converts elementRef to identifier before polling', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button', AXLabel: 'Continue' })], 0); + const { calls, executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXUniqueId: 'continue-button', AXLabel: 'Continue now' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + screenHash: expect.any(String), + seq: 2, + elements: [expect.objectContaining({ ref: 'e1', identifier: 'continue-button' })], + }), + ); + expect(calls[0]?.command).toEqual(['/mocked/axe/path', 'describe-ui', '--udid', simulatorId]); + expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('converts elementRef to label plus role when no identifier exists', async () => { + recordSnapshot([createNode({ AXLabel: 'Continue', AXUniqueId: undefined })], 0); + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([createNode({ AXLabel: 'Continue', AXUniqueId: undefined })]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(firstRuntimeLabel(result)).toBe('Continue'); + }); + + it('converts elementRef to value plus role when no identifier or label exists', async () => { + recordSnapshot( + [ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + title: null, + help: null, + AXValue: 'Email', + AXUniqueId: undefined, + }), + ], + 0, 
+ ); + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + title: null, + help: null, + AXValue: 'Email', + AXUniqueId: undefined, + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('rejects elementRef without a stable identifier, label, or value selector', async () => { + recordSnapshot( + [ + createNode({ + AXLabel: null, + title: null, + help: null, + AXValue: null, + AXUniqueId: undefined, + }), + ], + 0, + ); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode()]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_FOUND', elementRef: 'e1' }); + expect(calls).toEqual([]); + }); + + it('matches explicit selector fields by exact AND', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Submit', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Submit', role: 'AXButton', type: 'Button' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', label: 'Submit', role: 'button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('allows multiple matches for exists', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 
'exists', label: 'Duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('succeeds for gone when selector count is zero', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', label: 'Loading', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + }); + + it('returns TARGET_AMBIGUOUS when focused selector matches multiple elements', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', label: 'Duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: expect.arrayContaining([ + expect.objectContaining({ label: 'Duplicate' }), + expect.objectContaining({ label: 'Duplicate' }), + ]), + }); + }); + + it('returns TARGET_NOT_ACTIONABLE when focused state is unavailable', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + AXUniqueId: 'email-field', + role: 'AXTextField', + type: 'TextField', + AXLabel: null, + AXValue: 'hello@example.com', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', identifier: 'email-field', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + message: 'The matched runtime UI element does not expose focus state.', + candidates: 
[expect.objectContaining({ identifier: 'email-field' })], + }); + }); + + it('succeeds for focused when the matched element is focused', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + AXUniqueId: 'email-field', + role: 'AXTextField', + type: 'TextField', + AXLabel: null, + AXValue: 'hello@example.com', + AXFocused: true, + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', identifier: 'email-field', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('times out with latest snapshot and candidates for unresolved enabled state', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([createNode({ AXUniqueId: 'login-button', enabled: false })]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', identifier: 'login-button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + timeoutMs: 0, + candidates: [expect.objectContaining({ identifier: 'login-button' })], + }); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('includes empty candidates and exact-match guidance for selector timeouts with zero matches', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXUniqueId: 'other-button' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', identifier: 'missing-button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + candidates: [], + recoveryHint: + 'Selector fields match exact values. 
Use textContains for partial visible text, inspect the latest runtime snapshot, or adjust the wait selector.', + }); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + }); + + it('checks textContains against normalized case-insensitive value before label', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXUniqueId: 'status', AXLabel: 'Loading', AXValue: 'Server Ready' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { + simulatorId, + predicate: 'textContains', + identifier: 'status', + text: 'server ready', + timeoutMs: 0, + }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('narrows selector matches by text before treating textContains as ambiguous', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Close', role: 'AXButton', type: 'Button' }), + createNode({ + AXLabel: 'Lisbon, Portugal, 9:24 PM ¡ Sunny', + role: 'AXButton', + type: 'Button', + }), + createNode({ AXLabel: 'Clear search', role: 'AXButton', type: 'Button' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'button', text: 'Lisbon', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('returns TARGET_AMBIGUOUS for textContains when selector plus text still matches multiple elements', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Lisbon saved', role: 'AXButton', type: 'Button' }), + createNode({ AXLabel: 'Lisbon details', role: 'AXButton', type: 'Button' }), + createNode({ AXLabel: 'Lisbon', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'button', text: 'Lisbon', timeoutMs: 0 }, + 
executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: [ + expect.objectContaining({ label: 'Lisbon saved' }), + expect.objectContaining({ label: 'Lisbon details' }), + ], + }); + }); + + it('supports selector-free textContains when exactly one element matches', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Header' }), + createNode({ AXLabel: 'Light rain is expected around 2 PM.' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'Light rain', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + expect(result.waitMatch).toMatchObject({ + predicate: 'textContains', + matches: [expect.objectContaining({ label: 'Light rain is expected around 2 PM.' })], + }); + }); + + it('succeeds for selector-free textContains when multiple candidates share matching visible text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'You just pressed the button!' }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + AXValue: 'You just pressed the button!', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'you just pressed', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toMatchObject({ + predicate: 'textContains', + matches: [ + expect.objectContaining({ label: 'You just pressed the button!' }), + expect.objectContaining({ value: 'You just pressed the button!' 
}), + ], + }); + }); + + it('succeeds for selector textContains when multiple candidates share matching visible text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate status', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Duplicate status', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'text', text: 'duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('succeeds for selector-free textContains when multiple candidates exactly match', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Hello from rs1' }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + AXValue: 'Hello from rs1', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'hello from rs1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('returns TARGET_AMBIGUOUS for selector-free textContains with mixed partial matches', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready' }), + createNode({ AXLabel: 'Ready now' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: [ + expect.objectContaining({ label: 'Ready' }), + expect.objectContaining({ label: 'Ready now' }), + ], + }); + }); + + it('clears the runtime store when every poll returns unparsable UI', async () => { + recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); 
+ const { executor } = createSequencedExecutor([{ success: true, output: 'not json' }]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'settled', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_PARSE_FAILED'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + + it('waits until runtime snapshot element signatures remain settled', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Loading', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + ]); + const { timing, getNow } = createTiming(); + + const result = await runWaitForUi( + { + simulatorId, + predicate: 'settled', + timeoutMs: 500, + pollIntervalMs: 100, + settledDurationMs: 100, + }, + executor, + timing, + ); + + expect(result.didError).toBe(false); + expect(getNow()).toBe(200); + expect(firstRuntimeLabel(result)).toBe('Ready'); + }); +}); diff --git a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts new file mode 100644 index 000000000..0f5de5ff2 --- /dev/null +++ b/src/mcp/tools/ui-automation/batch.ts @@ -0,0 +1,142 @@ +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from 
'../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; +import { + createUiActionFailureResult, + createUiActionSuccessResult, + mapAxeCommandError, + setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, +} from './shared/domain-result.ts'; + +const batchSchema = z.object({ + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + steps: z + .array(z.string().min(1, { message: 'steps must not contain empty values' })) + .min(1, { message: 'At least one batch step is required' }) + .max(100, { message: 'At most 100 batch steps are supported' }), + axCache: z.enum(['perBatch', 'perStep', 'none']).optional(), + typeSubmission: z.enum(['chunked', 'composite']).optional(), + typeChunkSize: z.number().int().min(1).optional(), + tapStyle: z.enum(['automatic', 'simulator', 'physical']).optional(), + continueOnError: z.boolean().optional(), + waitTimeout: z.number().min(0, { message: 'waitTimeout must be non-negative' }).optional(), + pollInterval: z.number().positive({ message: 'pollInterval must be greater than 0' }).optional(), +}); + +type BatchParams = z.infer; +type BatchResult = UiActionResultDomainResult; + +const LOG_PREFIX = '[AXe]'; + +function buildBatchCommandArgs(params: BatchParams): string[] { + const commandArgs = ['batch']; + for (const step of params.steps) { + commandArgs.push('--step', step); + } + if (params.axCache !== undefined) { + commandArgs.push('--ax-cache', params.axCache); + } + if 
(params.typeSubmission !== undefined) { + commandArgs.push('--type-submission', params.typeSubmission); + } + if (params.typeChunkSize !== undefined) { + commandArgs.push('--type-chunk-size', String(params.typeChunkSize)); + } + if (params.tapStyle !== undefined) { + commandArgs.push('--tap-style', params.tapStyle); + } + if (params.continueOnError === true) { + commandArgs.push('--continue-on-error'); + } + if (params.waitTimeout !== undefined) { + commandArgs.push('--wait-timeout', String(params.waitTimeout)); + } + if (params.pollInterval !== undefined) { + commandArgs.push('--poll-interval', String(params.pollInterval)); + } + return commandArgs; +} + +export function createBatchExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): NonStreamingExecutor { + return async (params) => { + const toolName = 'batch'; + const { simulatorId, steps } = params; + const action = { type: 'batch' as const, stepCount: steps.length }; + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); + } + + const commandArgs = buildBatchCommandArgs(params); + log('info', `${LOG_PREFIX}/${toolName}: Starting ${steps.length} step batch on ${simulatorId}`); + + try { + await executeAxeCommand(commandArgs, simulatorId, 'batch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); + log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to execute AXe batch with ${steps.length} steps.`, + }); + log('error', 
`${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message); + } + }; +} + +export async function batchLogic( + params: BatchParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): Promise { + const ctx = getHandlerContext(); + const executeBatch = createBatchExecutor(executor, axeHelpers, debuggerManager); + const result = await executeBatch(params); + + setUiActionStructuredOutput(ctx, result); +} + +const publicSchemaObject = z.strictObject(batchSchema.omit({ simulatorId: true } as const).shape); + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: batchSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: toInternalSchema(batchSchema), + logicFunction: (params: BatchParams, executor: CommandExecutor) => + batchLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/mcp/tools/ui-automation/button.ts b/src/mcp/tools/ui-automation/button.ts index 82ca07bbd..280ca0b07 100644 --- a/src/mcp/tools/ui-automation/button.ts +++ b/src/mcp/tools/ui-automation/button.ts @@ -12,6 +12,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -20,6 +21,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from 
'./shared/domain-result.ts'; const buttonSchema = z.object({ @@ -29,7 +31,8 @@ const buttonSchema = z.object({ .describe('apple-pay|home|lock|side-button|siri'), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) + .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('seconds'), }); @@ -38,11 +41,19 @@ type ButtonParams = z.infer; type ButtonResult = UiActionResultDomainResult; const LOG_PREFIX = '[AXe]'; +const DEFAULT_BUTTON_SETTLE_DELAY_MS = 750; + +function delayMs(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} export function createButtonExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + settleDelayMs = DEFAULT_BUTTON_SETTLE_DELAY_MS, ): NonStreamingExecutor { return async (params) => { const toolName = 'button'; @@ -67,9 +78,16 @@ export function createButtonExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'button', executor, axeHelpers); + if (settleDelayMs > 0) { + await delayMs(settleDelayMs); + } + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to press button '${buttonType}'.`, }); @@ -86,9 +104,10 @@ export async function buttonLogic( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + settleDelayMs = DEFAULT_BUTTON_SETTLE_DELAY_MS, ): Promise { const ctx = getHandlerContext(); - const executeButton = createButtonExecutor(executor, axeHelpers, 
debuggerManager); + const executeButton = createButtonExecutor(executor, axeHelpers, debuggerManager, settleDelayMs); const result = await executeButton(params); setUiActionStructuredOutput(ctx, result); diff --git a/src/mcp/tools/ui-automation/gesture.ts b/src/mcp/tools/ui-automation/gesture.ts index 447074cb3..01a6848d0 100644 --- a/src/mcp/tools/ui-automation/gesture.ts +++ b/src/mcp/tools/ui-automation/gesture.ts @@ -19,6 +19,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -27,6 +28,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const gestureSchema = z.object({ @@ -49,6 +51,7 @@ const gestureSchema = z.object({ .number() .int() .min(1) + .max(2000) .optional() .describe( 'Screen width in pixels. Used for gesture calculations. Auto-detected if not provided.', @@ -57,28 +60,33 @@ const gestureSchema = z.object({ .number() .int() .min(1) + .max(3000) .optional() .describe( 'Screen height in pixels. Used for gesture calculations. 
Auto-detected if not provided.', ), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) + .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('Duration of the gesture in seconds.'), delta: z .number() - .min(0, { message: 'Delta must be non-negative' }) + .positive({ message: 'Delta must be greater than 0' }) + .max(200, { message: 'Delta must be at most 200' }) .optional() .describe('Distance to move in pixels.'), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('Delay before starting the gesture in seconds.'), postDelay: z .number() .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('Delay after completing the gesture in seconds.'), }); @@ -132,9 +140,13 @@ export function createGestureExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'gesture', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to execute gesture '${preset}'.`, }); diff --git a/src/mcp/tools/ui-automation/key_press.ts b/src/mcp/tools/ui-automation/key_press.ts index 7c8afc647..b851d198a 100644 --- a/src/mcp/tools/ui-automation/key_press.ts +++ b/src/mcp/tools/ui-automation/key_press.ts @@ -12,6 +12,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from 
'./shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -20,6 +21,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const keyPressSchema = z.object({ @@ -29,10 +31,11 @@ const keyPressSchema = z.object({ .int({ message: 'HID keycode to press (0-255)' }) .min(0) .max(255) - .describe('HID keycode'), + .describe('HID keycode. Common values: 40 Return/Enter, 42 Backspace, 43 Tab, 44 Space.'), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) + .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('seconds'), }); @@ -70,9 +73,13 @@ export function createKeyPressExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'key', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to simulate key press (code: ${keyCode}).`, }); diff --git a/src/mcp/tools/ui-automation/key_sequence.ts b/src/mcp/tools/ui-automation/key_sequence.ts index 95cafe611..998ad7f56 100644 --- a/src/mcp/tools/ui-automation/key_sequence.ts +++ b/src/mcp/tools/ui-automation/key_sequence.ts @@ -18,6 +18,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from 
'./shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -26,6 +27,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const keySequenceSchema = z.object({ @@ -33,8 +35,13 @@ const keySequenceSchema = z.object({ keyCodes: z .array(z.number().int().min(0).max(255)) .min(1, { message: 'At least one key code required' }) - .describe('HID keycodes'), - delay: z.number().min(0, { message: 'Delay must be non-negative' }).optional(), + .max(100, { message: 'At most 100 key codes are supported' }) + .describe('HID keycodes. Common values: 40 Return/Enter, 42 Backspace, 43 Tab, 44 Space.'), + delay: z + .number() + .min(0, { message: 'Delay must be non-negative' }) + .max(5, { message: 'Delay must be at most 5 seconds' }) + .optional(), }); type KeySequenceParams = z.infer; @@ -73,9 +80,13 @@ export function createKeySequenceExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'key-sequence', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to execute key sequence.', }); diff --git a/src/mcp/tools/ui-automation/long_press.ts b/src/mcp/tools/ui-automation/long_press.ts index 4a202066d..38953c6e9 100644 --- a/src/mcp/tools/ui-automation/long_press.ts +++ b/src/mcp/tools/ui-automation/long_press.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Long Press * - * Long press at specific coordinates 
for given duration (ms). - * Use snapshot_ui for precise coordinates (don't guess from screenshots). + * Long presses a semantic UI element from the runtime snapshot store. */ import * as z from 'zod'; @@ -18,7 +17,8 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -26,17 +26,19 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const longPressSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z.number().int({ message: 'X coordinate for the long press' }), - y: z.number().int({ message: 'Y coordinate for the long press' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), duration: z .number() - .positive({ message: 'Duration of the long press in milliseconds' }) + .positive({ message: 'Duration must be greater than 0 milliseconds' }) + .max(10_000, { message: 'Duration must be at most 10000 milliseconds' }) .describe('milliseconds'), }); @@ -56,8 +58,15 @@ export function createLongPressExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'long_press'; - const { simulatorId, x, y, duration } = params; - const action = { type: 'long-press' as const, x, y, durationMs: duration }; + const { simulatorId, 
elementRef, duration } = params; + const action = { type: 'long-press' as const, elementRef, durationMs: duration }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'longPress'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -68,13 +77,14 @@ export function createLongPressExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const delayInSeconds = Number(duration) / 1000; + const center = getRuntimeElementActivationPoint(resolution.element); + const delayInSeconds = duration / 1000; const commandArgs = [ 'touch', '-x', - String(x), + String(center.x), '-y', - String(y), + String(center.y), '--down', '--up', '--delay', @@ -83,23 +93,29 @@ export function createLongPressExecutor( log( 'info', - `${LOG_PREFIX}/${toolName}: Starting for (${x}, ${y}), ${duration}ms on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting for elementRef ${elementRef}, ${duration}ms on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'touch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => `Failed to simulate long press at (${x}, ${y}).`, + axeFailureMessage: () => `Failed to simulate long press on elementRef ${elementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); return createUiActionFailureResult(action, 
simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; diff --git a/src/mcp/tools/ui-automation/screenshot.ts b/src/mcp/tools/ui-automation/screenshot.ts index 426b5663d..945c269f9 100644 --- a/src/mcp/tools/ui-automation/screenshot.ts +++ b/src/mcp/tools/ui-automation/screenshot.ts @@ -60,6 +60,44 @@ interface SimctlDeviceList { devices: Record; } +async function getSimulatorDeviceForSimulatorId( + simulatorId: string, + executor: CommandExecutor, +): Promise { + const listCommand = ['xcrun', 'simctl', 'list', 'devices', '-j']; + const result = await executor(listCommand, `${LOG_PREFIX}: list devices`, false); + + if (!result.success || !result.output) { + return null; + } + + const data = JSON.parse(result.output) as SimctlDeviceList; + for (const devices of Object.values(data.devices)) { + const match = devices.find((device) => device.udid === simulatorId); + if (match) { + return match; + } + } + + return null; +} + +async function assertSimulatorBooted( + simulatorId: string, + executor: CommandExecutor, +): Promise { + const device = await getSimulatorDeviceForSimulatorId(simulatorId, executor); + if (!device) { + throw new SystemError(`Simulator ${simulatorId} was not found.`); + } + if (device.state !== 'Booted') { + throw new SystemError( + `Simulator ${simulatorId} is ${device.state ?? 'not booted'}. 
Boot the simulator and try again.`, + ); + } + return device; +} + function escapeSwiftStringLiteral(value: string): string { return value .replace(/\\/g, '\\\\') @@ -96,21 +134,10 @@ export async function getDeviceNameForSimulatorId( executor: CommandExecutor, ): Promise { try { - const listCommand = ['xcrun', 'simctl', 'list', 'devices', '-j']; - const result = await executor(listCommand, `${LOG_PREFIX}: list devices`, false); - - if (result.success && result.output) { - const data = JSON.parse(result.output) as SimctlDeviceList; - const devices = data.devices; - - for (const runtime of Object.keys(devices)) { - for (const device of devices[runtime]) { - if (device.udid === simulatorId) { - log('info', `${LOG_PREFIX}: Found device name "${device.name}" for ${simulatorId}`); - return device.name; - } - } - } + const device = await getSimulatorDeviceForSimulatorId(simulatorId, executor); + if (device) { + log('info', `${LOG_PREFIX}: Found device name "${device.name}" for ${simulatorId}`); + return device.name; } log('warn', `${LOG_PREFIX}: Could not find device name for ${simulatorId}`); return null; @@ -219,6 +246,7 @@ export function createScreenshotExecutor( ); try { + const simulatorDevice = await assertSimulatorBooted(simulatorId, executor); const result = await executor(commandArgs, `${LOG_PREFIX}: screenshot`, false); if (!result.success) { @@ -228,8 +256,7 @@ export function createScreenshotExecutor( log('info', `${LOG_PREFIX}/screenshot: Success for ${simulatorId}`); try { - const deviceName = await getDeviceNameForSimulatorId(simulatorId, executor); - const isLandscape = await detectLandscapeMode(executor, deviceName ?? 
undefined); + const isLandscape = await detectLandscapeMode(executor, simulatorDevice.name); if (isLandscape) { log('info', `${LOG_PREFIX}/screenshot: Landscape mode detected, rotating +90`); const rotated = await rotateImage(screenshotPath, 90, executor); diff --git a/src/mcp/tools/ui-automation/shared/domain-result.ts b/src/mcp/tools/ui-automation/shared/domain-result.ts index 0700a9896..b2168dfbc 100644 --- a/src/mcp/tools/ui-automation/shared/domain-result.ts +++ b/src/mcp/tools/ui-automation/shared/domain-result.ts @@ -1,4 +1,4 @@ -import type { ToolHandlerContext } from '../../../../rendering/types.ts'; +import type { RenderHints, ToolHandlerContext } from '../../../../rendering/types.ts'; import type { BasicDiagnostics, CapturePayload, @@ -6,12 +6,19 @@ import type { UiAction, UiActionResultDomainResult, } from '../../../../types/domain-results.ts'; +import type { + UiAutomationRecoverableError, + UiAutomationRecoverableErrorCode, + UiWaitMatch, +} from '../../../../types/ui-snapshot.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { createBasicDiagnostics } from '../../../../utils/diagnostics.ts'; import { AxeError, DependencyError, SystemError } from '../../../../utils/errors.ts'; const UI_ACTION_SCHEMA = 'xcodebuildmcp.output.ui-action-result'; const CAPTURE_SCHEMA = 'xcodebuildmcp.output.capture-result'; +const REFRESH_SNAPSHOT_RECOVERY_HINT = + 'Run snapshot_ui again and retry with a current element reference from the refreshed snapshot.'; function createDiagnostics( warnings: readonly string[] = [], @@ -28,10 +35,25 @@ function compact(values: Array): string[] { return values.filter((value): value is string => typeof value === 'string' && value.length > 0); } +export function createUiAutomationRecoverableError(params: { + code: UiAutomationRecoverableErrorCode; + message: string; + recoveryHint?: string; + elementRef?: string; +}): UiAutomationRecoverableError { + return { + code: params.code, + message: 
params.message, + recoveryHint: params.recoveryHint ?? REFRESH_SNAPSHOT_RECOVERY_HINT, + ...(params.elementRef ? { elementRef: params.elementRef } : {}), + }; +} + export function createUiActionSuccessResult( action: UiAction, simulatorId: string, warnings: Array = [], + options: { uiError?: UiAutomationRecoverableError } = {}, ): UiActionResultDomainResult { return { kind: 'ui-action-result', @@ -41,6 +63,7 @@ export function createUiActionSuccessResult( action, artifacts: { simulatorId }, diagnostics: createDiagnostics(compact(warnings), []), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -51,6 +74,7 @@ export function createUiActionFailureResult( options: { warnings?: Array; details?: Array; + uiError?: UiAutomationRecoverableError; } = {}, ): UiActionResultDomainResult { return { @@ -61,6 +85,7 @@ export function createUiActionFailureResult( action, artifacts: { simulatorId }, diagnostics: createDiagnostics(compact(options.warnings ?? []), compact(options.details ?? [])), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -70,6 +95,8 @@ export function createCaptureSuccessResult( screenshotPath?: string; capture?: CapturePayload; warnings?: Array; + uiError?: UiAutomationRecoverableError; + waitMatch?: UiWaitMatch; } = {}, ): CaptureResultDomainResult { return { @@ -83,6 +110,8 @@ export function createCaptureSuccessResult( }, ...(options.capture ? { capture: options.capture } : {}), diagnostics: createDiagnostics(compact(options.warnings ?? []), []), + ...(options.uiError ? { uiError: options.uiError } : {}), + ...(options.waitMatch ? 
{ waitMatch: options.waitMatch } : {}), }; } @@ -91,8 +120,10 @@ export function createCaptureFailureResult( message: string, options: { screenshotPath?: string; + capture?: CapturePayload; warnings?: Array; details?: Array; + uiError?: UiAutomationRecoverableError; } = {}, ): CaptureResultDomainResult { return { @@ -104,7 +135,9 @@ export function createCaptureFailureResult( simulatorId, ...(options.screenshotPath ? { screenshotPath: options.screenshotPath } : {}), }, + ...(options.capture ? { capture: options.capture } : {}), diagnostics: createDiagnostics(compact(options.warnings ?? []), compact(options.details ?? [])), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -115,6 +148,10 @@ interface AxeErrorMessages { unexpectedFailureMessage?: (message: string) => string; } +export function shouldInvalidateRuntimeSnapshotAfterActionError(error: unknown): boolean { + return error instanceof AxeError; +} + export function mapAxeCommandError( error: unknown, messages: AxeErrorMessages, @@ -129,7 +166,7 @@ export function mapAxeCommandError( if (error instanceof AxeError) { return { message: messages.axeFailureMessage(error), - diagnostics: createDiagnostics([], compact([error.axeOutput || error.message])), + diagnostics: createDiagnostics([], compact([error.axeOutput ?? error.message])), }; } @@ -161,10 +198,12 @@ export function setUiActionStructuredOutput( export function setCaptureStructuredOutput( ctx: ToolHandlerContext, result: CaptureResultDomainResult, + renderHints?: RenderHints, ): void { ctx.structuredOutput = { result, schema: CAPTURE_SCHEMA, schemaVersion: '2', + ...(renderHints ? 
{ renderHints } : {}), }; } diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts new file mode 100644 index 000000000..4adba639e --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -0,0 +1,701 @@ +import type { AccessibilityNode, Frame, Point } from '../../../../types/domain-results.ts'; +import type { + RuntimeActionHintV1, + RuntimeActionNameV1, + RuntimeElementRoleV1, + RuntimeElementStateV1, + RuntimeElementV1, + RuntimeSnapshotElementRecord, + RuntimeSnapshotRecord, + RuntimeSnapshotV1, +} from '../../../../types/ui-snapshot.ts'; + +export const RUNTIME_SNAPSHOT_PROTOCOL = 'rs/1' as const; +export const RUNTIME_SNAPSHOT_TTL_MS = 60_000; + +interface NormalizedNodeInput { + node: AccessibilityNode; + path: string; + depth: number; +} + +export class RuntimeSnapshotParseError extends Error { + constructor(message: string) { + super(message); + this.name = 'RuntimeSnapshotParseError'; + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function normalizeText(value: unknown): string | undefined { + if (typeof value !== 'string' && typeof value !== 'number' && typeof value !== 'boolean') { + return undefined; + } + + const normalized = String(value).replace(/\s+/g, ' ').trim(); + return normalized.length > 0 ? 
normalized : undefined; +} + +function readText(node: AccessibilityNode, keys: readonly string[]): string | undefined { + for (const key of keys) { + const value = normalizeText(node[key]); + if (value) { + return value; + } + } + return undefined; +} + +function isFiniteNumber(value: unknown): value is number { + return typeof value === 'number' && Number.isFinite(value); +} + +function normalizeFrame(frame: Frame): Frame { + return { + x: Number(frame.x.toFixed(2)), + y: Number(frame.y.toFixed(2)), + width: Number(frame.width.toFixed(2)), + height: Number(frame.height.toFixed(2)), + }; +} + +function readFrameObject(value: unknown): Frame | null { + if (!isRecord(value)) { + return null; + } + + const { x, y, width, height } = value; + if ( + !isFiniteNumber(x) || + !isFiniteNumber(y) || + !isFiniteNumber(width) || + !isFiniteNumber(height) + ) { + return null; + } + + return normalizeFrame({ x, y, width, height }); +} + +function parseAxFrame(value: unknown): Frame | null { + if (typeof value !== 'string') { + return null; + } + + const numbers = value.match(/-?\d+(?:\.\d+)?/g)?.map(Number) ?? []; + if (numbers.length < 4 || numbers.some((entry) => !Number.isFinite(entry))) { + return null; + } + + const [x = 0, y = 0, width = 0, height = 0] = numbers; + return normalizeFrame({ x, y, width, height }); +} + +function readFrame(node: AccessibilityNode): Frame { + return ( + readFrameObject(node.frame) ?? parseAxFrame(node.AXFrame) ?? 
{ x: 0, y: 0, width: 0, height: 0 } + ); +} + +function deriveRole(node: AccessibilityNode): RuntimeElementRoleV1 | undefined { + const roleText = [node.role, node.type, node.subrole, node.role_description] + .map((value) => normalizeText(value)?.toLowerCase()) + .filter((value): value is string => value !== undefined) + .join(' '); + + if (roleText.length === 0) return undefined; + if (/application/.test(roleText)) return 'application'; + if (/window/.test(roleText)) return 'window'; + if (/button/.test(roleText)) return 'button'; + if (/keyboard|key/.test(roleText)) return 'keyboard-key'; + if ( + /textfield|text field|searchfield|search field|securetext|textarea|combo box/.test(roleText) + ) { + return 'text-field'; + } + if (/statictext|text/.test(roleText)) return 'text'; + if (/image/.test(roleText)) return 'image'; + if (/switch|checkbox|check box/.test(roleText)) return 'switch'; + if (/slider/.test(roleText)) return 'slider'; + if (/tab/.test(roleText)) return 'tab'; + if (/cell|row/.test(roleText)) return 'cell'; + if (/scroll/.test(roleText)) return 'scroll-view'; + if (/table|list|outline|collection/.test(roleText)) return 'list'; + if (/menu/.test(roleText)) return 'menu'; + return 'other'; +} + +function isVisible(frame: Frame): boolean { + return frame.width > 0 && frame.height > 0; +} + +function framesIntersect(a: Frame, b: Frame): boolean { + return a.x < b.x + b.width && a.x + a.width > b.x && a.y < b.y + b.height && a.y + a.height > b.y; +} + +function pointInsideFrame(point: Point, frame: Frame): boolean { + return ( + point.x >= frame.x && + point.x <= frame.x + frame.width && + point.y >= frame.y && + point.y <= frame.y + frame.height + ); +} + +function hasPointAction(actions: readonly RuntimeActionNameV1[]): boolean { + return actions.some( + (action) => + action === 'tap' || action === 'typeText' || action === 'longPress' || action === 'touch', + ); +} + +function isTapRole(role: RuntimeElementRoleV1 | undefined): boolean { + return ( + 
role === 'button' || + role === 'cell' || + role === 'keyboard-key' || + role === 'switch' || + role === 'tab' || + role === 'text-field' + ); +} + +function isGenericInternalIdentifier(identifier: string | undefined): boolean { + return identifier === 'label-view'; +} + +function deriveActions(params: { + role: RuntimeElementRoleV1 | undefined; + enabled: boolean; + frame: Frame; + customActions: readonly string[]; + hasSemanticIdentity: boolean; +}): RuntimeActionNameV1[] { + const { role, enabled, frame, customActions, hasSemanticIdentity } = params; + if (!enabled || !isVisible(frame)) { + return []; + } + + const actions = new Set(); + if (isTapRole(role) || (customActions.length > 0 && hasSemanticIdentity)) { + actions.add('tap'); + } + if (role === 'text-field') { + actions.add('typeText'); + } + if (role !== 'application' && role !== 'window') { + actions.add('longPress'); + actions.add('touch'); + } + if (role === 'scroll-view' || role === 'list' || role === 'cell') { + actions.add('swipeWithin'); + } + + return [...actions]; +} + +function hashString(input: string): string { + let hash = 0x811c9dc5; + for (let index = 0; index < input.length; index += 1) { + hash ^= input.charCodeAt(index); + hash = Math.imul(hash, 0x01000193) >>> 0; + } + return hash.toString(36).padStart(7, '0'); +} + +function readChildren(node: AccessibilityNode): AccessibilityNode[] { + return Array.isArray(node.children) ? 
node.children : []; +} + +function normalizeCustomActions(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.map(normalizeText).filter((entry): entry is string => entry !== undefined); +} + +function readState(node: AccessibilityNode, frame: Frame): RuntimeElementStateV1 | undefined { + const state: RuntimeElementStateV1 = { + enabled: node.enabled !== false, + visible: isVisible(frame), + }; + + if (typeof node.focused === 'boolean') { + state.focused = node.focused; + } else if (typeof node.AXFocused === 'boolean') { + state.focused = node.AXFocused; + } + + if (typeof node.selected === 'boolean') { + state.selected = node.selected; + } else if (typeof node.AXSelected === 'boolean') { + state.selected = node.AXSelected; + } + + return Object.keys(state).length > 0 ? state : undefined; +} + +function stableSignature(params: { + role?: RuntimeElementRoleV1; + label?: string; + value?: string; + identifier?: string; + path: string; + frame: Frame; +}): string { + return hashString(JSON.stringify(params)); +} + +function normalizeNode(input: NormalizedNodeInput, index: number): RuntimeSnapshotElementRecord { + const { node, path, depth } = input; + const ref = `e${index + 1}`; + const frame = readFrame(node); + const role = deriveRole(node); + const label = readText(node, ['AXLabel', 'title', 'help', 'label']); + const value = readText(node, ['AXValue', 'value']); + const identifier = readText(node, ['AXUniqueId', 'identifier', 'id']); + const enabled = node.enabled !== false; + const customActions = normalizeCustomActions(node.custom_actions); + const actions = deriveActions({ + role, + enabled, + frame, + customActions, + hasSemanticIdentity: + label !== undefined || + value !== undefined || + (identifier !== undefined && !isGenericInternalIdentifier(identifier)), + }); + const state = readState(node, frame); + + return { + publicElement: { + ref, + ...(role ? { role } : {}), + ...(label ? { label } : {}), + ...(value ? 
{ value } : {}), + ...(identifier ? { identifier } : {}), + frame, + ...(state ? { state } : {}), + actions, + }, + metadata: { + path, + depth, + childCount: readChildren(node).length, + signature: stableSignature({ role, label, value, identifier, path, frame }), + }, + rawNode: node, + }; +} + +function isContainerRole(role: RuntimeElementRoleV1 | undefined): boolean { + return ( + role === 'application' || + role === 'window' || + role === 'scroll-view' || + role === 'list' || + role === 'other' + ); +} + +function isDescendantPath(parentPath: string, candidatePath: string): boolean { + return candidatePath.startsWith(`${parentPath}.`); +} + +function isLargeEnoughInferredScrollContainer( + role: RuntimeElementRoleV1 | undefined, + frame: Frame, +): boolean { + if (role !== 'other') { + return true; + } + return frame.width >= 120 && frame.height >= 120; +} + +function frameOverflowsContainer(frame: Frame, containerFrame: Frame): boolean { + const tolerance = 8; + return ( + frame.x < containerFrame.x - tolerance || + frame.y < containerFrame.y - tolerance || + frame.x + frame.width > containerFrame.x + containerFrame.width + tolerance || + frame.y + frame.height > containerFrame.y + containerFrame.height + tolerance + ); +} + +function isSheetGrabberElement(element: RuntimeSnapshotElementRecord): boolean { + return element.publicElement.label?.toLowerCase() === 'sheet grabber'; +} + +function findSheetGrabberDescendant( + element: RuntimeSnapshotElementRecord, + elements: RuntimeSnapshotElementRecord[], +): RuntimeSnapshotElementRecord | null { + return ( + elements.find( + (candidate) => + candidate !== element && + isDescendantPath(element.metadata.path, candidate.metadata.path) && + isSheetGrabberElement(candidate), + ) ?? 
null + ); +} + +function createSheetSwipeFrame(containerFrame: Frame, grabberFrame: Frame): Frame { + const top = Math.round( + Math.max( + grabberFrame.y + grabberFrame.height + 120, + containerFrame.y + containerFrame.height * 0.35, + ), + ); + const bottom = Math.round(containerFrame.y + containerFrame.height * 0.85); + const height = Math.max(2, bottom - top); + return normalizeFrame({ + x: containerFrame.x, + y: Math.min(top, bottom - 2), + width: containerFrame.width, + height, + }); +} + +function findViewportFrame(elements: RuntimeSnapshotElementRecord[]): Frame | null { + return ( + elements.find( + (element) => + (element.publicElement.role === 'application' || element.publicElement.role === 'window') && + isVisible(element.publicElement.frame), + )?.publicElement.frame ?? null + ); +} + +function applyViewportVisibility(elements: RuntimeSnapshotElementRecord[]): void { + const viewport = findViewportFrame(elements); + if (!viewport) { + return; + } + + for (const element of elements) { + const publicElement = element.publicElement; + if (publicElement.role === 'application' || publicElement.role === 'window') { + continue; + } + + if (!framesIntersect(publicElement.frame, viewport)) { + publicElement.state = { ...publicElement.state, visible: false }; + publicElement.actions = []; + continue; + } + + const activationPoint = getDefaultRuntimeElementActivationPoint(element); + if (!pointInsideFrame(activationPoint, viewport)) { + publicElement.actions = publicElement.actions.filter((action) => action === 'swipeWithin'); + continue; + } + + const adjustedActivationPoint = getBottomClippedActivationPoint(element, viewport); + if (adjustedActivationPoint) { + element.metadata.activationPoint = adjustedActivationPoint; + } + } +} + +function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): void { + for (const element of elements) { + const { publicElement, metadata } = element; + if ( + !isContainerRole(publicElement.role) || + 
!isVisible(publicElement.frame) || + !isLargeEnoughInferredScrollContainer(publicElement.role, publicElement.frame) + ) { + continue; + } + if (publicElement.actions.includes('swipeWithin')) { + continue; + } + + const hasOverflowingDescendant = elements.some((candidate) => { + if (candidate === element) { + return false; + } + return ( + isDescendantPath(metadata.path, candidate.metadata.path) && + frameOverflowsContainer(candidate.publicElement.frame, publicElement.frame) + ); + }); + + const sheetGrabber = + publicElement.role === 'application' || publicElement.role === 'window' + ? findSheetGrabberDescendant(element, elements) + : null; + + if (sheetGrabber) { + publicElement.actions.push('swipeWithin'); + metadata.swipeFrame = createSheetSwipeFrame( + publicElement.frame, + sheetGrabber.publicElement.frame, + ); + continue; + } + + if ( + publicElement.role !== 'application' && + publicElement.role !== 'window' && + hasOverflowingDescendant + ) { + publicElement.actions.push('swipeWithin'); + } + } +} + +function flattenHierarchy(roots: AccessibilityNode[]): NormalizedNodeInput[] { + const flattened: NormalizedNodeInput[] = []; + + function visit(node: AccessibilityNode, path: string, depth: number): void { + flattened.push({ node, path, depth }); + readChildren(node).forEach((child, index) => visit(child, `${path}.${index}`, depth + 1)); + } + + roots.forEach((root, index) => visit(root, String(index), 0)); + return flattened; +} + +function toActionHints(elements: readonly RuntimeElementV1[]): RuntimeActionHintV1[] { + return elements.flatMap((element) => + element.actions.map((action) => ({ + action, + elementRef: element.ref, + ...(element.label ? 
{ label: element.label } : {}), + })), + ); +} + +function createScreenHash(params: { + elements: readonly RuntimeElementV1[]; + actions: readonly RuntimeActionHintV1[]; +}): string { + return hashString( + JSON.stringify({ + protocol: RUNTIME_SNAPSHOT_PROTOCOL, + elements: params.elements, + actions: params.actions, + }), + ); +} + +export function extractAccessibilityHierarchy(responseText: string): AccessibilityNode[] { + let parsed: unknown; + try { + parsed = JSON.parse(responseText) as unknown; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new RuntimeSnapshotParseError(`AXe describe-ui returned invalid JSON: ${message}`); + } + + if (Array.isArray(parsed)) { + return parsed as AccessibilityNode[]; + } + + if (isRecord(parsed) && Array.isArray(parsed.elements)) { + return parsed.elements as AccessibilityNode[]; + } + + throw new RuntimeSnapshotParseError( + 'AXe describe-ui did not return an accessibility element array.', + ); +} + +export function createRuntimeSnapshotRecord(params: { + simulatorId: string; + uiHierarchy: AccessibilityNode[]; + nowMs?: number; + seq?: number; +}): RuntimeSnapshotRecord { + const capturedAtMs = params.nowMs ?? Date.now(); + const expiresAtMs = capturedAtMs + RUNTIME_SNAPSHOT_TTL_MS; + const elements = flattenHierarchy(params.uiHierarchy).map((input, index) => + normalizeNode(input, index), + ); + applyViewportVisibility(elements); + inferScrollableContainers(elements); + const publicElements = elements.map((element) => element.publicElement); + const actions = toActionHints(publicElements); + const screenHash = createScreenHash({ elements: publicElements, actions }); + const seq = params.seq ?? 
0; + const elementsByRef = new Map(elements.map((element) => [element.publicElement.ref, element])); + const payload: RuntimeSnapshotV1 = { + type: 'runtime-snapshot', + protocol: RUNTIME_SNAPSHOT_PROTOCOL, + simulatorId: params.simulatorId, + screenHash, + seq, + capturedAtMs, + expiresAtMs, + elements: publicElements, + actions, + }; + + return { + simulatorId: params.simulatorId, + screenHash, + seq, + capturedAtMs, + expiresAtMs, + payload, + elements, + elementsByRef, + }; +} + +export function parseRuntimeSnapshotResponse(params: { + simulatorId: string; + responseText: string; + nowMs?: number; +}): RuntimeSnapshotRecord { + return createRuntimeSnapshotRecord({ + simulatorId: params.simulatorId, + uiHierarchy: extractAccessibilityHierarchy(params.responseText), + nowMs: params.nowMs, + }); +} + +export function getPrimaryRuntimeElement( + snapshot: RuntimeSnapshotV1, + action: RuntimeActionNameV1 = 'tap', +): RuntimeElementV1 | null { + return ( + snapshot.elements.find((element) => element.actions.includes(action)) ?? + snapshot.elements[0] ?? 
+ null + ); +} + +export function getRuntimeElementCenter(element: RuntimeSnapshotElementRecord): Point { + const { frame } = element.publicElement; + return { + x: Math.round(frame.x + frame.width / 2), + y: Math.round(frame.y + frame.height / 2), + }; +} + +function getDefaultRuntimeElementActivationPoint(element: RuntimeSnapshotElementRecord): Point { + const { frame, role } = element.publicElement; + if (role === 'switch' && frame.width > 120) { + return { + x: Math.round(frame.x + frame.width - 52), + y: Math.round(frame.y + frame.height / 2), + }; + } + + return getRuntimeElementCenter(element); +} + +function getBottomClippedActivationPoint( + element: RuntimeSnapshotElementRecord, + viewport: Frame, +): Point | null { + if (!hasPointAction(element.publicElement.actions)) { + return null; + } + + const defaultPoint = getDefaultRuntimeElementActivationPoint(element); + const bottomClippedZoneStart = viewport.y + viewport.height * 0.93; + if (defaultPoint.y < bottomClippedZoneStart) { + return null; + } + + const { frame } = element.publicElement; + const verticalOffset = Math.min(Math.max(frame.height * 0.1, 8), frame.height / 2); + const adjustedPoint = { + x: defaultPoint.x, + y: Math.round(frame.y + verticalOffset), + }; + + if (!pointInsideFrame(adjustedPoint, frame) || !pointInsideFrame(adjustedPoint, viewport)) { + return null; + } + + return adjustedPoint; +} + +export function getRuntimeElementActivationPoint(element: RuntimeSnapshotElementRecord): Point { + return element.metadata.activationPoint ?? 
getDefaultRuntimeElementActivationPoint(element); +} + +export type RuntimeSwipeDirection = 'up' | 'down' | 'left' | 'right'; + +export type RuntimeSwipePointResolution = + | { ok: true; from: Point; to: Point } + | { ok: false; message: string }; + +function isDegenerateSwipe(from: Point, to: Point): boolean { + return from.x === to.x && from.y === to.y; +} + +function getFrameCenter(frame: Frame): Point { + return { + x: Math.round(frame.x + frame.width / 2), + y: Math.round(frame.y + frame.height / 2), + }; +} + +function getRuntimeSwipeCenter( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, + swipeFrame: Frame, +): Point { + const center = getFrameCenter(swipeFrame); + const { role } = element.publicElement; + if ( + (role === 'application' || role === 'window') && + (direction === 'left' || direction === 'right') + ) { + return { x: center.x, y: Math.round(swipeFrame.y + swipeFrame.height * 0.6) }; + } + return center; +} + +export function getRuntimeElementSwipePoints( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, +): RuntimeSwipePointResolution { + const frame = element.metadata.swipeFrame ?? 
element.publicElement.frame; + if (frame.width < 2 || frame.height < 2) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' is too small for a reliable swipe.`, + }; + } + + const center = getRuntimeSwipeCenter(element, direction, frame); + const horizontalInset = Math.max(1, Math.min(Math.max(frame.width * 0.15, 24), frame.width / 3)); + const verticalInset = Math.max(1, Math.min(Math.max(frame.height * 0.15, 24), frame.height / 3)); + const left = Math.round(frame.x + horizontalInset); + const right = Math.round(frame.x + frame.width - horizontalInset); + const top = Math.round(frame.y + verticalInset); + const bottom = Math.round(frame.y + frame.height - verticalInset); + + const points = ((): { from: Point; to: Point } => { + switch (direction) { + case 'up': + return { from: { x: center.x, y: bottom }, to: { x: center.x, y: top } }; + case 'down': + return { from: { x: center.x, y: top }, to: { x: center.x, y: bottom } }; + case 'left': + return { from: { x: right, y: center.y }, to: { x: left, y: center.y } }; + case 'right': + return { from: { x: left, y: center.y }, to: { x: right, y: center.y } }; + } + })(); + + if (isDegenerateSwipe(points.from, points.to)) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' does not provide non-degenerate ${direction} swipe points.`, + }; + } + + return { ok: true, ...points }; +} diff --git a/src/mcp/tools/ui-automation/shared/semantic-tap.ts b/src/mcp/tools/ui-automation/shared/semantic-tap.ts new file mode 100644 index 000000000..0ea5c2b6e --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/semantic-tap.ts @@ -0,0 +1,138 @@ +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { executeAxeCommand } from './axe-command.ts'; +import type { AxeHelpers } from './axe-command.ts'; +import { getRuntimeElementActivationPoint } from './runtime-snapshot.ts'; +import type { RuntimeSnapshotElementRecord } from 
'../../../../types/ui-snapshot.ts'; + +export interface SemanticTapCommand { + selectorArgs: string[] | null; + coordinateArgs: string[]; + primaryArgs: string[]; + targetDescription: string; + usedSelector: boolean; +} + +function axeElementTypeFor(element: RuntimeSnapshotElementRecord): string | null { + switch (element.publicElement.role) { + case 'button': + return 'Button'; + case 'cell': + return 'Cell'; + case 'keyboard-key': + return 'Key'; + case 'switch': + return 'Switch'; + case 'tab': + return 'Tab'; + case 'text-field': + return 'TextField'; + default: + return null; + } +} + +export function isRecoverableAxeSelectorError(error: unknown): boolean { + const messageParts = error instanceof Error ? [error.message] : [String(error)]; + if (typeof error === 'object' && error !== null && 'axeOutput' in error) { + const { axeOutput } = error as { axeOutput?: unknown }; + if (typeof axeOutput === 'string') { + messageParts.push(axeOutput); + } + } + + const message = messageParts.join('\n'); + return ( + /multiple(?:\s+\(?\d+\)?)?\s+accessibility\s+elements\s+matched/i.test(message) || + /no\s+accessibility\s+element\s+matched/i.test(message) + ); +} + +function hasDuplicateSelectorMatch(params: { + element: RuntimeSnapshotElementRecord; + elements: readonly RuntimeSnapshotElementRecord[]; + selector: 'identifier' | 'label' | 'value'; + value: string; +}): boolean { + const targetType = axeElementTypeFor(params.element); + const matches = params.elements.filter((candidate) => { + if (axeElementTypeFor(candidate) !== targetType) { + return false; + } + return candidate.publicElement[params.selector] === params.value; + }); + + return matches.length > 1; +} + +export function createSemanticTapCommand( + element: RuntimeSnapshotElementRecord, + elementRef: string, + extraArgs: readonly string[] = [], + elements: readonly RuntimeSnapshotElementRecord[] = [element], +): SemanticTapCommand { + const { identifier, label, value } = element.publicElement; + const 
activationPoint = getRuntimeElementActivationPoint(element); + const elementType = axeElementTypeFor(element); + const elementTypeArgs = elementType ? ['--element-type', elementType] : []; + const coordinateArgs = + element.publicElement.role === 'switch' + ? [ + 'touch', + '-x', + String(activationPoint.x), + '-y', + String(activationPoint.y), + '--down', + '--up', + ] + : ['tap', '-x', String(activationPoint.x), '-y', String(activationPoint.y), ...extraArgs]; + + const selectorArgs = (() => { + if (element.publicElement.role === 'switch') return null; + if ( + identifier && + !hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier }) + ) { + return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs]; + } + if ( + label && + !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label }) + ) { + return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs]; + } + if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) { + return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs]; + } + return null; + })(); + + return { + selectorArgs, + coordinateArgs, + primaryArgs: selectorArgs ?? coordinateArgs, + targetDescription: selectorArgs + ? 
`elementRef ${elementRef} semantic selector` + : `elementRef ${elementRef} activation point (${activationPoint.x}, ${activationPoint.y})`, + usedSelector: selectorArgs !== null, + }; +} + +export async function executeSemanticTapWithAmbiguityFallback(params: { + command: SemanticTapCommand; + simulatorId: string; + executor: CommandExecutor; + axeHelpers: AxeHelpers; +}): Promise { + const { command, simulatorId, executor, axeHelpers } = params; + + try { + await executeAxeCommand(command.primaryArgs, simulatorId, 'tap', executor, axeHelpers); + } catch (error) { + if (!command.selectorArgs || !isRecoverableAxeSelectorError(error)) { + throw error; + } + + await executeAxeCommand(command.coordinateArgs, simulatorId, 'tap', executor, axeHelpers); + } +} diff --git a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts index cd0fa28c6..efdb5f15c 100644 --- a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts +++ b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts @@ -1,21 +1,144 @@ -const SNAPSHOT_UI_WARNING_TIMEOUT_MS = 60000; // 60 seconds +import type { + RuntimeActionNameV1, + RuntimeElementResolution, + RuntimeSnapshotLookup, + RuntimeSnapshotRecord, + UiAutomationRecoverableError, +} from '../../../../types/ui-snapshot.ts'; -const snapshotUiTimestamps = new Map(); +const runtimeSnapshots = new Map(); +const runtimeSnapshotSeqs = new Map(); -export function recordSnapshotUiCall(simulatorId: string): void { - snapshotUiTimestamps.set(simulatorId, Date.now()); +function snapshotAgeMs(snapshot: RuntimeSnapshotRecord, nowMs: number): number { + return Math.max(0, nowMs - snapshot.capturedAtMs); +} + +function snapshotMissingError(): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_MISSING', + message: 'No runtime UI snapshot is available for this simulator.', + recoveryHint: + 'Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot.', + }; +} + +function 
snapshotExpiredError( + snapshot: RuntimeSnapshotRecord, + nowMs: number, +): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_EXPIRED', + message: 'The runtime UI snapshot for this simulator has expired.', + recoveryHint: 'Run snapshot_ui again and retry with a current elementRef.', + snapshotAgeMs: snapshotAgeMs(snapshot, nowMs), + }; +} + +export function recordRuntimeSnapshot(snapshot: RuntimeSnapshotRecord): RuntimeSnapshotRecord { + const nextSeq = (runtimeSnapshotSeqs.get(snapshot.simulatorId) ?? 0) + 1; + runtimeSnapshotSeqs.set(snapshot.simulatorId, nextSeq); + snapshot.seq = nextSeq; + snapshot.payload.seq = nextSeq; + runtimeSnapshots.set(snapshot.simulatorId, snapshot); + return snapshot; +} + +export function clearRuntimeSnapshot(simulatorId: string): void { + runtimeSnapshots.delete(simulatorId); +} + +export function __resetRuntimeSnapshotStoreForTests(): void { + runtimeSnapshots.clear(); + runtimeSnapshotSeqs.clear(); +} + +export function getRuntimeSnapshotLookup( + simulatorId: string, + nowMs = Date.now(), +): RuntimeSnapshotLookup { + const snapshot = runtimeSnapshots.get(simulatorId) ?? null; + if (!snapshot) { + return { status: 'missing', snapshot: null }; + } + + const ageMs = snapshotAgeMs(snapshot, nowMs); + if (nowMs > snapshot.expiresAtMs) { + runtimeSnapshots.delete(simulatorId); + return { status: 'expired', snapshot: null, snapshotAgeMs: ageMs }; + } + + return { status: 'available', snapshot, snapshotAgeMs: ageMs }; +} + +export function getRuntimeSnapshot( + simulatorId: string, + nowMs = Date.now(), +): RuntimeSnapshotRecord | null { + return getRuntimeSnapshotLookup(simulatorId, nowMs).snapshot; +} + +export function resolveElementRef( + simulatorId: string, + elementRef: string, + requiredAction: RuntimeActionNameV1, + nowMs = Date.now(), +): RuntimeElementResolution { + const snapshot = runtimeSnapshots.get(simulatorId) ?? 
null; + if (!snapshot) { + return { ok: false, error: snapshotMissingError() }; + } + + const ageMs = snapshotAgeMs(snapshot, nowMs); + if (nowMs > snapshot.expiresAtMs) { + runtimeSnapshots.delete(simulatorId); + return { ok: false, error: snapshotExpiredError(snapshot, nowMs) }; + } + + const element = snapshot.elementsByRef.get(elementRef); + if (!element) { + return { + ok: false, + error: { + code: 'ELEMENT_REF_NOT_FOUND', + message: `Element ref '${elementRef}' was not found in the current runtime UI snapshot.`, + recoveryHint: + 'Run snapshot_ui again and retry with an elementRef from the latest snapshot.', + elementRef, + snapshotAgeMs: ageMs, + }, + }; + } + + if (!element.publicElement.actions.includes(requiredAction)) { + return { + ok: false, + error: { + code: 'TARGET_NOT_ACTIONABLE', + message: `Element ref '${elementRef}' does not support '${requiredAction}'.`, + recoveryHint: + 'Choose an elementRef that lists the required action, or refresh with snapshot_ui.', + elementRef, + candidates: snapshot.payload.elements.filter((candidate) => + candidate.actions.includes(requiredAction), + ), + snapshotAgeMs: ageMs, + }, + }; + } + + return { ok: true, snapshot, element, snapshotAgeMs: ageMs }; } export function getSnapshotUiWarning(simulatorId: string): string | null { - const timestamp = snapshotUiTimestamps.get(simulatorId); - if (!timestamp) { - return 'Warning: snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots.'; + const lookup = getRuntimeSnapshotLookup(simulatorId); + + if (lookup.status === 'missing') { + return 'Warning: snapshot_ui has not been called yet. 
Consider using snapshot_ui to capture semantic element references before interacting with the UI.'; } - const timeSinceDescribe = Date.now() - timestamp; - if (timeSinceDescribe > SNAPSHOT_UI_WARNING_TIMEOUT_MS) { - const secondsAgo = Math.round(timeSinceDescribe / 1000); - return `Warning: snapshot_ui was last called ${secondsAgo} seconds ago. Consider refreshing UI coordinates with snapshot_ui instead of using potentially stale coordinates.`; + if (lookup.status === 'expired') { + const secondsAgo = Math.round((lookup.snapshotAgeMs ?? 0) / 1000); + return `Warning: snapshot_ui was last called ${secondsAgo} seconds ago. Refresh UI element references with snapshot_ui before interacting with the UI.`; } return null; diff --git a/src/mcp/tools/ui-automation/shared/wait-predicate.ts b/src/mcp/tools/ui-automation/shared/wait-predicate.ts new file mode 100644 index 000000000..ffa838975 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/wait-predicate.ts @@ -0,0 +1,361 @@ +import type { + RuntimeElementRoleV1, + RuntimeElementV1, + RuntimeSnapshotRecord, + UiAutomationRecoverableError, +} from '../../../../types/ui-snapshot.ts'; +import { getRuntimeSnapshotLookup } from './snapshot-ui-state.ts'; + +export const waitPredicates = [ + 'exists', + 'gone', + 'enabled', + 'focused', + 'textContains', + 'settled', +] as const; + +export type WaitPredicate = (typeof waitPredicates)[number]; +export type SelectorPredicate = Exclude; + +export interface WaitSelector { + elementRef?: string; + identifier?: string; + label?: string; + role?: RuntimeElementRoleV1; + value?: string; +} + +export interface ResolvedWaitSelector { + sourceElementRef?: string; + identifier?: string; + label?: string; + role?: RuntimeElementRoleV1; + value?: string; +} + +export interface WaitEvaluation { + matched: boolean; + candidates?: RuntimeElementV1[]; + uiError?: UiAutomationRecoverableError; +} + +export interface SettledTracker { + signature: string | null; + stableSinceMs: number | null; +} 
+ +/** Builds the recoverable SNAPSHOT_MISSING error used when no runtime snapshot is stored for the simulator. */ function snapshotMissingError(): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_MISSING', + message: 'No runtime UI snapshot is available for this simulator.', + recoveryHint: + 'Run snapshot_ui for this simulator, then retry wait_for_ui with an elementRef from that snapshot.', + }; +} + +/** Builds the recoverable SNAPSHOT_EXPIRED error and attaches the snapshot age in milliseconds. */ function snapshotExpiredError(snapshotAgeMs: number): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_EXPIRED', + message: 'The runtime UI snapshot for this simulator has expired.', + recoveryHint: 'Run snapshot_ui again and retry wait_for_ui with a current elementRef.', + snapshotAgeMs, + }; +} + +/** Builds the recoverable TARGET_NOT_FOUND error for an elementRef that yields no usable wait selector. */ function targetNotFoundError(elementRef: string): UiAutomationRecoverableError { + return { + code: 'TARGET_NOT_FOUND', + message: `Element ref '${elementRef}' cannot be converted into a stable wait selector.`, + recoveryHint: + 'Use an element with an identifier, label, or value, or refresh with snapshot_ui and choose a more stable target.', + elementRef, + }; +} + +/** Collapses every whitespace run to one space and trims both ends; undefined input yields an empty string. */ function normalizedText(value: string | undefined): string { + return value?.replace(/\s+/g, ' ').trim() ?? 
''; +} + +/** Reports whether the needle, whitespace-normalized and lowercased, occurs in the normalized element value or label; a blank needle never matches. */ function elementTextContains(element: RuntimeElementV1, text: string): boolean { + const needle = normalizedText(text).toLowerCase(); + if (needle.length === 0) { + return false; + } + return ( + normalizedText(element.value).toLowerCase().includes(needle) || + normalizedText(element.label).toLowerCase().includes(needle) + ); +} + +/** Returns the normalized lowercase value, or failing that the label, that contains the needle; null when neither does or the needle is blank. */ function matchingElementText(element: RuntimeElementV1, text: string): string | null { + const needle = normalizedText(text).toLowerCase(); + if (needle.length === 0) { + return null; + } + + /* The value is checked before the label, so the value wins when both contain the needle. */ const value = normalizedText(element.value).toLowerCase(); + if (value.includes(needle)) { + return value; + } + + const label = normalizedText(element.label).toLowerCase(); + if (label.includes(needle)) { + return label; + } + + return null; +} + +/** True when every candidate resolves to the same matching text as the first one; false when the first candidate has no matching text. NOTE(review): an empty array yields true because first is then undefined rather than null; the callers in this file only pass arrays with more than one entry. */ function candidatesShareMatchingText(candidates: RuntimeElementV1[], text: string): boolean { + const [first, ...remaining] = candidates.map((candidate) => matchingElementText(candidate, text)); + return first !== null && remaining.every((candidateText) => candidateText === first); +} + +/** Joins the metadata.signature of every snapshot element with a pipe character into one string; evaluateSettledPredicate compares this string between polls. */ function elementSignatures(snapshot: RuntimeSnapshotRecord): string { + return snapshot.elements.map((element) => element.metadata.signature).join('|'); +} + +/** True when at least one of elementRef, identifier, label, role or value is truthy, so empty strings count as absent. */ export function hasSelectorFields(selector: WaitSelector): boolean { + return Boolean( + selector.elementRef || selector.identifier || selector.label || selector.role || selector.value, + ); +} + +/** Copies the truthy identifier, label, role and value fields into a resolved selector (elementRef is not copied) and returns null when no field was copied. */ export function selectorFromParams(selector: WaitSelector): ResolvedWaitSelector | null { + const resolved: ResolvedWaitSelector = { + ...(selector.identifier ? { identifier: selector.identifier } : {}), + ...(selector.label ? { label: selector.label } : {}), + ...(selector.role ? { role: selector.role } : {}), + ...(selector.value ? { value: selector.value } : {}), + }; + + return hasSelectorFields(resolved) ? 
resolved : null; +} + +/** Converts an elementRef from the stored runtime snapshot into a field-based wait selector. Returns ok false with SNAPSHOT_MISSING, SNAPSHOT_EXPIRED or ELEMENT_REF_NOT_FOUND when the ref cannot be looked up, and with TARGET_NOT_FOUND when the element has no identifier, no label with role and no value with role. */ export function resolveElementSelector( + simulatorId: string, + elementRef: string, + nowMs: number, +): + | { ok: true; selector: ResolvedWaitSelector } + | { ok: false; error: UiAutomationRecoverableError } { + const lookup = getRuntimeSnapshotLookup(simulatorId, nowMs); + if (lookup.status === 'missing') { + return { ok: false, error: snapshotMissingError() }; + } + + if (lookup.status === 'expired') { + return { ok: false, error: snapshotExpiredError(lookup.snapshotAgeMs ?? 0) }; + } + + const snapshot = lookup.snapshot; + const element = snapshot?.elementsByRef.get(elementRef); + if (!snapshot || !element) { + return { + ok: false, + error: { + code: 'ELEMENT_REF_NOT_FOUND', + message: `Element ref '${elementRef}' was not found in the current runtime UI snapshot.`, + recoveryHint: + 'Run snapshot_ui again and retry wait_for_ui with an elementRef from the latest snapshot.', + elementRef, + snapshotAgeMs: lookup.snapshotAgeMs ?? 0, + }, + }; + } + + /* Selector preference order: identifier alone, then label with role, then value with role. */ const publicElement = element.publicElement; + if (publicElement.identifier) { + return { + ok: true, + selector: { sourceElementRef: elementRef, identifier: publicElement.identifier }, + }; + } + + if (publicElement.label && publicElement.role) { + return { + ok: true, + selector: { + sourceElementRef: elementRef, + label: publicElement.label, + role: publicElement.role, + }, + }; + } + + if (publicElement.value && publicElement.role) { + return { + ok: true, + selector: { + sourceElementRef: elementRef, + value: publicElement.value, + role: publicElement.role, + }, + }; + } + + return { ok: false, error: targetNotFoundError(elementRef) }; +} + +/** Returns the public elements whose identifier, label, role and value strictly equal every one of those fields that is defined on the selector; undefined selector fields are ignored. */ function matchSelector( + snapshot: RuntimeSnapshotRecord, + selector: ResolvedWaitSelector, +): RuntimeElementV1[] { + return snapshot.elements + .map((element) => element.publicElement) + .filter((element) => { + if (selector.identifier !== undefined && element.identifier !== selector.identifier) + return false; + if (selector.label !== undefined && 
element.label !== selector.label) return false; + if (selector.role !== undefined && element.role !== selector.role) return false; + if (selector.value !== undefined && element.value !== selector.value) return false; + return true; + }); +} + +/** Builds the recoverable TARGET_AMBIGUOUS error listing the competing candidates; elementRef is included only when the selector carries a sourceElementRef. */ function ambiguousSelectorError( + selector: ResolvedWaitSelector, + candidates: RuntimeElementV1[], +): UiAutomationRecoverableError { + return { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: + 'Provide a more specific selector, or refresh with snapshot_ui and choose a stable elementRef.', + ...(selector.sourceElementRef ? { elementRef: selector.sourceElementRef } : {}), + candidates, + }; +} + +/** Builds the recoverable TARGET_NOT_ACTIONABLE error for a matched element whose state has no focused flag; the single element is reported as the only candidate. */ function focusedStateUnavailableError( + selector: ResolvedWaitSelector, + candidate: RuntimeElementV1, +): UiAutomationRecoverableError { + return { + code: 'TARGET_NOT_ACTIONABLE', + message: 'The matched runtime UI element does not expose focus state.', + recoveryHint: + 'Use exists, enabled, textContains, or a screenshot-based check for this element instead of focused.', + ...(selector.sourceElementRef ? 
{ elementRef: selector.sourceElementRef } : {}), + candidates: [candidate], + }; +} + +/** Evaluates textContains across the whole snapshot without a selector. A single hit matches; several hits match only when they all share the same matching text, otherwise the result is unmatched and carries a TARGET_AMBIGUOUS error. */ export function evaluateTextContainsPredicate(params: { + snapshot: RuntimeSnapshotRecord; + text: string; +}): WaitEvaluation { + const candidates = params.snapshot.elements + .map((element) => element.publicElement) + .filter((element) => elementTextContains(element, params.text)); + + if (candidates.length > 1) { + if (candidatesShareMatchingText(candidates, params.text)) { + return { matched: true, candidates }; + } + return { + matched: false, + candidates, + uiError: ambiguousSelectorError({}, candidates), + }; + } + + return { matched: candidates.length === 1, candidates }; +} + +/** Evaluates exists, gone, textContains, enabled or focused against the elements picked by the selector. exists and gone accept any number of candidates; textContains narrows the candidates by text first; enabled and focused need exactly one candidate. */ export function evaluateElementPredicate(params: { + predicate: SelectorPredicate; + selector: ResolvedWaitSelector; + snapshot: RuntimeSnapshotRecord; + text?: string; +}): WaitEvaluation { + const { predicate, selector, snapshot, text } = params; + const candidates = matchSelector(snapshot, selector); + + if (predicate === 'exists') { + return { matched: candidates.length > 0, candidates }; + } + + if (predicate === 'gone') { + return { matched: candidates.length === 0, candidates }; + } + + if (predicate === 'textContains') { + /* A missing text is treated as an empty needle, which elementTextContains never matches. */ const textMatches = candidates.filter((candidate) => + elementTextContains(candidate, text ?? ''), + ); + if (textMatches.length > 1) { + if (candidatesShareMatchingText(textMatches, text ?? 
'')) { + return { matched: true, candidates: textMatches }; + } + return { + matched: false, + candidates: textMatches, + uiError: ambiguousSelectorError(selector, textMatches), + }; + } + return { matched: textMatches.length === 1, candidates: textMatches }; + } + + /* enabled and focused below act on a single target, so several candidates are reported as ambiguous and zero candidates as unmatched. */ if (candidates.length > 1) { + return { matched: false, candidates, uiError: ambiguousSelectorError(selector, candidates) }; + } + + const match = candidates[0]; + if (!match) { + return { matched: false, candidates }; + } + + switch (predicate) { + case 'enabled': + return { matched: match.state?.enabled === true, candidates }; + case 'focused': + /* A missing focused flag is reported as an error instead of being treated as not focused. */ if (match.state?.focused === undefined) { + return { + matched: false, + candidates, + uiError: focusedStateUnavailableError(selector, match), + }; + } + return { matched: match.state.focused === true, candidates }; + } +} + +/** Reports whether the joined element signatures have stayed unchanged for at least settledDurationMs. Mutates the tracker: a changed signature is stored and restarts the stability window at nowMs, in which case the result is true only for a settledDurationMs of 0. */ export function evaluateSettledPredicate(params: { + snapshot: RuntimeSnapshotRecord; + nowMs: number; + settledDurationMs: number; + tracker: SettledTracker; +}): boolean { + const signature = elementSignatures(params.snapshot); + if (params.tracker.signature !== signature) { + params.tracker.signature = signature; + params.tracker.stableSinceMs = params.nowMs; + return params.settledDurationMs === 0; + } + + const stableSinceMs = params.tracker.stableSinceMs ?? params.nowMs; + params.tracker.stableSinceMs = stableSinceMs; + return params.nowMs - stableSinceMs >= params.settledDurationMs; +} + +/** Builds the recoverable WAIT_TIMEOUT error for a predicate that did not match within timeoutMs; the recovery hint explains exact-match selector semantics only when a selector was supplied. */ export function createWaitTimeoutError(params: { + predicate: WaitPredicate; + timeoutMs: number; + selector?: ResolvedWaitSelector; + candidates?: RuntimeElementV1[]; +}): UiAutomationRecoverableError { + const recoveryHint = params.selector + ? 'Selector fields match exact values. Use textContains for partial visible text, inspect the latest runtime snapshot, or adjust the wait selector.' 
+ : 'Inspect the latest runtime snapshot, adjust the wait selector, or retry later.'; + + return { + code: 'WAIT_TIMEOUT', + message: `Timed out after ${params.timeoutMs}ms waiting for UI predicate '${params.predicate}'.`, + recoveryHint, + timeoutMs: params.timeoutMs, + ...(params.selector?.sourceElementRef ? { elementRef: params.selector.sourceElementRef } : {}), + ...(params.candidates !== undefined ? { candidates: params.candidates } : {}), + }; +} diff --git a/src/mcp/tools/ui-automation/snapshot_ui.ts b/src/mcp/tools/ui-automation/snapshot_ui.ts index 6a5558dcc..9a197a4ee 100644 --- a/src/mcp/tools/ui-automation/snapshot_ui.ts +++ b/src/mcp/tools/ui-automation/snapshot_ui.ts @@ -11,13 +11,11 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { recordSnapshotUiCall } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; -import type { - AccessibilityNode, - CaptureResultDomainResult, -} from '../../../types/domain-results.ts'; +import type { NextStep } from '../../../types/common.ts'; +import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import { createCaptureFailureResult, @@ -25,9 +23,18 @@ import { mapAxeCommandError, setCaptureStructuredOutput, } from './shared/domain-result.ts'; +import { + parseRuntimeSnapshotResponse, + RuntimeSnapshotParseError, +} from './shared/runtime-snapshot.ts'; const snapshotUiSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + sinceScreenHash: z + .string() + .min(1, 'sinceScreenHash must not be empty') + .optional() + .describe('Return an unchanged response when the current screen hash matches this value'), }); type 
SnapshotUiParams = z.infer; @@ -35,24 +42,72 @@ type SnapshotUiResult = CaptureResultDomainResult; const LOG_PREFIX = '[AXe]'; -function parseUiHierarchy(responseText: string): AccessibilityNode[] | undefined { - try { - const parsed = JSON.parse(responseText) as unknown; - if (Array.isArray(parsed)) { - return parsed as AccessibilityNode[]; - } - if ( - parsed && - typeof parsed === 'object' && - 'elements' in parsed && - Array.isArray((parsed as { elements?: unknown }).elements) - ) { - return (parsed as { elements: AccessibilityNode[] }).elements; - } - } catch { - // ignore +/* Labels that are never suggested as a tap next step; compared after whitespace compaction and lowercasing. */ const HIDDEN_TAP_NEXT_STEP_LABELS = new Set(['sheet grabber']); + +/* Labels ranked last when suggesting a tap next step. Entries are lowercase because lookups lowercase the label first; the operator keys are real Unicode symbols (plus-minus U+00B1, division U+00F7, multiplication U+00D7). */ const LOW_PRIORITY_TAP_NEXT_STEP_LABELS = new Set([ + 'close', + 'clear search', + 'remove', + 'delete', + 'clear', + 'c', + 'ac', + '±', + '%', + '÷', + '×', + '-', + '+', + '=', +]); + +/** Collapses whitespace runs to one space and trims; undefined becomes an empty string. */ function compactTapNextStepText(value: string | undefined): string { + return (value ?? '').replace(/\s+/g, ' ').trim(); +} + +/** True when the compacted, lowercased label is in HIDDEN_TAP_NEXT_STEP_LABELS. */ function isHiddenTapNextStepElement(label: string | undefined): boolean { + return HIDDEN_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +/** True when the compacted, lowercased label is in LOW_PRIORITY_TAP_NEXT_STEP_LABELS. */ function isLowPriorityTapNextStepElement(label: string | undefined): boolean { + return LOW_PRIORITY_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +/** True when the label contains a comma or has at least 24 characters, or the identifier ends in card (case-insensitive). */ function isContentRichTapNextStepElement(element: { + label?: string; + identifier?: string; +}): boolean { + const label = compactTapNextStepText(element.label); + const identifier = compactTapNextStepText(element.identifier); + return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); +} + +/** True when state.selected is true or the compacted value reads selected (case-insensitive). */ function isAlreadySelectedTapNextStepElement(element: { + state?: { selected?: boolean }; + value?: string; +}): boolean { + return ( + element.state?.selected === true || + compactTapNextStepText(element.value).toLowerCase() === 'selected' + ); +} + +/** Returns the numeric rank used to order tap next-step candidates. */ function getTapNextStepElementPriority(element: { + label?: string; + identifier?: string; + state?: { selected?: 
boolean }; + value?: string; +}): number { + if (isLowPriorityTapNextStepElement(element.label)) { + return 90; } - return undefined; + if (isAlreadySelectedTapNextStepElement(element)) { + return 70; + } + if (isContentRichTapNextStepElement(element)) { + return 0; + } + return 20; } export function createSnapshotUiExecutor( @@ -71,6 +126,7 @@ export function createSnapshotUiExecutor( toolName, }); if (guard.blockedMessage) { + clearRuntimeSnapshot(simulatorId); return createCaptureFailureResult(simulatorId, guard.blockedMessage); } @@ -85,20 +141,43 @@ export function createSnapshotUiExecutor( axeHelpers, ); - recordSnapshotUiCall(simulatorId); + const snapshot = parseRuntimeSnapshotResponse({ simulatorId, responseText }); + recordRuntimeSnapshot(snapshot); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - const uiHierarchy = parseUiHierarchy(responseText); + if (params.sinceScreenHash === snapshot.screenHash) { + return createCaptureSuccessResult(simulatorId, { + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId, + screenHash: snapshot.screenHash, + seq: snapshot.seq, + }, + warnings: [guard.warningText], + }); + } + return createCaptureSuccessResult(simulatorId, { - capture: uiHierarchy - ? 
{ - type: 'ui-hierarchy', - uiHierarchy, - } - : undefined, + capture: snapshot.payload, warnings: [guard.warningText], }); } catch (error) { + clearRuntimeSnapshot(simulatorId); + + if (error instanceof RuntimeSnapshotParseError) { + const message = 'Failed to parse runtime UI snapshot.'; + log('error', `${LOG_PREFIX}/${toolName}: Failed - ${message}`); + return createCaptureFailureResult(simulatorId, message, { + details: [error.message], + uiError: { + code: 'SNAPSHOT_PARSE_FAILED', + message, + recoveryHint: 'Run snapshot_ui again after the app is fully launched and responsive.', + }, + }); + } + const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to get accessibility hierarchy.', }); @@ -122,11 +201,56 @@ export async function snapshot_uiLogic( setCaptureStructuredOutput(ctx, result); - ctx.nextStepParams = { - snapshot_ui: { simulatorId: params.simulatorId }, - tap: { simulatorId: params.simulatorId, x: 0, y: 0 }, - screenshot: { simulatorId: params.simulatorId }, - }; + const runtimeSnapshot = + result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' + ? result.capture + : null; + const tapElement = runtimeSnapshot + ? (runtimeSnapshot.elements + .map((element, index) => ({ element, index })) + .filter( + ({ element }) => + element.actions.includes('tap') && + !element.actions.includes('typeText') && + !isHiddenTapNextStepElement(element.label), + ) + .sort((left, right) => { + const priorityDelta = + getTapNextStepElementPriority(left.element) - + getTapNextStepElementPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + })[0]?.element ?? 
null) + : null; + + if (!result.didError) { + const nextSteps: NextStep[] = [ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: params.simulatorId }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { simulatorId: params.simulatorId, predicate: 'settled' }, + }, + ...(tapElement + ? [ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId: params.simulatorId, elementRef: tapElement.ref }, + }, + ] + : []), + { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: params.simulatorId }, + }, + ]; + ctx.nextSteps = nextSteps; + } } const publicSchemaObject = z.strictObject( diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index 145f32a4f..f0167de27 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -1,7 +1,7 @@ /** * UI Testing Plugin: Swipe * - * Swipe from one coordinate to another on iOS simulator with customizable duration and delta. + * Swipes within a semantic UI element from the runtime snapshot store. 
*/ import * as z from 'zod'; @@ -17,7 +17,8 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementSwipePoints } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; @@ -26,30 +27,32 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const swipeSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x1: z.number().int({ message: 'Start X coordinate' }), - y1: z.number().int({ message: 'Start Y coordinate' }), - x2: z.number().int({ message: 'End X coordinate' }), - y2: z.number().int({ message: 'End Y coordinate' }), + withinElementRef: z.string().min(1, { message: 'withinElementRef must be non-empty' }), + direction: z.enum(['up', 'down', 'left', 'right']).describe('up|down|left|right'), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) .optional() .describe('seconds'), - delta: z.number().min(0, { message: 'Delta must be non-negative' }).optional(), + distance: z.number().positive({ message: 'Distance must be greater than 0' }).optional(), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('seconds'), postDelay: z .number() .min(0, { message: 'Post-delay 
must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); @@ -68,40 +71,57 @@ export function createSwipeExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'swipe'; - const { simulatorId, x1, y1, x2, y2, duration, delta, preDelay, postDelay } = params; - const baseAction = { type: 'swipe' as const }; - const fullAction = { + const { simulatorId, withinElementRef, direction, duration, distance, preDelay, postDelay } = + params; + const action = { type: 'swipe' as const, - from: { x: x1, y: y1 }, - to: { x: x2, y: y2 }, + withinElementRef, + direction, ...(duration !== undefined ? { durationSeconds: duration } : {}), }; + const resolution = resolveElementRef(simulatorId, withinElementRef, 'swipeWithin'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const points = getRuntimeElementSwipePoints(resolution.element, direction); + if (!points.ok) { + const uiError = createUiAutomationRecoverableError({ + code: 'TARGET_NOT_ACTIONABLE', + message: points.message, + elementRef: withinElementRef, + }); + return createUiActionFailureResult(action, simulatorId, points.message, { uiError }); + } + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, toolName, }); if (guard.blockedMessage) { - return createUiActionFailureResult(baseAction, simulatorId, guard.blockedMessage); + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } const commandArgs = [ 'swipe', '--start-x', - String(x1), + String(points.from.x), '--start-y', - String(y1), + String(points.from.y), '--end-x', - String(x2), + String(points.to.x), '--end-y', - String(y2), + String(points.to.y), ]; if (duration !== undefined) { commandArgs.push('--duration', String(duration)); } - if (delta !== undefined) { - commandArgs.push('--delta', 
String(delta)); + if (distance !== undefined) { + commandArgs.push('--delta', String(distance)); } if (preDelay !== undefined) { commandArgs.push('--pre-delay', String(preDelay)); @@ -110,26 +130,33 @@ export function createSwipeExecutor( commandArgs.push('--post-delay', String(postDelay)); } - const optionsText = duration ? ` duration=${duration}s` : ''; + const optionsText = duration !== undefined ? ` duration=${duration}s` : ''; log( 'info', - `${LOG_PREFIX}/${toolName}: Starting swipe (${x1},${y1})->(${x2},${y2})${optionsText} on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting ${direction} swipe within ${withinElementRef}${optionsText} on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'swipe', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(fullAction, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => 'Failed to simulate swipe.', + axeFailureMessage: () => + `Failed to simulate ${direction} swipe within ${withinElementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); - return createUiActionFailureResult(baseAction, simulatorId, failure.message, { + return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef: withinElementRef, + }), }); } }; diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index 960d8bc7f..245d7ead2 100644 --- 
a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -11,8 +11,12 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; -import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { defaultAxeHelpers } from './shared/axe-command.ts'; +import { + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, +} from './shared/semantic-tap.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -20,95 +24,42 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; -const baseTapSchema = z.object({ +const tapSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z - .number() - .int({ message: 'X coordinate must be an integer' }) - .optional() - .describe( - 'Fallback tap X coordinate. Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', - ), - y: z - .number() - .int({ message: 'Y coordinate must be an integer' }) - .optional() - .describe( - 'Fallback tap Y coordinate. 
Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', - ), - id: z - .string() - .min(1, { message: 'Id must be non-empty' }) - .optional() - .describe('Recommended tap target: accessibility element id (AXUniqueId).'), - label: z - .string() - .min(1, { message: 'Label must be non-empty' }) - .optional() - .describe('Recommended when unique: accessibility label (AXLabel).'), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('seconds'), postDelay: z .number() .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); -const tapSchema = baseTapSchema.superRefine((values, ctx) => { - const hasX = values.x !== undefined; - const hasY = values.y !== undefined; - const hasId = values.id !== undefined; - const hasLabel = values.label !== undefined; - - if (!hasX && !hasY && hasId && hasLabel) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['id'], - message: 'Provide either id or label, not both.', - }); - } - - if (hasX !== hasY) { - if (!hasX) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['x'], - message: 'X coordinate is required when y is provided.', - }); - } - if (!hasY) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['y'], - message: 'Y coordinate is required when x is provided.', - }); - } - } - - if (!hasX && !hasY && !hasId && !hasLabel) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['x'], - message: 'Provide an element id/label (recommended) or x/y coordinates as fallback.', - }); - } -}); - type TapParams = z.infer; type TapResult = UiActionResultDomainResult; -const publicSchemaObject = z.strictObject(baseTapSchema.omit({ simulatorId: true } as const).shape); +const publicSchemaObject = 
z.strictObject(tapSchema.omit({ simulatorId: true } as const).shape); const LOG_PREFIX = '[AXe]'; +/** Returns a promise that resolves after durationMs milliseconds, scheduled with setTimeout. */ function delayMs(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} + export function createTapExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, @@ -116,15 +67,15 @@ export function createTapExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'tap'; - const { simulatorId, x, y, id, label, preDelay, postDelay } = params; - const action = - x !== undefined && y !== undefined - ? { type: 'tap' as const, x, y } - : id !== undefined - ? { type: 'tap' as const, id } - : label !== undefined - ? { type: 'tap' as const, label } - : { type: 'tap' as const }; + const { simulatorId, elementRef, preDelay, postDelay } = params; + const action = { type: 'tap' as const, elementRef }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'tap'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -135,55 +86,57 @@ export function createTapExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - let targetDescription = ''; - let actionDescription = ''; - let usesCoordinates = false; - const commandArgs = ['tap']; - - if (x !== undefined && y !== undefined) { - usesCoordinates = true; - targetDescription = `(${x}, ${y})`; - actionDescription = `Tap at ${targetDescription}`; - commandArgs.push('-x', String(x), '-y', String(y)); - } else if (id !== undefined) { - targetDescription = `element id "${id}"`; - actionDescription = `Tap on ${targetDescription}`; - commandArgs.push('--id', id); - } else if (label !== undefined) { - targetDescription = `element label "${label}"`; - actionDescription = `Tap on ${targetDescription}`; - 
commandArgs.push('--label', label); - } else { - return createUiActionFailureResult( - action, - simulatorId, - 'Parameter validation failed: Missing tap target', - ); - } - - if (preDelay !== undefined) { - commandArgs.push('--pre-delay', String(preDelay)); + const usesTouchActivation = resolution.element.publicElement.role === 'switch'; + const extraArgs: string[] = []; + if (!usesTouchActivation && preDelay !== undefined) { + extraArgs.push('--pre-delay', String(preDelay)); } - if (postDelay !== undefined) { - commandArgs.push('--post-delay', String(postDelay)); + if (!usesTouchActivation && postDelay !== undefined) { + extraArgs.push('--post-delay', String(postDelay)); } - - log('info', `${LOG_PREFIX}/${toolName}: Starting for ${targetDescription} on ${simulatorId}`); + const tapCommand = createSemanticTapCommand( + resolution.element, + elementRef, + extraArgs, + resolution.snapshot.elements, + ); + + log( + 'info', + `${LOG_PREFIX}/${toolName}: Starting for ${tapCommand.targetDescription} on ${simulatorId}`, + ); try { - await executeAxeCommand(commandArgs, simulatorId, 'tap', executor, axeHelpers); + if (usesTouchActivation && preDelay !== undefined) { + await delayMs(preDelay * 1000); + } + await executeSemanticTapWithAmbiguityFallback({ + command: tapCommand, + simulatorId, + executor, + axeHelpers, + }); + if (usesTouchActivation && postDelay !== undefined) { + await delayMs(postDelay * 1000); + } + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [ - guard.warningText, - usesCoordinates ? 
getSnapshotUiWarning(simulatorId) : null, - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => `Failed to simulate ${actionDescription.toLowerCase()}.`, + axeFailureMessage: () => `Failed to simulate tap on elementRef ${elementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: tapCommand.usedSelector ? 'UI_STATE_CHANGED' : 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; @@ -204,7 +157,7 @@ export async function tapLogic( export const schema = getSessionAwareToolSchemaShape({ sessionAware: publicSchemaObject, - legacy: baseTapSchema, + legacy: tapSchema, }); export const handler = createSessionAwareTool({ diff --git a/src/mcp/tools/ui-automation/touch.ts b/src/mcp/tools/ui-automation/touch.ts index 650dce8e2..277bbb53b 100644 --- a/src/mcp/tools/ui-automation/touch.ts +++ b/src/mcp/tools/ui-automation/touch.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Touch * - * Perform touch down/up events at specific coordinates. - * Use snapshot_ui for precise coordinates (don't guess from screenshots). + * Performs touch down/up events on a semantic UI element from the runtime snapshot store. 
*/ import * as z from 'zod'; @@ -18,7 +17,8 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -26,27 +26,43 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; -const touchSchema = z.object({ +const touchSchemaObject = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z.number().int({ message: 'X coordinate must be an integer' }), - y: z.number().int({ message: 'Y coordinate must be an integer' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), down: z.boolean().optional(), up: z.boolean().optional(), delay: z .number() .min(0, { message: 'Delay must be non-negative' }) + .max(10, { message: 'Delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); -type TouchParams = z.infer; +function refineTouchDelay(value: z.infer, ctx: z.RefinementCtx): void { + if (value.delay !== undefined && !(value.down === true && value.up === true)) { + ctx.addIssue({ + code: 'custom', + path: ['delay'], + message: 'Delay can only be used when both down and up are true', + }); + } +} + +const touchSchema = touchSchemaObject.superRefine(refineTouchDelay); + +type TouchParams = z.infer; type TouchResult = UiActionResultDomainResult; -const 
publicSchemaObject = z.strictObject(touchSchema.omit({ simulatorId: true } as const).shape); +const publicSchemaObject = z.strictObject( + touchSchemaObject.omit({ simulatorId: true } as const).shape, +); const LOG_PREFIX = '[AXe]'; @@ -57,29 +73,41 @@ export function createTouchExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'touch'; - const { simulatorId, x, y, down, up, delay } = params; - const actionText = down && up ? 'touch down+up' : down ? 'touch down' : 'touch up'; - const baseAction = { type: 'touch' as const }; - const fullAction = { type: 'touch' as const, event: actionText, x, y }; + const { simulatorId, elementRef, down, up, delay } = params; + const actionText = + down && up ? 'touch down+up' : down ? 'touch down' : up ? 'touch up' : undefined; + const action = { + type: 'touch' as const, + elementRef, + ...(actionText ? { event: actionText } : {}), + }; if (!down && !up) { return createUiActionFailureResult( - baseAction, + action, simulatorId, 'At least one of "down" or "up" must be true', ); } + const resolution = resolveElementRef(simulatorId, elementRef, 'touch'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, toolName, }); if (guard.blockedMessage) { - return createUiActionFailureResult(baseAction, simulatorId, guard.blockedMessage); + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const commandArgs = ['touch', '-x', String(x), '-y', String(y)]; + const center = getRuntimeElementActivationPoint(resolution.element); + const commandArgs = ['touch', '-x', String(center.x), '-y', String(center.y)]; if (down) { commandArgs.push('--down'); } @@ -92,23 +120,29 @@ export function createTouchExecutor( log( 'info', - `${LOG_PREFIX}/${toolName}: Starting ${actionText} at (${x}, ${y}) 
on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting ${actionText ?? 'touch'} on elementRef ${elementRef} on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'touch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(fullAction, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to execute touch event.', }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); - return createUiActionFailureResult(baseAction, simulatorId, failure.message, { + return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; @@ -129,7 +163,7 @@ export async function touchLogic( export const schema = getSessionAwareToolSchemaShape({ sessionAware: publicSchemaObject, - legacy: touchSchema, + legacy: touchSchemaObject, }); export const handler = createSessionAwareTool({ diff --git a/src/mcp/tools/ui-automation/type_text.ts b/src/mcp/tools/ui-automation/type_text.ts index a18c09208..d4999df97 100644 --- a/src/mcp/tools/ui-automation/type_text.ts +++ b/src/mcp/tools/ui-automation/type_text.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Type Text * - * Types text into the iOS Simulator using keyboard input. - * Supports standard US keyboard characters. + * Types text into a semantic UI element from the runtime snapshot store. 
*/ import * as z from 'zod'; @@ -18,22 +17,47 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, +} from './shared/semantic-tap.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const LOG_PREFIX = '[AXe]'; +const AXE_UNSUPPORTED_TEXT_MESSAGE = + 'Text contains characters unsupported by AXe typing. 
AXe type supports US keyboard characters only.'; + +function containsUnsupportedAxeTypeText(text: string): boolean { + for (const character of text) { + const codePoint = character.codePointAt(0); + if (codePoint === undefined || codePoint < 0x20 || codePoint > 0x7e) { + return true; + } + } + + return false; +} const typeTextSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), text: z.string().min(1, { message: 'Text cannot be empty' }), + replaceExisting: z + .boolean() + .optional() + .describe('Select and replace existing field contents before typing'), }); type TypeTextParams = z.infer; @@ -50,8 +74,15 @@ export function createTypeTextExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'type_text'; - const { simulatorId, text } = params; - const action = { type: 'type-text' as const }; + const { simulatorId, elementRef, text, replaceExisting } = params; + const action = { type: 'type-text' as const, elementRef, textLength: text.length }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'typeText'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -62,24 +93,82 @@ export function createTypeTextExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const commandArgs = ['type', text]; + if (containsUnsupportedAxeTypeText(text)) { + return createUiActionFailureResult(action, simulatorId, AXE_UNSUPPORTED_TEXT_MESSAGE, { + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: AXE_UNSUPPORTED_TEXT_MESSAGE, + recoveryHint: 'Use only US keyboard characters supported by AXe type.', + elementRef, + }), + }); + } + + const focusCommand = createSemanticTapCommand( + 
resolution.element, + elementRef, + [], + resolution.snapshot.elements, + ); + const typeCommandArgs = ['type', text]; log( 'info', - `${LOG_PREFIX}/${toolName}: Starting type "${text.substring(0, 20)}..." on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting type into elementRef ${elementRef}, length=${text.length} on ${simulatorId}`, ); try { - await executeAxeCommand(commandArgs, simulatorId, 'type', executor, axeHelpers); + await executeSemanticTapWithAmbiguityFallback({ + command: focusCommand, + simulatorId, + executor, + axeHelpers, + }); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to focus elementRef ${elementRef} before typing.`, + }); + log('error', `${LOG_PREFIX}/${toolName}: Focus failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message, { + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), + }); + } + + try { + if (replaceExisting === true) { + await executeAxeCommand( + ['key-combo', '--modifiers', '227', '--key', '4'], + simulatorId, + 'key-combo', + executor, + axeHelpers, + ); + } + await executeAxeCommand(typeCommandArgs, simulatorId, 'type', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => 'Failed to simulate text typing.', + axeFailureMessage: () => `Failed to type text into elementRef ${elementRef}.`, }); - log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + log('error', `${LOG_PREFIX}/${toolName}: 
Typing failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message, { - details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; diff --git a/src/mcp/tools/ui-automation/wait_for_ui.ts b/src/mcp/tools/ui-automation/wait_for_ui.ts new file mode 100644 index 000000000..3fb55ccbc --- /dev/null +++ b/src/mcp/tools/ui-automation/wait_for_ui.ts @@ -0,0 +1,365 @@ +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from '../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { + RuntimeElementRoleV1, + RuntimeElementV1, + RuntimeSnapshotRecord, + UiWaitMatch, +} from '../../../types/ui-snapshot.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import { + parseRuntimeSnapshotResponse, + RuntimeSnapshotParseError, +} from './shared/runtime-snapshot.ts'; +import { + createCaptureFailureResult, + createCaptureSuccessResult, + mapAxeCommandError, + setCaptureStructuredOutput, +} from 
'./shared/domain-result.ts'; +import { + createWaitTimeoutError, + evaluateElementPredicate, + evaluateSettledPredicate, + evaluateTextContainsPredicate, + hasSelectorFields, + resolveElementSelector, + selectorFromParams, + waitPredicates, +} from './shared/wait-predicate.ts'; +import type { ResolvedWaitSelector, SettledTracker } from './shared/wait-predicate.ts'; + +const DEFAULT_TIMEOUT_MS = 5_000; +const DEFAULT_POLL_INTERVAL_MS = 250; +const DEFAULT_SETTLED_DURATION_MS = 500; +const LOG_PREFIX = '[AXe]'; + +const waitForUiSchemaShape = { + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + predicate: z.enum(waitPredicates), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }).optional(), + identifier: z.string().min(1, { message: 'identifier must be non-empty' }).optional(), + label: z.string().min(1, { message: 'label must be non-empty' }).optional(), + role: z + .enum([ + 'application', + 'button', + 'cell', + 'image', + 'keyboard-key', + 'list', + 'menu', + 'other', + 'scroll-view', + 'slider', + 'switch', + 'tab', + 'text', + 'text-field', + 'window', + ] satisfies RuntimeElementRoleV1[]) + .optional(), + value: z.string().min(1, { message: 'value must be non-empty' }).optional(), + text: z + .string() + .min(1, { message: 'text must be non-empty' }) + .refine((value) => value.replace(/\s+/g, ' ').trim().length > 0, { + message: 'text must contain non-whitespace characters', + }) + .optional(), + timeoutMs: z + .number() + .int({ message: 'timeoutMs must be an integer number of milliseconds' }) + .min(0, { message: 'timeoutMs must be non-negative' }) + .optional() + .describe('milliseconds'), + pollIntervalMs: z + .number() + .int({ message: 'pollIntervalMs must be an integer number of milliseconds' }) + .min(1, { message: 'pollIntervalMs must be at least 1 millisecond' }) + .optional() + .describe('milliseconds'), + settledDurationMs: z + .number() + .int({ message: 'settledDurationMs must be an integer number of 
milliseconds' }) + .min(0, { message: 'settledDurationMs must be non-negative' }) + .optional() + .describe('milliseconds'), +}; + +const waitForUiSchema = z.strictObject(waitForUiSchemaShape).superRefine((value, ctx) => { + if ( + value.predicate !== 'settled' && + value.predicate !== 'textContains' && + !hasSelectorFields(value) + ) { + ctx.addIssue({ + code: 'custom', + path: ['elementRef'], + message: `${value.predicate} waits require at least one selector field`, + }); + } + + if (value.predicate === 'textContains' && value.text === undefined) { + ctx.addIssue({ + code: 'custom', + path: ['text'], + message: 'textContains waits require text', + }); + } + + if (value.predicate !== 'textContains' && value.text !== undefined) { + ctx.addIssue({ + code: 'custom', + path: ['text'], + message: 'text is only supported for textContains waits', + }); + } +}); + +type WaitForUiParams = z.infer; +type WaitForUiResult = CaptureResultDomainResult; + +interface WaitTiming { + now: () => number; + sleep: (durationMs: number) => Promise; +} + +function defaultSleep(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} + +function createWaitMatch( + predicate: WaitForUiParams['predicate'], + matches: RuntimeElementV1[] | undefined, +): UiWaitMatch | undefined { + if (predicate === 'settled' || matches === undefined) { + return undefined; + } + return { predicate, matches }; +} + +export function createWaitForUiExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + timing: WaitTiming = { now: Date.now, sleep: defaultSleep }, +): NonStreamingExecutor { + return async (params) => { + const toolName = 'wait_for_ui'; + const { simulatorId, predicate, elementRef, text } = params; + const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const pollIntervalMs = params.pollIntervalMs ?? 
DEFAULT_POLL_INTERVAL_MS; + const settledDurationMs = params.settledDurationMs ?? DEFAULT_SETTLED_DURATION_MS; + const startedAtMs = timing.now(); + const deadlineMs = startedAtMs + timeoutMs; + let selector: ResolvedWaitSelector | null = null; + if (predicate !== 'settled') { + if (elementRef) { + const selectorResolution = resolveElementSelector(simulatorId, elementRef, startedAtMs); + if (!selectorResolution.ok) { + return createCaptureFailureResult(simulatorId, selectorResolution.error.message, { + uiError: selectorResolution.error, + }); + } + selector = selectorResolution.selector; + } else { + selector = selectorFromParams(params); + } + } + + if (predicate !== 'settled' && predicate !== 'textContains' && !selector) { + const message = `${predicate} waits require at least one selector field.`; + return createCaptureFailureResult(simulatorId, message, { + uiError: { + code: 'TARGET_NOT_FOUND', + message, + recoveryHint: + 'Provide elementRef, identifier, label, role, or value, or use settled for selector-free waits.', + }, + }); + } + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + clearRuntimeSnapshot(simulatorId); + return createCaptureFailureResult(simulatorId, guard.blockedMessage); + } + + let latestSnapshot: RuntimeSnapshotRecord | null = null; + let latestCandidates: RuntimeElementV1[] = []; + let lastParseError: RuntimeSnapshotParseError | null = null; + let lastPollError: string | null = null; + const settledTracker: SettledTracker = { signature: null, stableSinceMs: null }; + + log('info', `${LOG_PREFIX}/${toolName}: Waiting for ${predicate} on ${simulatorId}`); + + while (true) { + try { + const responseText = await executeAxeCommand( + ['describe-ui'], + simulatorId, + 'describe-ui', + executor, + axeHelpers, + ); + const nowMs = timing.now(); + const snapshot = parseRuntimeSnapshotResponse({ simulatorId, responseText, nowMs }); + 
latestSnapshot = snapshot; + lastParseError = null; + lastPollError = null; + recordRuntimeSnapshot(snapshot); + + const matched = + predicate === 'settled' + ? evaluateSettledPredicate({ + snapshot, + nowMs, + settledDurationMs, + tracker: settledTracker, + }) + : predicate === 'textContains' && !selector + ? evaluateTextContainsPredicate({ snapshot, text: text! }) + : evaluateElementPredicate({ predicate, selector: selector!, snapshot, text }); + + if (typeof matched === 'boolean') { + if (matched) { + return createCaptureSuccessResult(simulatorId, { + capture: snapshot.payload, + warnings: [guard.warningText], + }); + } + } else { + latestCandidates = matched.candidates ?? []; + if (matched.uiError) { + return createCaptureFailureResult(simulatorId, matched.uiError.message, { + warnings: [guard.warningText], + uiError: matched.uiError, + capture: snapshot.payload, + }); + } + if (matched.matched) { + return createCaptureSuccessResult(simulatorId, { + capture: snapshot.payload, + warnings: [guard.warningText], + waitMatch: createWaitMatch(predicate, matched.candidates), + }); + } + } + } catch (error) { + if (error instanceof RuntimeSnapshotParseError) { + lastParseError = error; + lastPollError = null; + } else { + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => 'Failed to poll runtime UI snapshot.', + }); + lastPollError = failure.message; + lastParseError = null; + } + } + + const nowMs = timing.now(); + if (nowMs >= deadlineMs) { + break; + } + + await timing.sleep(Math.min(pollIntervalMs, deadlineMs - nowMs)); + } + + if (latestSnapshot) { + const uiError = createWaitTimeoutError({ + predicate, + timeoutMs, + selector: selector ?? 
undefined, + candidates: latestCandidates, + }); + return createCaptureFailureResult(simulatorId, uiError.message, { + warnings: [guard.warningText], + uiError, + capture: latestSnapshot.payload, + }); + } + + clearRuntimeSnapshot(simulatorId); + if (lastParseError) { + const message = 'Failed to parse runtime UI snapshot while waiting for UI.'; + return createCaptureFailureResult(simulatorId, message, { + details: [lastParseError.message], + uiError: { + code: 'SNAPSHOT_PARSE_FAILED', + message, + recoveryHint: 'Retry after the app is fully launched and responsive.', + }, + }); + } + + const message = + lastPollError ?? `Timed out after ${timeoutMs}ms waiting for UI predicate '${predicate}'.`; + return createCaptureFailureResult(simulatorId, message, { + uiError: { + code: lastPollError ? 'ACTION_FAILED' : 'WAIT_TIMEOUT', + message, + recoveryHint: 'Retry after the app is fully launched and responsive.', + ...(lastPollError ? {} : { timeoutMs }), + }, + }); + }; +} + +export async function wait_for_uiLogic( + params: WaitForUiParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + timing?: WaitTiming, +): Promise { + const ctx = getHandlerContext(); + const executeWaitForUi = createWaitForUiExecutor(executor, axeHelpers, debuggerManager, timing); + const result = await executeWaitForUi(params); + + setCaptureStructuredOutput(ctx, result, { headerTitle: 'Wait for UI' }); + + ctx.nextStepParams = { + snapshot_ui: { simulatorId: params.simulatorId }, + wait_for_ui: { simulatorId: params.simulatorId, predicate: 'settled' }, + }; +} + +const publicSchemaObject = z.strictObject( + z.object(waitForUiSchemaShape).omit({ simulatorId: true } as const).shape, +); + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: waitForUiSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: 
toInternalSchema(waitForUiSchema), + logicFunction: (params: WaitForUiParams, executor: CommandExecutor) => + wait_for_uiLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/rendering/render.ts b/src/rendering/render.ts index 2b0d6fded..ac72786a7 100644 --- a/src/rendering/render.ts +++ b/src/rendering/render.ts @@ -95,6 +95,7 @@ function createRenderHooks( outputStyle?: OutputStyle; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; }, ): RenderSessionHooks { const suppressWarnings = sessionStore.get('suppressWarnings'); @@ -118,6 +119,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? true, }), }; case 'raw': @@ -144,6 +146,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? true, }); if (text) { process.stdout.write(text); @@ -158,6 +161,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? 
true, }); return { @@ -179,6 +183,7 @@ export interface RenderSessionOptions { outputStyle?: OutputStyle; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } export function createRenderSession( @@ -195,7 +200,7 @@ export function renderTranscript( strategy: RenderStrategy, options?: Pick< RenderSessionOptions, - 'runtime' | 'outputStyle' | 'filePathRenderStyle' | 'includeHeaderDetails' + 'runtime' | 'outputStyle' | 'filePathRenderStyle' | 'includeHeaderDetails' | 'includeNextSteps' >, ): string { return createRenderHooks(strategy, { ...options, interactive: false }).finalize(input); diff --git a/src/runtime/__tests__/tool-invoker.test.ts b/src/runtime/__tests__/tool-invoker.test.ts index 73a5e0fb8..3beee0172 100644 --- a/src/runtime/__tests__/tool-invoker.test.ts +++ b/src/runtime/__tests__/tool-invoker.test.ts @@ -664,6 +664,50 @@ describe('DefaultToolInvoker next steps post-processing', () => { expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id 123'); }); + it('prefers the current workflow when normalizing duplicate next-step tool names', async () => { + const directHandler = emitNextStepsHandler('ok', [ + { + tool: 'screenshot', + label: 'Take screenshot', + params: { simulatorId: '123' }, + }, + ]); + + const catalog = createToolCatalog([ + makeTool({ + id: 'snapshot_ui', + cliName: 'snapshot-ui', + mcpName: 'snapshot_ui', + workflow: 'ui-automation', + stateful: false, + handler: directHandler, + }), + makeTool({ + id: 'screenshot', + cliName: 'screenshot', + mcpName: 'screenshot', + workflow: 'simulator', + stateful: false, + handler: emitHandler('simulator screenshot'), + }), + makeTool({ + id: 'screenshot', + cliName: 'screenshot', + mcpName: 'screenshot', + workflow: 'ui-automation', + stateful: false, + handler: emitHandler('ui screenshot'), + }), + ]); + + const invoker = new DefaultToolInvoker(catalog); + const response = await invokeAndFinalize(invoker, 'snapshot-ui', 
{}, { runtime: 'cli' }); + + const text = response.content.map((c) => (c.type === 'text' ? c.text : '')).join('\n'); + expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id "123"'); + expect(text).not.toContain('xcodebuildmcp simulator screenshot --simulator-id "123"'); + }); + it('injects manifest template next steps from dynamic nextStepParams when response omits nextSteps', async () => { const directHandler = emitNextStepsHandler('ok', undefined, { snapshot_ui: { simulatorId: '12345678-1234-4234-8234-123456789012' }, diff --git a/src/runtime/tool-invoker.ts b/src/runtime/tool-invoker.ts index 75935866c..1f82f2abe 100644 --- a/src/runtime/tool-invoker.ts +++ b/src/runtime/tool-invoker.ts @@ -135,13 +135,31 @@ function mergeTemplateAndResponseNextSteps( }); } -function normalizeNextSteps(steps: NextStep[], catalog: ToolCatalog): NextStep[] { +function getNextStepTarget(params: { + catalog: ToolCatalog; + mcpName: string; + preferredWorkflow: string; +}): ToolDefinition | null { + return ( + params.catalog.tools.find( + (tool) => + tool.mcpName.toLowerCase() === params.mcpName.toLowerCase().trim() && + tool.workflow === params.preferredWorkflow, + ) ?? 
params.catalog.getByMcpName(params.mcpName) + ); +} + +function normalizeNextSteps( + steps: NextStep[], + catalog: ToolCatalog, + preferredWorkflow: string, +): NextStep[] { return steps.map((step) => { if (!step.tool) { return step; } - const target = catalog.getByMcpName(step.tool); + const target = getNextStepTarget({ catalog, mcpName: step.tool, preferredWorkflow }); if (!target) { return step; } @@ -238,7 +256,7 @@ export function postProcessSession(params: { return; } - const normalized = normalizeNextSteps(finalSteps, catalog); + const normalized = normalizeNextSteps(finalSteps, catalog, tool.workflow); if (normalized.length > 0) { session.setNextSteps?.(normalized, runtime); diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt index b04be39af..adaba8754 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate long press at (100, 400). +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt index 678f04137..dfce2922b 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt @@ -3,8 +3,4 @@ Simulator: -Warnings (1): - - ⚠ snapshot_ui has not been called yet. 
Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Long press at (100, 400) for 500ms simulated successfully. +✅ Long press on elementRef e3 for 500ms simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt index 412aea1fd..0dc63b95f 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt @@ -3,586 +3,36 @@ Simulator: -Accessibility Hierarchy - ```json - [ - { - "AXFrame" : "{{0, 0}, {402, 874}}", - "AXUniqueId" : null, - "frame" : { - "y" : 0, - "x" : 0, - "width" : 402, - "height" : 874 - }, - "role_description" : "application", - "AXLabel" : "Calculator", - "content_required" : false, - "type" : "Application", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXApplication", - "children" : [ - { - "AXFrame" : "{{344, 250.5}, {34, 67}}", - "AXUniqueId" : null, - "frame" : { - "y" : 250.5, - "x" : 344, - "width" : 34, - "height" : 67 - }, - "role_description" : "text", - "AXLabel" : "0", - "content_required" : false, - "type" : "StaticText", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXStaticText", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "C", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 
357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "Âą", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "%", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "Ãˇ", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "7", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "8", - "content_required" : false, - 
"type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "9", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "×", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "4", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "5", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - 
"AXFrame" : "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "6", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "-", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "1", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "2", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "3", - 
"content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "+", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "0", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : ".", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "=", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : 
null, - "pid" : - } - ], - "subrole" : null, - "pid" : - } - ] - ``` +Targets (19) — ref|action|role|label|id + e3|tap|button|C| + e4|tap|button|Âą| + e5|tap|button|%| + e6|tap|button|Ãˇ| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| Tips - - Use frame coordinates for tap/swipe (center: x+width/2, y+height/2) - - If a debugger is attached, ensure the app is running (not stopped on breakpoints) - - Screenshots are for visual verification only + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. -✅ Accessibility hierarchy retrieved successfully. +✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: -1. Refresh after layout changes: xcodebuildmcp simulator snapshot-ui --simulator-id -2. Tap on element: xcodebuildmcp ui-automation tap --simulator-id --x 0 --y 0 -3. Take screenshot for verification: xcodebuildmcp simulator screenshot --simulator-id +1. Refresh after layout changes: xcodebuildmcp simulator snapshot-ui --simulator-id "" +2. Wait for UI to settle: xcodebuildmcp ui-automation wait-for-ui --simulator-id "SIMULATOR_UUID" --predicate "settled" +3. Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id "" --element-ref "e3" +4. 
Take screenshot for verification: xcodebuildmcp simulator screenshot --simulator-id "" diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt index 4716920bc..935b744b8 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate swipe. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt new file mode 100644 index 000000000..3cdc8d292 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt @@ -0,0 +1,11 @@ + +👆 Swipe + + Simulator: + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'swipeWithin'. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt deleted file mode 100644 index f78015c16..000000000 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt +++ /dev/null @@ -1,10 +0,0 @@ - -👆 Swipe - - Simulator: - -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Swipe from (200, 400) to (200, 200) simulated successfully. 
diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt index d45f020a2..726bd1eb3 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate tap at (100, 100). +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt index d4a41a58c..bc58f3e30 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt @@ -3,8 +3,4 @@ Simulator: -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Tap at (100, 400) simulated successfully. +✅ Tap on elementRef e3 simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt index 751d9f3fd..8a7e5ad50 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. 
- ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to execute touch event. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt index 5197f0e74..ea972a7a7 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt @@ -3,8 +3,4 @@ Simulator: -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Touch event (touch down+up) at (100, 400) executed successfully. +✅ Touch event (touch down+up) on elementRef e3 executed successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt index bba706413..ccdd1e70e 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate text typing. +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt new file mode 100644 index 000000000..5d72e95f4 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt @@ -0,0 +1,11 @@ + +âŒ¨ī¸ Type Text + + Simulator: + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'typeText'. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--success.txt deleted file mode 100644 index 72a6ac50b..000000000 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--success.txt +++ /dev/null @@ -1,6 +0,0 @@ - -âŒ¨ī¸ Type Text - - Simulator: - -✅ Text typing simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt new file mode 100644 index 000000000..1e74f961e --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt @@ -0,0 +1,36 @@ + +âš™ī¸ Wait for UI + + Simulator: + +Targets (19) — ref|action|role|label|id + e3|tap|button|C| + e4|tap|button|Âą| + e5|tap|button|%| + e6|tap|button|Ãˇ| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| + +Tips + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. 
+ +✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. + +Next steps: +1. Refresh runtime snapshot: xcodebuildmcp simulator snapshot-ui --simulator-id "SIMULATOR_UUID" +2. Wait again: xcodebuildmcp ui-automation wait-for-ui --simulator-id "SIMULATOR_UUID" --predicate "settled" diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json index b75c46872..4bef73811 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--error-no-simulator.json @@ -2,27 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate long press at (100, 400).", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { "type": "long-press", - "x": 100, - "y": 400, + "elementRef": "e3", "durationMs": 500 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json index 32a7bd325..8621e1a67 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json @@ -9,20 +9,11 @@ }, "action": { "type": "long-press", - "x": 100, - "y": 400, + "elementRef": "e3", "durationMs": 500 }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." - } - ], - "errors": [] } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json index 68cb4caaf..de6328de9 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json @@ -11,393 +11,34 @@ "simulatorId": "" }, "capture": { - "type": "ui-hierarchy", - "uiHierarchy": [ - { - "AXFrame": "{{0, 0}, {402, 874}}", - "AXUniqueId": null, - "frame": { "x": 0, "y": 0, "width": 402, "height": 874 }, - "role_description": "application", - "AXLabel": "Calculator", - "content_required": false, - "type": "Application", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXApplication", - "children": [ - { - "AXFrame": "{{344, 250.5}, {34, 67}}", - "AXUniqueId": null, - "frame": { "x": 344, "y": 250.5, "width": 34, "height": 67 }, - "role_description": "text", - "AXLabel": "0", - "content_required": false, - "type": "StaticText", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXStaticText", - "children": [], - 
"subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 357.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "C", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 357.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "Âą", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 357.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "%", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 357.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "Ãˇ", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 449.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "7", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - 
"enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 449.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "8", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 449.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "9", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 449.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "×", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 541.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "4", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 541.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "5", - "content_required": false, - "type": "Button", - "title": null, - 
"help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 541.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "6", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 541.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "-", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 19.5, "y": 633.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "1", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 633.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "2", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 633.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": "3", - 
"content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 633.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "+", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 113.2, "y": 725.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "0", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { "x": 206.5, "y": 725.5, "width": 82.7, "height": 81 }, - "role_description": "button", - "AXLabel": ".", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { "x": 300.2, "y": 725.5, "width": 82.3, "height": 81 }, - "role_description": "button", - "AXLabel": "=", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - } - ], - "subrole": null, - "pid": 99999 - } - ] + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + 
"count": 21, + "targets": [ + "e3|tap|button|C||", + "e4|tap|button|Âą||", + "e5|tap|button|%||", + "e6|tap|button|Ãˇ||", + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e10|tap|button|×||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e14|tap|button|-||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e18|tap|button|+||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e21|tap|button|=||" + ], + "scroll": [], + "udid": "" } - }, - "nextSteps": [ - "Refresh after layout changes: snapshot_ui({ simulatorId: \"\" })", - "Tap on element: tap({ simulatorId: \"\", x: 0, y: 0 })", - "Take screenshot for verification: screenshot({ simulatorId: \"\" })" - ] + } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json index 6968362b0..29c92f1c9 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate swipe.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "swipe" + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-not-actionable.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-not-actionable.json new file mode 100644 index 000000000..d7d3c80b9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'swipeWithin'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'swipeWithin'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json deleted file mode 100644 index d3a04cc16..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "2", - "didError": false, - "error": null, - "data": { - "summary": { - "status": "SUCCEEDED" - }, - "action": { - "type": "swipe", - "from": { - "x": 200, - "y": 400 - }, - "to": { - "x": 200, - "y": 200 - } - }, - "artifacts": { - "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." 
- } - ], - "errors": [] - } - } -} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json index 00556fe82..ba680e80d 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--error-no-simulator.json @@ -2,26 +2,22 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate tap at (100, 100).", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { "type": "tap", - "x": 100, - "y": 100 + "elementRef": "e3" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json index fa6e9978e..7b215e9c7 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json @@ -9,19 +9,10 @@ }, "action": { "type": "tap", - "x": 100, - "y": 400 + "elementRef": "e3" }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." 
- } - ], - "errors": [] } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json index 4cbcc83fb..9b589c7d6 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to execute touch event.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "touch" + "type": "touch", + "elementRef": "e3", + "event": "touch down+up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json index 0e708ad44..365d4ca63 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json @@ -9,20 +9,11 @@ }, "action": { "type": "touch", - "event": "touch down+up", - "x": 100, - "y": 400 + "elementRef": "e3", + "event": "touch down+up" }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." 
- } - ], - "errors": [] } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json index 30d9ab14d..ffb164bb3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate text typing.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "type-text" + "type": "type-text", + "elementRef": "e3", + "textLength": 5 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-not-actionable.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-not-actionable.json new file mode 100644 index 000000000..e403f06f9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'typeText'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "type-text", + "elementRef": "e3", + "textLength": 5 + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'typeText'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--success.json deleted file mode 100644 index a2686f68c..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/type-text--success.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "2", - "didError": false, - "error": null, - "data": { - "summary": { - "status": "SUCCEEDED" - }, - "action": { - "type": "type-text" - }, - "artifacts": { - "simulatorId": "" - } - } -} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json new file mode 100644 index 000000000..de6328de9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json @@ -0,0 +1,44 @@ +{ + "schema": 
"xcodebuildmcp.output.capture-result", + "schemaVersion": "2", + "didError": false, + "error": null, + "data": { + "summary": { + "status": "SUCCEEDED" + }, + "artifacts": { + "simulatorId": "" + }, + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e3|tap|button|C||", + "e4|tap|button|±||", + "e5|tap|button|%||", + "e6|tap|button|÷||", + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e10|tap|button|×||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e14|tap|button|-||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e18|tap|button|+||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e21|tap|button|=||" + ], + "scroll": [], + "udid": "" + } + } +} diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt index 10acee620..3eb76d15a 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Long Press -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate long press at (100, 400). +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt index faeec8350..46711e63c 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt @@ -1,8 +1,4 @@ 👆 Long Press -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Long press at (100, 400) for 500ms simulated successfully. +✅ Long press on elementRef e3 for 500ms simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt index 5766d2720..014b6969e 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt @@ -1,586 +1,36 @@ 📷 Snapshot UI -Accessibility Hierarchy - ```json - [ - { - "AXFrame" : "{{0, 0}, {402, 874}}", - "AXUniqueId" : null, - "frame" : { - "y" : 0, - "x" : 0, - "width" : 402, - "height" : 874 - }, - "role_description" : "application", - "AXLabel" : "Calculator", - "content_required" : false, - "type" : "Application", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXApplication", - "children" : [ - { - "AXFrame" : "{{344, 250.5}, {34, 67}}", - "AXUniqueId" : null, - "frame" : { - "y" : 250.5, - "x" : 344, - "width" : 34, - "height" : 67 - }, - "role_description" : "text", - "AXLabel" : "0", - "content_required" : false, - "type" : "StaticText", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXStaticText", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - 
"AXFrame" : "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "C", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "±", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "%", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "÷", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "7", 
"content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "8", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "9", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "×", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "4", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : 
null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "5", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "6", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "-", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "1", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - 
"AXLabel" : "2", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "3", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "+", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "0", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : ".", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - 
- ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "=", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - } - ], - "subrole" : null, - "pid" : - } - ] - ``` +Targets (19) — ref|action|role|label|id + e3|tap|button|C| + e4|tap|button|±| + e5|tap|button|%| + e6|tap|button|÷| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| Tips - - Use frame coordinates for tap/swipe (center: x+width/2, y+height/2) - - If a debugger is attached, ensure the app is running (not stopped on breakpoints) - - Screenshots are for visual verification only + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. -✅ Accessibility hierarchy retrieved successfully. +✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: 1. Refresh after layout changes: snapshot_ui({ simulatorId: "" }) -2. Tap on element: tap({ simulatorId: "", x: 0, y: 0 }) -3. Take screenshot for verification: screenshot({ simulatorId: "" }) +2. Wait for UI to settle: wait_for_ui({ simulatorId: "SIMULATOR_UUID", predicate: "settled" }) +3. Tap an elementRef: tap({ simulatorId: "", elementRef: "e3" }) +4. 
Take screenshot for verification: screenshot({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt index 05a6c9606..a0e22ad66 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Swipe -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate swipe. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt new file mode 100644 index 000000000..8c257311f --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt @@ -0,0 +1,9 @@ + +👆 Swipe + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'swipeWithin'. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt deleted file mode 100644 index 9b5ca8373..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt +++ /dev/null @@ -1,8 +0,0 @@ - -👆 Swipe - -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Swipe from (200, 400) to (200, 200) simulated successfully. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt index 3aa5515fa..9f2c04d8b 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Tap -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate tap at (100, 100). +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt index 6c3da0d59..3fbb4eeaa 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt @@ -1,8 +1,4 @@ 👆 Tap -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Tap at (100, 400) simulated successfully. +✅ Tap on elementRef e3 simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt index ad4778d4a..71f0ecf56 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Touch -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. 
- ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to execute touch event. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt index b9dad4d4d..9f28f64ad 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt @@ -1,8 +1,4 @@ 👆 Touch -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Touch event (touch down+up) at (100, 400) executed successfully. +✅ Touch event (touch down+up) on elementRef e3 executed successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt index 40a192802..99cf12e61 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt @@ -1,8 +1,8 @@ ⌨️ Type Text -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate text typing. +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt new file mode 100644 index 000000000..e1e5c9bf8 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt @@ -0,0 +1,9 @@ + +⌨️ Type Text + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'typeText'. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--success.txt deleted file mode 100644 index a3abffa98..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--success.txt +++ /dev/null @@ -1,4 +0,0 @@ - -⌨️ Type Text - -✅ Text typing simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt new file mode 100644 index 000000000..86c03d978 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt @@ -0,0 +1,34 @@ + +⚙️ Wait for UI + +Targets (19) — ref|action|role|label|id + e3|tap|button|C| + e4|tap|button|±| + e5|tap|button|%| + e6|tap|button|÷| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| + +Tips + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. 
+ +✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. + +Next steps: +1. Refresh runtime snapshot: snapshot_ui({ simulatorId: "SIMULATOR_UUID" }) +2. Wait again: wait_for_ui({ simulatorId: "SIMULATOR_UUID", predicate: "settled" }) diff --git a/src/snapshot-tests/__tests__/json-normalize.test.ts b/src/snapshot-tests/__tests__/json-normalize.test.ts index 0e4ecabc4..ce826a230 100644 --- a/src/snapshot-tests/__tests__/json-normalize.test.ts +++ b/src/snapshot-tests/__tests__/json-normalize.test.ts @@ -1,9 +1,9 @@ import { describe, expect, it } from 'vitest'; import type { StructuredOutputEnvelope } from '../../types/structured-output.ts'; -import { formatStructuredEnvelopeFixture, normalizeStructuredEnvelope } from '../json-normalize.ts'; +import { normalizeStructuredEnvelope } from '../json-normalize.ts'; describe('normalizeStructuredEnvelope', () => { - it('keeps only failing test cases for failed result snapshots', () => { + it('keeps suite-less simulator test cases while normalizing volatile durations', () => { const envelope: StructuredOutputEnvelope = { schema: 'xcodebuildmcp.output.test-result', schemaVersion: '1', @@ -26,7 +26,11 @@ describe('normalizeStructuredEnvelope', () => { error: 'Tests failed', data: { summary: { target: 'simulator' }, - testCases: [{ test: 'Swift Testing failure', status: 'failed', durationMs: 0 }], + testCases: [ + { test: 'Swift Testing failure', status: 'failed', durationMs: 0 }, + { test: 'Volatile Swift Testing pass', status: 'passed', durationMs: 0 }, + { suite: 'XCTestSuite', test: 'testStablePass', status: 'passed', durationMs: 0 }, + ], }, }); }); @@ -73,130 +77,64 @@ describe('normalizeStructuredEnvelope', () => { }); }); - it('normalizes and sorts SwiftPM build progress lines in stderr arrays', () => { + it('normalizes volatile runtime snapshot timestamps', () => { const envelope: StructuredOutputEnvelope = { - schema: 'xcodebuildmcp.output.build-run-result', - 
schemaVersion: '1', + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', didError: false, error: null, data: { - output: { - stderr: [ - 'Building for debugging...', - '[5/8] Emitting module spm', - '[4/8] Compiling spm main.swift', - "Build of product 'spm' complete! (0.42s)", - ], + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 9, + capturedAtMs: 123, + expiresAtMs: 456, + elements: [], + actions: [], }, - }, - }; - - expect(normalizeStructuredEnvelope(envelope)).toEqual({ - schema: 'xcodebuildmcp.output.build-run-result', - schemaVersion: '1', - didError: false, - error: null, - data: { - output: { - stderr: [ - 'Building for debugging...', - '[] Compiling spm main.swift', - '[] Emitting module spm', - "Build of product 'spm' complete! ()", - ], + uiError: { + code: 'TARGET_NOT_ACTIONABLE', + message: 'Target is not actionable.', + recoveryHint: 'Refresh the snapshot and choose another element.', + snapshotAgeMs: 42, }, }, - }); - }); - - it('normalizes volatile build settings entry values without dropping entries', () => { - const envelope: StructuredOutputEnvelope = { - schema: 'xcodebuildmcp.output.build-settings', - schemaVersion: '1', - didError: false, - error: null, - data: { - entries: [ - { key: 'ALTERNATE_OWNER', value: 'cameroncooke' }, - { key: 'ALTERNATE_GROUP', value: 'staff' }, - { key: 'CACHE_ROOT', value: '/var/folders/hash/C/com.apple.DeveloperTools/26.4/Xcode' }, - { key: 'GID', value: '20' }, - { key: 'TARGET_DEVICE_MODEL', value: 'iPhone17,2' }, - { key: 'TARGET_DEVICE_OS_VERSION', value: '26.4.2' }, - { - key: 'SDKROOT', - value: - '/Applications/Xcode-26.4.0.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS26.4.sdk', - }, - { - key: 'SDK_DIR_iphoneos26_4', - value: - 
'/Applications/Xcode-26.4.0.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS26.4.sdk', - }, - { key: 'SDK_NAME', value: 'iphoneos26.4' }, - { key: 'SDK_VERSION_ACTUAL', value: '260400' }, - { key: 'SDK_PRODUCT_BUILD_VERSION', value: '23E237' }, - { key: 'MAC_OS_X_VERSION_ACTUAL', value: '260301' }, - { key: 'MAC_OS_X_PRODUCT_BUILD_VERSION', value: '25D2128' }, - { - key: 'PLATFORM_DEVELOPER_APPLICATIONS_DIR', - value: '/Applications/Xcode-26.4.0.app/Contents/Developer/Applications', - }, - { - key: 'SDK_STAT_CACHE_PATH', - value: - '/Library/Developer/Xcode/DerivedData/SDKStatCaches.noindex/iphoneos26.4-23E237-c1e9.sdkstatcache', - }, - ], - }, }; expect(normalizeStructuredEnvelope(envelope)).toEqual({ - schema: 'xcodebuildmcp.output.build-settings', - schemaVersion: '1', + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', didError: false, error: null, data: { - entries: [ - { key: 'ALTERNATE_OWNER', value: '' }, - { key: 'ALTERNATE_GROUP', value: '' }, - { key: 'CACHE_ROOT', value: '' }, - { key: 'GID', value: '' }, - { key: 'TARGET_DEVICE_MODEL', value: '' }, - { key: 'TARGET_DEVICE_OS_VERSION', value: '' }, - { key: 'SDKROOT', value: '' }, - { key: 'SDK_DIR_', value: '' }, - { key: 'SDK_NAME', value: '' }, - { key: 'SDK_VERSION_ACTUAL', value: '' }, - { key: 'SDK_PRODUCT_BUILD_VERSION', value: '' }, - { key: 'MAC_OS_X_VERSION_ACTUAL', value: '' }, - { key: 'MAC_OS_X_PRODUCT_BUILD_VERSION', value: '' }, - { - key: 'PLATFORM_DEVELOPER_APPLICATIONS_DIR', - value: '/Applications/Xcode-.app/Contents/Developer/Applications', - }, - { key: 'SDK_STAT_CACHE_PATH', value: '' }, - ], + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: '', + seq: 1, + capturedAtMs: 1_700_000_000_000, + expiresAtMs: 1_700_000_060_000, + elements: [], + actions: [], + }, + uiError: { + code: 'TARGET_NOT_ACTIONABLE', 
+ message: 'Target is not actionable.', + recoveryHint: 'Refresh the snapshot and choose another element.', + snapshotAgeMs: 1234, + }, }, }); }); - it('compacts frame objects emitted with y before x', () => { - const envelope: StructuredOutputEnvelope = { - schema: 'xcodebuildmcp.output.ui-snapshot', - schemaVersion: '1', - didError: false, - error: null, - data: { - frame: { y: 2, x: 1, width: 3, height: 4 }, - }, - }; - - expect(formatStructuredEnvelopeFixture(envelope)).toContain( - '"frame": { "x": 1, "y": 2, "width": 3, "height": 4 }', - ); - }); - it('normalizes volatile build settings PATH entry values without dropping the entry', () => { const envelope: StructuredOutputEnvelope = { schema: 'xcodebuildmcp.output.build-settings', @@ -218,7 +156,7 @@ describe('normalizeStructuredEnvelope', () => { error: null, data: { entries: [ - { key: 'SDKROOT', value: '' }, + { key: 'SDKROOT', value: 'iphoneos' }, { key: 'PATH', value: '' }, ], }, diff --git a/src/snapshot-tests/json-normalize.ts b/src/snapshot-tests/json-normalize.ts index 5699bef61..f208ec805 100644 --- a/src/snapshot-tests/json-normalize.ts +++ b/src/snapshot-tests/json-normalize.ts @@ -22,6 +22,10 @@ function normalizeString(value: string, key?: string, path: string[] = []): stri return ''; } + if (key === 'screenHash') { + return ''; + } + if (key === 'AXFrame') { // Round embedded floats to 1 decimal place for rounding-stable comparison with // the sibling `frame` object. e.g. 82.666664123535156 -> 82.7, 250.5 stays 250.5. 
@@ -61,6 +65,15 @@ function normalizeNumber(path: string[], key: string | undefined, value: number) return 3600; case 'threadId': return 1; + case 'capturedAtMs': + return 1_700_000_000_000; + case 'expiresAtMs': + return 1_700_000_060_000; + case 'snapshotAgeMs': + return 1234; + case 'seq': + if (path.includes('capture')) return 1; + return value; case 'x': case 'y': case 'width': @@ -236,14 +249,15 @@ function normalizeXcodeBridgeCallEnvelope( return envelope; } - return { + const normalizedEnvelope: StructuredOutputEnvelope = { ...envelope, data: { ...data, content: [], ...(Object.hasOwn(data, 'structuredContent') ? { structuredContent: {} } : {}), }, - } as StructuredOutputEnvelope; + }; + return normalizedEnvelope; } export function normalizeStructuredEnvelope( diff --git a/src/snapshot-tests/suites/ui-automation-suite.ts b/src/snapshot-tests/suites/ui-automation-suite.ts index 536095bf2..ee2ebaa3c 100644 --- a/src/snapshot-tests/suites/ui-automation-suite.ts +++ b/src/snapshot-tests/suites/ui-automation-suite.ts @@ -13,6 +13,25 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe(`${runtime} ui-automation workflow`, () => { let harness: WorkflowSnapshotHarness; let simulatorUdid: string; + let snapshotCaptured = false; + + async function refreshRuntimeSnapshot(): Promise { + if (snapshotCaptured) { + return; + } + + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId: BUNDLE_ID, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { isError } = await harness.invoke('ui-automation', 'snapshot-ui', { + simulatorId: simulatorUdid, + }); + expect(isError).toBe(false); + snapshotCaptured = true; + } beforeAll(async () => { vi.setConfig({ testTimeout: 120_000 }); @@ -22,7 +41,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi await harness.invoke('simulator', 'build-and-run', { workspacePath: WORKSPACE, scheme: 'CalculatorApp', 
- simulatorName: 'iPhone 17', + simulatorName: 'iPhone 17 Pro', }); await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -34,10 +53,11 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('tap', () => { it('success', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'tap', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', }); expect(isError).toBe(false); expectFixture(text, 'tap--success'); @@ -46,8 +66,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'tap', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 100, + elementRef: 'e3', }); expect(isError).toBe(true); expectFixture(text, 'tap--error-no-simulator'); @@ -56,10 +75,11 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('touch', () => { it('success', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'touch', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', down: true, up: true, }); @@ -70,8 +90,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'touch', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 400, + elementRef: 'e3', down: true, up: true, }); @@ -82,10 +101,11 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('long-press', () => { it('success', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'long-press', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', duration: 500, }); expect(isError).toBe(false); @@ -95,8 +115,7 @@ export 
function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'long-press', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 400, + elementRef: 'e3', duration: 500, }); expect(isError).toBe(true); @@ -105,25 +124,23 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); describe('swipe', () => { - it('success', async () => { + it('error - target not actionable', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'swipe', { simulatorId: simulatorUdid, - x1: 200, - y1: 400, - x2: 200, - y2: 200, + withinElementRef: 'e3', + direction: 'up', }); - expect(isError).toBe(false); - expectFixture(text, 'swipe--success'); + expect(isError).toBe(true); + expectFixture(text, 'swipe--error-not-actionable'); }); it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'swipe', { simulatorId: INVALID_SIMULATOR_ID, - x1: 200, - y1: 400, - x2: 200, - y2: 200, + withinElementRef: 'e3', + direction: 'up', }); expect(isError).toBe(true); expectFixture(text, 'swipe--error-no-simulator'); @@ -211,18 +228,22 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); describe('type-text', () => { - it('success', async () => { + it('error - target not actionable', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'type-text', { simulatorId: simulatorUdid, + elementRef: 'e3', text: 'hello', }); - expect(isError).toBe(false); - expectFixture(text, 'type-text--success'); + expect(isError).toBe(true); + expectFixture(text, 'type-text--error-not-actionable'); }); it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'type-text', { simulatorId: INVALID_SIMULATOR_ID, + elementRef: 
'e3', text: 'hello', }); expect(isError).toBe(true); @@ -230,6 +251,28 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); }); + describe('wait-for-ui', () => { + it('success - existing calculator button', async () => { + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId: BUNDLE_ID, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { text, isError } = await harness.invoke('ui-automation', 'wait-for-ui', { + simulatorId: simulatorUdid, + predicate: 'exists', + label: 'C', + role: 'button', + timeoutMs: 1000, + pollIntervalMs: 100, + }); + expect(isError).toBe(false); + expectFixture(text, 'wait-for-ui--success'); + snapshotCaptured = true; + }); + }); + describe('snapshot-ui', () => { it('success - calculator app', async () => { // Re-focus the calculator app before snapshotting: preceding UI tests @@ -247,6 +290,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi expect(isError).toBe(false); expect(text.length).toBeGreaterThan(100); expectFixture(text, 'snapshot-ui--success'); + snapshotCaptured = true; }); it('error - invalid simulator', async () => { diff --git a/src/types/domain-results.ts b/src/types/domain-results.ts index d6886d7d0..1ad1f0a47 100644 --- a/src/types/domain-results.ts +++ b/src/types/domain-results.ts @@ -50,6 +50,12 @@ export type AtLeastOne = { [K in keyof T]-?: Required> & Partial>; }[keyof T]; import type { BuildInvocationRequest } from './domain-fragments.ts'; +import type { + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, + UiAutomationRecoverableError, + UiWaitMatch, +} from './ui-snapshot.ts'; export type ExecutionStatus = 'SUCCEEDED' | 'FAILED'; export type BuildTarget = 'simulator' | 'device' | 'macos' | 'swift-package'; @@ -242,7 +248,9 @@ export interface CaptureVideoRecordingPayload { export type CapturePayload = | CaptureImagePayload | CaptureUiHierarchyPayload - | 
CaptureVideoRecordingPayload; + | CaptureVideoRecordingPayload + | RuntimeSnapshotV1 + | RuntimeSnapshotUnchangedV1; export interface DebugFileLineBreakpoint { kind: 'file-line'; file: string; @@ -349,27 +357,22 @@ export interface TestSelectionInfo { } export interface UiActionTap { type: 'tap'; - x?: number; - y?: number; - id?: string; - label?: string; + elementRef: string; } export interface UiActionSwipe { type: 'swipe'; - from?: Point; - to?: Point; + withinElementRef: string; + direction: 'up' | 'down' | 'left' | 'right'; durationSeconds?: number; } export interface UiActionTouch { type: 'touch'; + elementRef: string; event?: string; - x?: number; - y?: number; } export interface UiActionLongPress { type: 'long-press'; - x: number; - y: number; + elementRef: string; durationMs: number; } export interface UiActionButton { @@ -382,6 +385,8 @@ export interface UiActionGesture { } export interface UiActionTypeText { type: 'type-text'; + elementRef: string; + textLength?: number; } export interface UiActionKeyPress { type: 'key-press'; @@ -391,6 +396,10 @@ export interface UiActionKeySequence { type: 'key-sequence'; keyCodes: number[]; } +export interface UiActionBatch { + type: 'batch'; + stepCount: number; +} export type UiAction = | UiActionTap | UiActionSwipe @@ -400,7 +409,8 @@ export type UiAction = | UiActionGesture | UiActionTypeText | UiActionKeyPress - | UiActionKeySequence; + | UiActionKeySequence + | UiActionBatch; export interface SimulatorActionBoot { type: 'boot'; } @@ -491,6 +501,8 @@ export type CaptureResultDomainResult = ToolDomainResultBase & { artifacts: { simulatorId: string; screenshotPath?: string }; capture?: CapturePayload; diagnostics?: BasicDiagnostics; + uiError?: UiAutomationRecoverableError; + waitMatch?: UiWaitMatch; }; export type CoverageResultDomainResult = ToolDomainResultBase & { kind: 'coverage-result'; @@ -630,6 +642,7 @@ export type UiActionResultDomainResult = ToolDomainResultBase & { action: UiAction; artifacts: { 
simulatorId: string }; diagnostics?: BasicDiagnostics; + uiError?: UiAutomationRecoverableError; }; export type XcodeBridgeCallResultDomainResult = ToolDomainResultBase & { kind: 'xcode-bridge-call-result'; diff --git a/src/types/ui-snapshot.ts b/src/types/ui-snapshot.ts new file mode 100644 index 000000000..6f4d09cad --- /dev/null +++ b/src/types/ui-snapshot.ts @@ -0,0 +1,148 @@ +import type { AccessibilityNode, Frame, Point } from './domain-results.ts'; + +export type RuntimeSnapshotProtocol = 'rs/1'; +export type RuntimeSnapshotCaptureType = 'runtime-snapshot'; + +export type RuntimeActionNameV1 = 'tap' | 'typeText' | 'longPress' | 'touch' | 'swipeWithin'; + +export type RuntimeElementRoleV1 = + | 'application' + | 'button' + | 'cell' + | 'image' + | 'keyboard-key' + | 'list' + | 'menu' + | 'other' + | 'scroll-view' + | 'slider' + | 'switch' + | 'tab' + | 'text' + | 'text-field' + | 'window'; + +export interface RuntimeElementStateV1 { + enabled?: boolean; + focused?: boolean; + selected?: boolean; + visible?: boolean; +} + +export interface RuntimeElementV1 { + ref: string; + role?: RuntimeElementRoleV1; + label?: string; + value?: string; + identifier?: string; + frame: Frame; + state?: RuntimeElementStateV1; + actions: RuntimeActionNameV1[]; +} + +export interface RuntimeActionHintV1 { + action: RuntimeActionNameV1; + elementRef: string; + label?: string; +} + +export interface RuntimeSnapshotV1 { + type: RuntimeSnapshotCaptureType; + protocol: RuntimeSnapshotProtocol; + simulatorId: string; + screenHash: string; + seq: number; + capturedAtMs: number; + expiresAtMs: number; + elements: RuntimeElementV1[]; + actions: RuntimeActionHintV1[]; +} + +export interface RuntimeSnapshotUnchangedV1 { + type: 'runtime-snapshot-unchanged'; + protocol: RuntimeSnapshotProtocol; + simulatorId: string; + screenHash: string; + seq: number; +} + +export interface RuntimeSnapshotMetadata { + path: string; + depth: number; + childCount: number; + signature: string; + 
activationPoint?: Point; + swipeFrame?: Frame; +} + +export interface RuntimeSnapshotElementRecord { + publicElement: RuntimeElementV1; + metadata: RuntimeSnapshotMetadata; + rawNode: AccessibilityNode; +} + +export interface RuntimeSnapshotRecord { + simulatorId: string; + screenHash: string; + seq: number; + capturedAtMs: number; + expiresAtMs: number; + payload: RuntimeSnapshotV1; + elements: RuntimeSnapshotElementRecord[]; + elementsByRef: Map; +} + +export type RuntimeSnapshotLookupStatus = 'available' | 'expired' | 'missing'; + +export interface RuntimeSnapshotLookup { + status: RuntimeSnapshotLookupStatus; + snapshot: RuntimeSnapshotRecord | null; + snapshotAgeMs?: number; +} + +export type UiAutomationRecoverableErrorCode = + | 'SNAPSHOT_MISSING' + | 'SNAPSHOT_EXPIRED' + | 'SNAPSHOT_PARSE_FAILED' + | 'ELEMENT_REF_NOT_FOUND' + | 'TARGET_NOT_FOUND' + | 'TARGET_AMBIGUOUS' + | 'TARGET_NOT_ACTIONABLE' + | 'WAIT_TIMEOUT' + | 'UI_STATE_CHANGED' + | 'ACTION_FAILED'; + +export interface UiAutomationRecoverableError { + code: UiAutomationRecoverableErrorCode; + message: string; + recoveryHint: string; + elementRef?: string; + candidates?: RuntimeElementV1[]; + snapshotAgeMs?: number; + timeoutMs?: number; +} + +export type UiWaitPredicate = + | 'exists' + | 'gone' + | 'enabled' + | 'focused' + | 'textContains' + | 'settled'; + +export interface UiWaitMatch { + predicate: UiWaitPredicate; + matches: RuntimeElementV1[]; +} + +export type RuntimeElementResolution = + | { + ok: true; + snapshot: RuntimeSnapshotRecord; + element: RuntimeSnapshotElementRecord; + snapshotAgeMs: number; + } + | { + ok: false; + error: UiAutomationRecoverableError; + }; diff --git a/src/utils/__tests__/structured-output-envelope.test.ts b/src/utils/__tests__/structured-output-envelope.test.ts index 2dcb4b950..c440a6587 100644 --- a/src/utils/__tests__/structured-output-envelope.test.ts +++ b/src/utils/__tests__/structured-output-envelope.test.ts @@ -1,11 +1,10 @@ import { describe, expect, it 
} from 'vitest'; import { toStructuredEnvelope } from '../structured-output-envelope.ts'; -import type { NextStep } from '../../types/common.ts'; import type { BuildResultDomainResult, + CaptureResultDomainResult, DeviceListDomainResult, } from '../../types/domain-results.ts'; -import type { StructuredOutputEnvelope } from '../../types/structured-output.ts'; describe('toStructuredEnvelope', () => { it('strips kind, didError, and error from the data payload', () => { @@ -52,345 +51,304 @@ describe('toStructuredEnvelope', () => { }); }); - it('omits nextSteps when no serializable steps are provided', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: true, - error: 'Build failed', - }; - const expectedEnvelope = { - schema: 'xcodebuildmcp.output.build-result', - schemaVersion: '1', - didError: true, - error: 'Build failed', - data: null, - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '1', { nextSteps: [] }), - ).toEqual(expectedEnvelope); - }); - - it('does not serialize next steps on error envelopes because the error schema has no nextSteps field', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: true, - error: 'Build failed', - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.error', '1', { - nextSteps: [ + it('compacts runtime snapshots inside the capture payload by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'exists', + matches: [ { - label: 'Retry build', - cliTool: 'build', - workflow: 'project', - params: { scheme: 'CalculatorApp' }, + ref: 'e2', + role: 'button', + label: 'San Francisco', + identifier: 'weather.locationButton', + frame: { x: 12, y: 81, width: 178, height: 33 }, + actions: ['tap', 'longPress', 'touch'], }, ], - }), - 
).toEqual({ - schema: 'xcodebuildmcp.output.error', - schemaVersion: '1', - didError: true, - error: 'Build failed', - data: null, - }); - }); - - it('serializes next steps as rendered CLI command lines by default sorted by priority', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], - }; - const nextSteps: NextStep[] = [ - { - tool: 'launch_app_sim', - cliTool: 'launch-app', - workflow: 'simulator', - label: 'Launch app', - params: { simulatorId: 'SIM-1' }, - priority: 20, - when: 'success', }, - { - tool: 'boot_sim', - cliTool: 'boot', - workflow: 'simulator', - label: 'Boot the simulator', - params: { simulatorId: 'SIM-1', useLatestOS: true }, - priority: 10, - when: 'success', + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-one', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'San Francisco', + identifier: 'weather.locationButton', + frame: { x: 12, y: 81, width: 178, height: 33 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'San Francisco' }, + ], }, - ]; + }; - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { nextSteps }), - ).toEqual({ - schema: 'xcodebuildmcp.output.device-list', + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', schemaVersion: '2', didError: false, error: null, data: { - devices: [], + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + rs: '1', + screenHash: 'screen-one', + seq: 
1, + count: 2, + targets: ['e2|tap|button|San Francisco||weather.locationButton'], + scroll: ['e1|swipe|application|Weather||'], + udid: 'SIMULATOR-1', + }, + waitMatch: { + predicate: 'exists', + matches: ['e2|tap|button|San Francisco||weather.locationButton'], + }, }, - nextSteps: [ - 'Boot the simulator: xcodebuildmcp simulator boot --simulator-id SIM-1 --use-latest-os', - 'Launch app: xcodebuildmcp simulator launch-app --simulator-id SIM-1', - ], }); }); - it('shell-escapes only JSON next step arguments that need quoting', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', + it('compacts unchanged runtime snapshot captures by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', didError: false, error: null, - devices: [], + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-one', + seq: 2, + }, }; - const nextSteps: NextStep[] = [ - { - tool: 'launch_sim', - cliTool: 'launch', - workflow: 'simulator', - label: 'Launch app', - params: { - simulatorId: 'SIM-1', - appPath: '/tmp/My App.app', - displayName: "Cam's App", + + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: 'screen-one', + seq: 2, + unchanged: true, + udid: 'SIMULATOR-1', }, }, - ]; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { nextSteps }), - ).toMatchObject({ - nextSteps: [ - "Launch app: xcodebuildmcp simulator launch --simulator-id SIM-1 --app-path '/tmp/My App.app' --display-name 'Cam'\\''s App'", - ], }); }); - it('serializes CLI next steps 
when only cliTool is present', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', + it('orders compact runtime snapshot targets by usefulness', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', didError: false, error: null, - devices: [], - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-two', + seq: 2, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e2', + role: 'button', + label: 'Sheet Grabber', + value: 'Expanded', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 'Settings', + frame: { x: 320, y: 40, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e8', + role: 'text-field', + value: 'Portland', + frame: { x: 20, y: 100, width: 200, height: 40 }, + actions: ['typeText'], + }, + { + ref: 'e9', + role: 'button', + label: 'Clear search', + frame: { x: 230, y: 100, width: 40, height: 40 }, + actions: ['tap'], + }, { - cliTool: 'list', - workflow: 'simulator', - label: 'List simulators', - params: { platform: 'iOS Simulator' }, + ref: 'e82', + role: 'button', + label: 'PRECIP., 78%, Next 24 hours', + identifier: 'weather.precipitationCard', + frame: { x: 20, y: 300, width: 340, height: 140 }, + actions: ['tap'], }, ], - }), - ).toMatchObject({ - nextSteps: ["List simulators: xcodebuildmcp simulator list --platform 'iOS Simulator'"], - }); - }); - - it('serializes next steps as MCP tool-call lines for MCP structured content', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], + actions: [], + }, }; - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ 
- { - tool: 'get_mac_app_path', - cliTool: 'get-app-path', - workflow: 'macos', - label: 'Get app path', - params: { scheme: 'MCPTest' }, - }, + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2'); + + expect(envelope.data).toMatchObject({ + capture: { + screenHash: 'screen-two', + seq: 2, + targets: [ + 'e82|tap|button|PRECIP., 78%, Next 24 hours||weather.precipitationCard', + 'e8|typeText|text-field||Portland|', + 'e3|tap|button|Settings||', + 'e9|tap|button|Clear search||', ], - nextStepRuntime: 'mcp', - }), - ).toMatchObject({ - nextSteps: ['Get app path: get_mac_app_path({ scheme: "MCPTest" })'], + }, }); }); - it('escapes MCP structured next-step string params as JSON string literals', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ + it('compacts runtime snapshot candidates inside recoverable UI errors by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ { - tool: 'launch_app_sim', - cliTool: 'launch-app', - workflow: 'simulator', - label: 'Launch app', - params: { - scheme: 'Cam "Debug" App', - bundleId: 'com.example.$APP\\debug', - launchArg: 'line1\nline2', - }, + ref: 'e8', + role: 'text-field', + value: 'Lisbon', + identifier: 'weather.locationsSheet', + frame: { x: 65, y: 482, width: 272, height: 18 }, + actions: ['tap', 'typeText', 'longPress', 'touch'], + }, + { + ref: 'e11', + role: 'button', + label: 'Lisbon, Portugal', + value: 'saved', + frame: { x: 40, 
y: 552, width: 89, height: 49 }, + actions: ['tap', 'longPress', 'touch'], }, ], - nextStepRuntime: 'mcp', - }), - ).toMatchObject({ - nextSteps: [ - 'Launch app: launch_app_sim({ scheme: "Cam \\"Debug\\" App", bundleId: "com.example.$APP\\\\debug", launchArg: "line1\\nline2" })', - ], - }); - }); - - it('preserves request data for normal structured output', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: false, - error: null, - request: { - scheme: 'CalculatorApp', - workspacePath: 'example_projects/iOS_Calculator/CalculatorApp.xcworkspace', }, - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, }; - expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '2')).toEqual({ - schema: 'xcodebuildmcp.output.build-result', + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', schemaVersion: '2', - didError: false, - error: null, + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', data: { - request: { - scheme: 'CalculatorApp', - workspacePath: 'example_projects/iOS_Calculator/CalculatorApp.xcworkspace', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ + 'e8|typeText|text-field||Lisbon|weather.locationsSheet', + 'e11|tap|button|Lisbon, Portugal|saved|', + ], }, - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, }, }); }); - it('preserves CLI next steps while applying minimal structured-output 
compactness', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: false, - error: null, - request: { - scheme: 'CalculatorApp', - workspacePath: 'example_projects/iOS_Calculator/CalculatorApp.xcworkspace', + it('can keep full runtime snapshots and candidates for verbose callers', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-three', + seq: 3, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + ], + actions: [{ action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }], }, - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '2', { - nextSteps: [ + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ { - tool: 'get_mac_app_path', - cliTool: 'get-app-path', - workflow: 'macos', - label: 'Get built app path', - params: { scheme: 'CalculatorApp' }, + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], }, ], - outputStyle: 'minimal', - }), - ).toEqual({ - schema: 'xcodebuildmcp.output.build-result', - schemaVersion: '2', - didError: false, - error: null, - data: { - summary: { status: 'SUCCEEDED', durationMs: 1234, target: 'simulator' }, - 
artifacts: { buildLogPath: '~/Library/Developer/XcodeBuildMCP/logs/build.log' }, - diagnostics: { warnings: [], errors: [] }, }, - nextSteps: ['Get built app path: xcodebuildmcp macos get-app-path --scheme CalculatorApp'], - }); - }); - - it('uses null data when minimal pruning removes the only data field', () => { - const result: BuildResultDomainResult = { - kind: 'build-result', - didError: false, - error: null, - request: { scheme: 'CalculatorApp' }, }; expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.build-result', '2', { - outputStyle: 'minimal', + toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshot: 'full', }), ).toEqual({ - schema: 'xcodebuildmcp.output.build-result', + schema: 'xcodebuildmcp.output.capture-result', schemaVersion: '2', - didError: false, - error: null, - data: null, - }); - }); - - it('leaves minimal structured output without request frontmatter unchanged', () => { - const result: StructuredOutputEnvelope<{ simulators: [] }> = { - schema: 'xcodebuildmcp.output.simulator-list', - schemaVersion: '1', - didError: false, - error: null, - data: { simulators: [] }, - }; - - expect( - toStructuredEnvelope( - { - kind: 'simulator-list', - didError: result.didError, - error: result.error, - simulators: [], - }, - result.schema, - result.schemaVersion, - { outputStyle: 'minimal' }, - ), - ).toEqual(result); - }); - - it('serializes label-only next steps as text lines', () => { - const result: DeviceListDomainResult = { - kind: 'device-list', - didError: false, - error: null, - devices: [], - }; - - expect( - toStructuredEnvelope(result, 'xcodebuildmcp.output.device-list', '2', { - nextSteps: [ - { - label: 'Open Simulator', - params: {}, - }, - ], - }), - ).toMatchObject({ - nextSteps: ['Open Simulator'], + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + data: { + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: 
result.capture, + uiError: result.uiError, + }, }); }); }); diff --git a/src/utils/renderers/__tests__/cli-text-renderer.test.ts b/src/utils/renderers/__tests__/cli-text-renderer.test.ts index 78da2b98f..b7173dc79 100644 --- a/src/utils/renderers/__tests__/cli-text-renderer.test.ts +++ b/src/utils/renderers/__tests__/cli-text-renderer.test.ts @@ -571,6 +571,498 @@ describe('cli-text-renderer', () => { expect(output).toContain('└ App Path: /tmp/MyApp.app'); }); + it('renders runtime UI snapshots as compact target lists', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + identifier: 'add-button', + value: 'selected', + frame: { x: 10, y: 20, width: 30, height: 40 }, + state: { enabled: true, visible: true }, + actions: ['tap', 'longPress'], + }, + { + ref: 'e2', + role: 'text', + label: 'Total', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: [], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Add' }], + }, + }, + }, + }); + + expect(output).toContain('📷 Snapshot UI'); + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Add|selected|add-button'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 0 scroll areas.', + ); + expect(output).not.toContain('- Use scroll refs with swipe.'); + expect(output).not.toContain('Accessibility Hierarchy'); + expect(output).not.toContain('```json'); + }); + + it('renders unchanged runtime UI snapshots compactly', () 
=> { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 2, + }, + }, + }, + }); + + expect(output).toContain('📷 Snapshot UI'); + expect(output).toContain('Runtime UI snapshot unchanged (screenHash: screen-hash, seq: 2).'); + expect(output).not.toContain('Targets ('); + expect(output).not.toContain('Tips'); + }); + + it('orders useful runtime targets before chrome controls in compact output', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e2', + role: 'button', + label: 'Sheet Grabber', + value: 'Expanded', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 'Settings', + frame: { x: 320, y: 40, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e8', + role: 'text-field', + value: 'Portland', + frame: { x: 20, y: 100, width: 200, height: 40 }, + actions: ['typeText'], + }, + { + ref: 'e9', + role: 'button', + label: 'Clear search', + frame: { x: 230, y: 100, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: 'Remove', + identifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e82', + 
role: 'button', + label: 'PRECIP., 78%, Next 24 hours', + identifier: 'weather.precipitationCard', + frame: { x: 20, y: 300, width: 340, height: 140 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e2', label: 'Sheet Grabber' }, + { action: 'tap', elementRef: 'e3', label: 'Settings' }, + { action: 'typeText', elementRef: 'e8' }, + { action: 'tap', elementRef: 'e9', label: 'Clear search' }, + { action: 'tap', elementRef: 'e10', label: 'Remove' }, + { action: 'tap', elementRef: 'e82', label: 'PRECIP., 78%, Next 24 hours' }, + ], + }, + }, + }, + }); + + const precipitationIndex = output.indexOf( + 'e82|tap|button|PRECIP., 78%, Next 24 hours||weather.precipitationCard', + ); + const searchIndex = output.indexOf('e8|typeText|text-field||Portland|'); + const settingsIndex = output.indexOf('e3|tap|button|Settings||'); + const clearSearchIndex = output.indexOf('e9|tap|button|Clear search||'); + const removeIndex = output.indexOf('e10|tap|button|Remove||trash'); + + expect(precipitationIndex).toBeGreaterThanOrEqual(0); + expect(searchIndex).toBeGreaterThan(precipitationIndex); + expect(settingsIndex).toBeGreaterThan(searchIndex); + expect(output).not.toContain('e2|tap|button|Sheet Grabber|Expanded|'); + expect(clearSearchIndex).toBeGreaterThan(settingsIndex); + expect(removeIndex).toBeGreaterThan(settingsIndex); + }); + + it('orders unselected segmented controls before already-selected controls in compact output', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e9', + role: 'button', + label: '°F', + 
value: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: '°C', + value: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e9', label: '°F' }, + { action: 'tap', elementRef: 'e10', label: '°C' }, + ], + }, + }, + }, + }); + + const selectedIndex = output.indexOf('e9|tap|button|°F|selected|'); + const unselectedIndex = output.indexOf('e10|tap|button|°C|not selected|'); + + expect(unselectedIndex).toBeGreaterThanOrEqual(0); + expect(selectedIndex).toBeGreaterThan(unselectedIndex); + }); + + it('does not list static text as a likely runtime target when only low-level actions are present', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Settings', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap', 'longPress', 'touch'], + }, + { + ref: 'e2', + role: 'text', + label: 'Updated just now', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Settings' }, + { action: 'longPress', elementRef: 'e2', label: 'Updated just now' }, + { action: 'touch', elementRef: 'e2', label: 'Updated just now' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Settings||'); + expect(output).not.toContain('e2|'); + expect(output).toContain( + 
'Runtime UI snapshot captured with 2 elements, 1 likely target, and 0 scroll areas.', + ); + }); + + it('renders runtime UI snapshot scroll areas separately from likely targets', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'Settings', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'Settings' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e2|tap|button|Settings||'); + expect(output).toContain('Scroll (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|swipe|application|Weather||'); + expect(output).toContain('- Use scroll refs with swipe.'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 1 scroll area.', + ); + }); + + it('renders wait_for_ui output with wait-specific text', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { headerTitle: 'Wait for UI' }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + 
waitMatch: { + predicate: 'exists', + matches: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Continue' }], + }, + }, + }, + }); + + expect(output).toContain('⚙️ Wait for UI'); + expect(output).toContain('Matched exists (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Continue||'); + expect(output).toContain( + 'Wait completed; runtime UI snapshot refreshed with 1 element, 1 likely target, and 0 scroll areas.', + ); + }); + + it('renders static wait matches with no primary action', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { headerTitle: 'Wait for UI' }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'textContains', + matches: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'longPress', elementRef: 'e11', label: 'No matches' }, + { action:
'touch', elementRef: 'e11', label: 'No matches' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Matched textContains (1) — ref|action|role|label|value|id'); + expect(output).toContain('e11|none|text|No matches||'); + expect(output).not.toContain('e11|longPress|text|No matches||'); + }); + + it('renders typed UI action recovery hints', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + result: { + kind: 'ui-action-result', + didError: true, + error: 'Element reference e9 was not found in the current runtime snapshot.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + action: { type: 'tap', elementRef: 'e9' }, + uiError: { + code: 'ELEMENT_REF_NOT_FOUND', + message: 'Element reference e9 was not found in the current runtime snapshot.', + recoveryHint: 'Run snapshot_ui again and retry with a current element reference.', + elementRef: 'e9', + candidates: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + }, + }, + }, + }); + + expect(output).toContain('Recovery'); + expect(output).toContain('Code: ELEMENT_REF_NOT_FOUND'); + expect(output).toContain('Element: e9'); + expect(output).toContain( + 'Hint: Run snapshot_ui again and retry with a current element reference.', + ); + expect(output).toContain('Candidates (1):'); + expect(output).toContain('e1|tap|button|Add||'); + expect(output).toContain( + '❌ Element reference e9 was not found in the current runtime snapshot.', + ); + }); + it('renders structured output path artifacts as a tree when requested', () => { const output = renderCliTextTranscript({ filePathRenderStyle: 'tree', diff --git a/src/utils/renderers/cli-text-renderer.ts b/src/utils/renderers/cli-text-renderer.ts index 2afa21fb1..5001f0801 100644 --- a/src/utils/renderers/cli-text-renderer.ts +++ b/src/utils/renderers/cli-text-renderer.ts 
@@ -73,6 +73,7 @@ interface CliTextProcessorOptions { showTestTiming: boolean; filePathRenderStyle: FilePathRenderStyle; includeHeaderDetails: boolean; + includeNextSteps: boolean; } interface CliTextRendererOptions { @@ -81,6 +82,7 @@ interface CliTextRendererOptions { showTestTiming?: boolean; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } export interface CliTextTranscriptInput { @@ -92,6 +94,7 @@ export interface CliTextTranscriptInput { showTestTiming?: boolean; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } interface XcodebuildParserState { @@ -110,6 +113,7 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps, } = options; const groupedCompilerErrors: CompilerErrorRenderItem[] = []; const groupedWarnings: CompilerWarningRenderItem[] = []; @@ -470,7 +474,9 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen groupedCompilerErrors.length = 0; groupedTestFailures.length = 0; groupedWarnings.length = 0; - const nextStepsBlock = createNextStepsBlock(nextSteps, nextStepsRuntime); + const nextStepsBlock = includeNextSteps + ? createNextStepsBlock(nextSteps, nextStepsRuntime) + : null; if (nextStepsBlock && !sawProgressNextSteps) { processItem(nextStepsBlock); } @@ -506,6 +512,7 @@ export function createCliTextRenderer(options: CliTextRendererOptions): Transcri showTestTiming: options.showTestTiming ?? false, filePathRenderStyle: options.filePathRenderStyle ?? 'list', includeHeaderDetails: options.includeHeaderDetails ?? true, + includeNextSteps: options.includeNextSteps ?? true, sink: { clearTransient(): void { reporter.clear(); @@ -531,6 +538,7 @@ export function renderCliTextTranscript(input: CliTextTranscriptInput = {}): str showTestTiming: input.showTestTiming ?? 
false, filePathRenderStyle: input.filePathRenderStyle ?? 'list', includeHeaderDetails: input.includeHeaderDetails ?? true, + includeNextSteps: input.includeNextSteps ?? true, sink: { clearTransient(): void {}, updateTransient(): void {}, diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index 40a5f6186..9df4a4ddb 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -5,6 +5,13 @@ import type { TestDiagnostics, ToolDomainResult, } from '../../types/domain-results.ts'; +import type { + RuntimeElementV1, + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, + UiAutomationRecoverableError, + UiWaitMatch, +} from '../../types/ui-snapshot.ts'; import type { RenderHints } from '../../rendering/types.ts'; import type { XcodebuildOperation } from '../../types/domain-fragments.ts'; import type { @@ -160,6 +167,8 @@ type CaptureResultWithVideo = Extract= 24 || /card$/i.test(identifier); +} + +function isAlreadySelectedRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + element.state?.selected === true || normalizedRuntimeSnapshotText(element.value) === 'selected' + ); +} + +function getRuntimeTargetDisplayPriority(element: RuntimeElementV1): number { + if (isLowPriorityRuntimeTarget(element)) { + return 90; + } + if (isAlreadySelectedRuntimeTarget(element)) { + return 70; + } + if (isContentRichTapTarget(element)) { + return 0; + } + if (element.actions.includes('typeText')) { + return 10; + } + if (element.actions.includes('tap')) { + return 20; + } + return 50; +} + +function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const priorityDelta = + getRuntimeTargetDisplayPriority(left.element) - + getRuntimeTargetDisplayPriority(right.element); + return priorityDelta === 0 ? 
left.index - right.index : priorityDelta; + }) + .map(({ element }) => element); +} + +function formatRuntimeElementLine(element: RuntimeElementV1, action?: string): string { + const primaryAction = + action ?? + (element.actions.includes('typeText') + ? 'typeText' + : element.actions.includes('tap') + ? 'tap' + : element.actions.includes('swipeWithin') + ? 'swipe' + : 'none'); + return [ + element.ref, + primaryAction, + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function isLikelyRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + !isHiddenRuntimeTarget(element) && + element.actions.some((action) => action === 'tap' || action === 'typeText') + ); +} + +function isScrollableRuntimeArea(element: RuntimeElementV1): boolean { + return element.actions.includes('swipeWithin') && !isLikelyRuntimeTarget(element); +} + +function countLikelyRuntimeTargets(snapshot: RuntimeSnapshotV1): number { + return snapshot.elements.filter(isLikelyRuntimeTarget).length; +} + +function countScrollableRuntimeAreas(snapshot: RuntimeSnapshotV1): number { + return snapshot.elements.filter(isScrollableRuntimeArea).length; +} + +function createRuntimeSnapshotTargetsSection(snapshot: RuntimeSnapshotV1): SectionTextBlock { + const likelyTargets = sortRuntimeTargetsForDisplay( + snapshot.elements.filter(isLikelyRuntimeTarget), + ); + const lines = likelyTargets.map((element) => formatRuntimeElementLine(element)); + + return createSection( + `Targets (${likelyTargets.length}) — ref|action|role|label|value|id`, + lines.length > 0 ? 
lines : ['(no likely interaction targets found)'], + ); +} + +function createRuntimeSnapshotScrollAreasSection( + snapshot: RuntimeSnapshotV1, +): SectionTextBlock | null { + const scrollAreas = snapshot.elements.filter(isScrollableRuntimeArea); + if (scrollAreas.length === 0) { + return null; + } + + return createSection( + `Scroll (${scrollAreas.length}) — ref|action|role|label|value|id`, + scrollAreas.map((element) => formatRuntimeElementLine(element, 'swipe')), + ); +} + +function createWaitMatchSection(waitMatch: UiWaitMatch): SectionTextBlock { + return createSection( + `Matched ${waitMatch.predicate} (${waitMatch.matches.length}) — ref|action|role|label|value|id`, + waitMatch.matches.length > 0 + ? waitMatch.matches.map((element) => formatRuntimeElementLine(element)) + : ['(no matching elements found)'], + ); +} + +function createUiErrorItems(uiError?: UiAutomationRecoverableError): TextRenderableItem[] { + if (!uiError) { + return []; + } + + const lines = [ + `Code: ${uiError.code}`, + ...(uiError.elementRef ? [`Element: ${uiError.elementRef}`] : []), + ...(typeof uiError.timeoutMs === 'number' ? 
[`Timeout: ${uiError.timeoutMs}ms`] : []), + `Hint: ${uiError.recoveryHint}`, + ]; + + if (uiError.candidates && uiError.candidates.length > 0) { + lines.push( + `Candidates (${uiError.candidates.length}):`, + ...uiError.candidates.map((candidate) => ` ${formatRuntimeElementLine(candidate)}`), + ); + } + + return [createSection('Recovery', lines)]; +} + function createSimulatorActionItems( result: Extract, ): TextRenderableItem[] { @@ -1215,6 +1403,7 @@ function createSimulatorActionItems( function createCaptureResultItems( rawResult: Extract, + hints?: RenderHints, ): TextRenderableItem[] { const result = rawResult as CaptureResultWithVideo; @@ -1254,10 +1443,18 @@ function createCaptureResultItems( return items; } + const capture = result.capture; + const isRuntimeSnapshot = + capture !== undefined && 'type' in capture && capture.type === 'runtime-snapshot'; + const isRuntimeSnapshotUnchanged = + capture !== undefined && 'type' in capture && capture.type === 'runtime-snapshot-unchanged'; const isUiHierarchy = - (result.capture && 'type' in result.capture && result.capture.type === 'ui-hierarchy') || - result.error?.includes('accessibility hierarchy') === true; - const title = isUiHierarchy ? 'Snapshot UI' : 'Screenshot'; + (capture !== undefined && 'type' in capture && capture.type === 'ui-hierarchy') || + isRuntimeSnapshot || + isRuntimeSnapshotUnchanged || + result.error?.includes('accessibility hierarchy') === true || + result.error?.includes('runtime UI snapshot') === true; + const title = hints?.headerTitle ?? (isUiHierarchy ? 'Snapshot UI' : 'Screenshot'); const items: TextRenderableItem[] = [ createHeader(title, [ ...(result.artifacts.simulatorId @@ -1267,10 +1464,61 @@ function createCaptureResultItems( ]; if (result.didError) { + items.push(...createStandardDiagnosticSections(result.diagnostics)); + items.push(...createUiErrorItems(result.uiError)); items.push( - ...createFailureStatusWithDiagnostics( - result, - isUiHierarchy ? 
'Failed to get accessibility hierarchy.' : 'Failed to capture screenshot.', + createStatus( + 'error', + result.error ?? + (isUiHierarchy + ? isRuntimeSnapshot + ? 'Failed to get runtime UI snapshot.' + : 'Failed to get accessibility hierarchy.' + : 'Failed to capture screenshot.'), + ), + ); + return items; + } + + if (isRuntimeSnapshotUnchanged) { + const capture = result.capture as RuntimeSnapshotUnchangedV1; + items.push( + ...createStandardDiagnosticSections(result.diagnostics), + createStatus( + 'success', + `Runtime UI snapshot unchanged (screenHash: ${capture.screenHash}, seq: ${capture.seq}).`, + ), + ); + return items; + } + + if (isRuntimeSnapshot) { + const snapshot = result.capture as RuntimeSnapshotV1; + const likelyTargetCount = countLikelyRuntimeTargets(snapshot); + const scrollAreaCount = countScrollableRuntimeAreas(snapshot); + const scrollAreasSection = createRuntimeSnapshotScrollAreasSection(snapshot); + if (title === 'Wait for UI' && result.waitMatch) { + items.push(createWaitMatchSection(result.waitMatch)); + } + items.push(createRuntimeSnapshotTargetsSection(snapshot)); + if (scrollAreasSection) { + items.push(scrollAreasSection); + } + items.push( + createSection('Tips', [ + '- Use target refs with tap, type_text, long_press, and touch.', + ...(scrollAreaCount > 0 ? ['- Use scroll refs with swipe.'] : []), + '- Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output.', + '- Use wait_for_ui for text/assertions or changing UI.', + ]), + ); + items.push( + ...createStandardDiagnosticSections(result.diagnostics), + createStatus( + 'success', + title === 'Wait for UI' + ? 
`Wait completed; runtime UI snapshot refreshed with ${pluralize(snapshot.elements.length, 'element')}, ${pluralize(likelyTargetCount, 'likely target')}, and ${pluralize(scrollAreaCount, 'scroll area')}.` + : `Runtime UI snapshot captured with ${pluralize(snapshot.elements.length, 'element')}, ${pluralize(likelyTargetCount, 'likely target')}, and ${pluralize(scrollAreaCount, 'scroll area')}.`, ), ); return items; @@ -2057,7 +2305,7 @@ function createSpecialCaseItems( case 'simulator-action-result': return createSimulatorActionItems(result); case 'capture-result': - return createCaptureResultItems(result); + return createCaptureResultItems(result, hints); case 'process-list': return createProcessListItems(result); case 'coverage-result': @@ -2093,6 +2341,7 @@ function createSpecialCaseItems( 'type-text': 'Type Text', 'key-press': 'Key Press', 'key-sequence': 'Key Sequence', + batch: 'Batch UI Actions', }; const items: TextRenderableItem[] = [ createHeader(headerTitleMap[result.action.type], [ @@ -2100,40 +2349,31 @@ function createSpecialCaseItems( ]), ]; if (result.didError) { - items.push(...createFailureStatusWithDiagnostics(result, 'UI action failed.')); + items.push(...createStandardDiagnosticSections(result.diagnostics)); + items.push(...createUiErrorItems(result.uiError)); + items.push(createStatus('error', result.error ?? 'UI action failed.')); return items; } let successMessage = 'UI action completed successfully.'; switch (result.action.type) { case 'tap': - successMessage = - typeof result.action.x === 'number' && typeof result.action.y === 'number' - ? `Tap at (${result.action.x}, ${result.action.y}) simulated successfully.` - : result.action.id - ? `Tap on element id "${result.action.id}" simulated successfully.` - : result.action.label - ? 
`Tap on element label "${result.action.label}" simulated successfully.` - : successMessage; + successMessage = `Tap on elementRef ${result.action.elementRef} simulated successfully.`; break; case 'swipe': { const durationText = typeof result.action.durationSeconds === 'number' ? ` duration=${result.action.durationSeconds}s` : ''; - if (result.action.from && result.action.to) { - successMessage = - `Swipe from (${result.action.from.x}, ${result.action.from.y}) to (${result.action.to.x}, ${result.action.to.y})` + - `${durationText} simulated successfully.`; - } + successMessage = + `Swipe ${result.action.direction} within elementRef ${result.action.withinElementRef}` + + `${durationText} simulated successfully.`; break; } case 'touch': - if (typeof result.action.x === 'number' && typeof result.action.y === 'number') { - successMessage = `Touch event (${result.action.event ?? 'touch'}) at (${result.action.x}, ${result.action.y}) executed successfully.`; - } + successMessage = `Touch event (${result.action.event}) on elementRef ${result.action.elementRef} executed successfully.`; break; case 'long-press': - successMessage = `Long press at (${result.action.x}, ${result.action.y}) for ${result.action.durationMs}ms simulated successfully.`; + successMessage = `Long press on elementRef ${result.action.elementRef} for ${result.action.durationMs}ms simulated successfully.`; break; case 'button': successMessage = `Hardware button '${result.action.button}' pressed successfully.`; @@ -2141,15 +2381,26 @@ function createSpecialCaseItems( case 'gesture': successMessage = `Gesture '${result.action.gesture}' executed successfully.`; break; - case 'type-text': - successMessage = 'Text typing simulated successfully.'; + case 'type-text': { + const targetText = result.action.elementRef + ? ` into elementRef ${result.action.elementRef}` + : ''; + const lengthText = + typeof result.action.textLength === 'number' + ? 
` (${pluralize(result.action.textLength, 'character')})` + : ''; + successMessage = `Text typed${targetText}${lengthText} successfully.`; break; + } case 'key-press': successMessage = `Key press (code: ${result.action.keyCode}) simulated successfully.`; break; case 'key-sequence': successMessage = `Key sequence [${result.action.keyCodes.join(',')}] executed successfully.`; break; + case 'batch': + successMessage = `Batch UI automation completed successfully (${pluralize(result.action.stepCount, 'step')}).`; + break; } items.push( ...createStandardDiagnosticSections(result.diagnostics), diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts index 919832c42..68417ea54 100644 --- a/src/utils/structured-output-envelope.ts +++ b/src/utils/structured-output-envelope.ts @@ -2,6 +2,12 @@ import type { RuntimeKind } from '../runtime/types.ts'; import type { NextStep, OutputStyle } from '../types/common.ts'; import type { ToolDomainResult } from '../types/domain-results.ts'; import type { StructuredOutputEnvelope } from '../types/structured-output.ts'; +import type { + RuntimeActionNameV1, + RuntimeElementV1, + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, +} from '../types/ui-snapshot.ts'; import { serializeNextSteps } from './responses/next-step-formatting.ts'; type DomainResultData = Omit< @@ -9,13 +15,54 @@ type DomainResultData = Omit< 'kind' | 'didError' | 'error' >; +export type RuntimeSnapshotEnvelopeMode = 'compact' | 'full'; + export interface StructuredEnvelopeOptions { nextSteps?: readonly NextStep[]; nextStepRuntime?: RuntimeKind; outputStyle?: OutputStyle; + runtimeSnapshot?: RuntimeSnapshotEnvelopeMode; } +type RuntimeSnapshotCompactCapture = { + type: 'runtime-snapshot'; + rs: '1'; + screenHash: string; + seq: number; + count: number; + targets: string[]; + scroll: string[]; + udid: string; +}; + +type RuntimeSnapshotUnchangedCompactCapture = { + type: 'runtime-snapshot-unchanged'; + rs: '1'; + screenHash: string; + 
seq: number; + unchanged: true; + udid: string; +}; + const MINIMAL_DATA_PRUNE_KEYS = ['request'] as const; +const HIDDEN_RUNTIME_TARGET_LABELS = new Set(['sheet grabber']); +const LOW_PRIORITY_RUNTIME_TARGET_LABELS = new Set([ + 'sheet grabber', + 'close', + 'clear search', + 'remove', + 'delete', + 'clear', + 'c', + 'ac', + '±', + '%', + '÷', + '×', + '-', + '+', + '=', +]); function isRecord(value: unknown): value is Record { return typeof value === 'object' && value !== null && !Array.isArray(value); @@ -49,14 +96,231 @@ function applyStructuredOutputStyle( }; } +function compactRuntimeSnapshotText(value: string | undefined): string { + return (value ?? '').replace(/\s+/g, ' ').replace(/\|/g, '/').trim(); +} + +function normalizedRuntimeSnapshotText(value: string | undefined): string { + return compactRuntimeSnapshotText(value).toLocaleLowerCase(); +} + +function isHiddenRuntimeTarget(element: RuntimeElementV1): boolean { + return HIDDEN_RUNTIME_TARGET_LABELS.has(normalizedRuntimeSnapshotText(element.label)); +} + +function isLowPriorityRuntimeTarget(element: RuntimeElementV1): boolean { + return LOW_PRIORITY_RUNTIME_TARGET_LABELS.has(normalizedRuntimeSnapshotText(element.label)); +} + +function isContentRichTapTarget(element: RuntimeElementV1): boolean { + if (!element.actions.includes('tap')) { + return false; + } + + const label = compactRuntimeSnapshotText(element.label); + const identifier = compactRuntimeSnapshotText(element.identifier); + return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); +} + +function isAlreadySelectedRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + element.state?.selected === true || normalizedRuntimeSnapshotText(element.value) === 'selected' + ); +} + +function getRuntimeTargetDisplayPriority(element: RuntimeElementV1): number { + if (isLowPriorityRuntimeTarget(element)) { + return 90; + } + if (isAlreadySelectedRuntimeTarget(element)) { + return 70; + } + if
(isContentRichTapTarget(element)) { + return 0; + } + if (element.actions.includes('typeText')) { + return 10; + } + if (element.actions.includes('tap')) { + return 20; + } + return 50; +} + +function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const priorityDelta = + getRuntimeTargetDisplayPriority(left.element) - getRuntimeTargetDisplayPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + }) + .map(({ element }) => element); +} + +function compactRuntimeElementRow(element: RuntimeElementV1, action: string): string { + return [ + element.ref, + action, + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNameV1 | 'none' { + return ( + (element.actions.includes('typeText') && 'typeText') || + (element.actions.includes('tap') && 'tap') || + (element.actions.includes('swipeWithin') && 'swipeWithin') || + 'none' + ); +} + +function toRuntimeSnapshotCompactCapture( + snapshot: RuntimeSnapshotV1, +): RuntimeSnapshotCompactCapture { + const targets = sortRuntimeTargetsForDisplay( + snapshot.elements.filter( + (element) => + !isHiddenRuntimeTarget(element) && + (element.actions.includes('tap') || element.actions.includes('typeText')), + ), + ).map((element) => { + const action = element.actions.includes('typeText') ? 
'typeText' : 'tap'; + return compactRuntimeElementRow(element, action); + }); + const scroll = snapshot.elements + .filter( + (element) => + element.actions.includes('swipeWithin') && + !element.actions.includes('tap') && + !element.actions.includes('typeText'), + ) + .map((element) => compactRuntimeElementRow(element, 'swipe')); + + return { + type: 'runtime-snapshot', + rs: '1', + screenHash: snapshot.screenHash, + seq: snapshot.seq, + count: snapshot.elements.length, + targets, + scroll, + udid: snapshot.simulatorId, + }; +} + +function compactRuntimeElementCandidate(element: RuntimeElementV1): string { + return compactRuntimeElementRow(element, primaryRuntimeElementAction(element)); +} + +function isRuntimeElement(candidate: unknown): candidate is RuntimeElementV1 { + return ( + typeof candidate === 'object' && + candidate !== null && + 'ref' in candidate && + typeof candidate.ref === 'string' && + 'actions' in candidate && + Array.isArray(candidate.actions) + ); +} + +function isRuntimeSnapshotCapture(capture: unknown): capture is RuntimeSnapshotV1 { + return ( + typeof capture === 'object' && + capture !== null && + 'type' in capture && + capture.type === 'runtime-snapshot' && + 'elements' in capture && + Array.isArray(capture.elements) + ); +} + +function isRuntimeSnapshotUnchangedCapture( + capture: unknown, +): capture is RuntimeSnapshotUnchangedV1 { + return ( + typeof capture === 'object' && + capture !== null && + 'type' in capture && + capture.type === 'runtime-snapshot-unchanged' + ); +} + +function toRuntimeSnapshotUnchangedCompactCapture( + capture: RuntimeSnapshotUnchangedV1, +): RuntimeSnapshotUnchangedCompactCapture { + return { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: capture.screenHash, + seq: capture.seq, + unchanged: true, + udid: capture.simulatorId, + }; +} + +function projectRuntimeSnapshotData(data: TData, options: StructuredEnvelopeOptions): unknown { + if (options.runtimeSnapshot === 'full' || typeof data !== 
'object' || data === null) { + return data; + } + + const dataWithCapture = data as TData & { capture?: unknown }; + const projectedData = isRuntimeSnapshotCapture(dataWithCapture.capture) + ? { + ...dataWithCapture, + capture: toRuntimeSnapshotCompactCapture(dataWithCapture.capture), + } + : isRuntimeSnapshotUnchangedCapture(dataWithCapture.capture) + ? { + ...dataWithCapture, + capture: toRuntimeSnapshotUnchangedCompactCapture(dataWithCapture.capture), + } + : dataWithCapture; + + const dataWithRuntimeRows = projectedData as typeof projectedData & { + uiError?: { candidates?: unknown[] }; + waitMatch?: { matches?: unknown[] }; + }; + const uiError = Array.isArray(dataWithRuntimeRows.uiError?.candidates) + ? { + ...dataWithRuntimeRows.uiError, + candidates: dataWithRuntimeRows.uiError.candidates.map((candidate) => + isRuntimeElement(candidate) ? compactRuntimeElementCandidate(candidate) : candidate, + ), + } + : dataWithRuntimeRows.uiError; + const waitMatch = Array.isArray(dataWithRuntimeRows.waitMatch?.matches) + ? { + ...dataWithRuntimeRows.waitMatch, + matches: dataWithRuntimeRows.waitMatch.matches.map((match) => + isRuntimeElement(match) ? compactRuntimeElementCandidate(match) : match, + ), + } + : dataWithRuntimeRows.waitMatch; + + if (uiError === dataWithRuntimeRows.uiError && waitMatch === dataWithRuntimeRows.waitMatch) { + return projectedData; + } + + return { + ...projectedData, + ...(uiError ? { uiError } : {}), + ...(waitMatch ? { waitMatch } : {}), + }; +} + export function toStructuredEnvelope( result: TResult, schema: string, schemaVersion: string, options: StructuredEnvelopeOptions = {}, -): StructuredOutputEnvelope> { +): StructuredOutputEnvelope { const { nextSteps, nextStepRuntime = 'cli', outputStyle = 'normal' } = options; const { kind: _kind, didError, error, ...data } = result; + const projectedData = projectRuntimeSnapshotData(data as DomainResultData, options); const serializedNextSteps = schema === 'xcodebuildmcp.output.error' ? 
undefined @@ -64,12 +328,12 @@ export function toStructuredEnvelope( runtime: nextStepRuntime, }); - const envelope: StructuredOutputEnvelope> = { + const envelope: StructuredOutputEnvelope = { schema, schemaVersion, didError, error, - data: Object.keys(data).length === 0 ? null : (data as DomainResultData), + data: isRecord(projectedData) && Object.keys(projectedData).length === 0 ? null : projectedData, ...(serializedNextSteps ? { nextSteps: serializedNextSteps } : {}), }; From c1e7fb3a70e3fb5f1215ae52e9d00fa8cef4aeb4 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Mon, 18 May 2026 23:10:22 +0100 Subject: [PATCH 02/35] feat(ui-automation): Add runtime snapshot guidance Return compact runtime snapshot captures after UI actions and derive next-step guidance from foreground runtime elements. This reduces reliance on full snapshots and screenshot verification loops while preserving structured output contracts. Co-Authored-By: Codex --- manifests/tools/batch.yaml | 4 +- manifests/tools/snapshot_ui.yaml | 4 +- manifests/tools/tap.yaml | 4 +- manifests/tools/wait_for_ui.yaml | 4 +- .../2.schema.json | 17 + .../2.schema.json | 26 ++ src/daemon.ts | 2 +- src/mcp/tools/ui-automation/batch.ts | 152 ++++++- .../ui-automation/shared/domain-result.ts | 29 +- .../shared/post-action-snapshot.ts | 26 ++ .../shared/runtime-next-steps.ts | 419 ++++++++++++++++++ .../ui-automation/shared/runtime-snapshot.ts | 64 ++- .../ui-automation/shared/semantic-tap.ts | 9 + .../ui-automation/shared/wait-predicate.ts | 7 +- src/mcp/tools/ui-automation/snapshot_ui.ts | 144 +----- src/mcp/tools/ui-automation/swipe.ts | 8 +- src/mcp/tools/ui-automation/tap.ts | 9 +- src/mcp/tools/ui-automation/type_text.ts | 8 +- src/mcp/tools/ui-automation/wait_for_ui.ts | 35 +- src/types/common.ts | 12 +- src/types/domain-results.ts | 1 + src/utils/axe-helpers.ts | 54 ++- src/utils/config-store.ts | 11 + src/utils/project-config.ts | 2 +- src/utils/responses/next-step-formatting.ts | 28 +- 
src/utils/runtime-config-schema.ts | 1 + src/utils/sentry.ts | 2 +- src/utils/structured-output-envelope.ts | 37 +- src/utils/typed-tool-factory.ts | 33 +- 29 files changed, 959 insertions(+), 193 deletions(-) create mode 100644 src/mcp/tools/ui-automation/shared/post-action-snapshot.ts create mode 100644 src/mcp/tools/ui-automation/shared/runtime-next-steps.ts diff --git a/manifests/tools/batch.yaml b/manifests/tools/batch.yaml index b9209d703..9967e4669 100644 --- a/manifests/tools/batch.yaml +++ b/manifests/tools/batch.yaml @@ -3,10 +3,10 @@ module: mcp/tools/ui-automation/batch names: mcp: batch cli: batch -description: Execute multiple AXe UI interaction steps in one simulator session to reduce process launches. +description: Execute structured rs/1 tap steps in one simulator session. Use refs from the latest snapshot_ui or wait_for_ui output, for example {"steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}. Do not pass raw AXe strings such as "tap e7". Omit preDelay/postDelay for switch elementRefs; switches execute as touch down/up steps and reject delays. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "2" + version: '2' routing: stateful: true annotations: diff --git a/manifests/tools/snapshot_ui.yaml b/manifests/tools/snapshot_ui.yaml index 5d7622195..c0746db8c 100644 --- a/manifests/tools/snapshot_ui.yaml +++ b/manifests/tools/snapshot_ui.yaml @@ -3,10 +3,10 @@ module: mcp/tools/ui-automation/snapshot_ui names: mcp: snapshot_ui cli: snapshot-ui -description: Capture a semantic rs/1 runtime UI snapshot with stable elementRef targets for UI automation. +description: Capture a semantic rs/1 runtime UI snapshot with elementRef targets. Observe once, use tap for one target or batch for multiple same-screen targets, and refresh after navigation, scrolling, sheet changes, or obvious layout changes. 
outputSchema: schema: xcodebuildmcp.output.capture-result - version: "2" + version: '2' routing: stateful: true annotations: diff --git a/manifests/tools/tap.yaml b/manifests/tools/tap.yaml index 6fdff7fc2..33740afd7 100644 --- a/manifests/tools/tap.yaml +++ b/manifests/tools/tap.yaml @@ -3,10 +3,10 @@ module: mcp/tools/ui-automation/tap names: mcp: tap cli: tap -description: Tap a UI element by elementRef from a current rs/1 runtime snapshot. +description: Tap one elementRef from the latest snapshot_ui or wait_for_ui output. Other same-screen refs may remain usable after success; refresh after navigation, scrolling, sheet changes, or obvious layout changes. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "2" + version: '2' routing: stateful: true annotations: diff --git a/manifests/tools/wait_for_ui.yaml b/manifests/tools/wait_for_ui.yaml index 4ae62a0bc..f4d29b19f 100644 --- a/manifests/tools/wait_for_ui.yaml +++ b/manifests/tools/wait_for_ui.yaml @@ -3,10 +3,10 @@ module: mcp/tools/ui-automation/wait_for_ui names: mcp: wait_for_ui cli: wait-for-ui -description: Poll rs/1 runtime UI snapshots until a selector-based UI predicate, selector-free textContains predicate, or selector-free settled predicate is satisfied. Select with elementRef, identifier, label, role, or value when a selector is needed. +description: Poll rs/1 runtime UI snapshots until a selector-based UI predicate, selector-free textContains/gone text predicate, or selector-free settled predicate is satisfied, then record the latest snapshot. Prefer this after navigation or layout changes. Select with elementRef, identifier, label, role, or value when a selector is needed. 
outputSchema: schema: xcodebuildmcp.output.capture-result - version: "2" + version: '2' routing: stateful: true nextSteps: diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json index 9bf964c63..cbb6ff18f 100644 --- a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -129,6 +129,10 @@ "type": "array", "items": { "type": "string" } }, + "text": { + "type": "array", + "items": { "type": "string" } + }, "udid": { "type": "string" } }, "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] @@ -158,6 +162,18 @@ }, "required": ["type", "rs", "screenHash", "seq", "unchanged", "udid"] }, + "videoRecordingCapture": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "video-recording" }, + "state": { "enum": ["started", "stopped"] }, + "fps": { "type": "number" }, + "outputFile": { "type": "string" }, + "sessionId": { "type": "string" } + }, + "required": ["type", "state"] + }, "waitPredicate": { "enum": ["exists", "gone", "enabled", "focused", "textContains", "settled"] }, @@ -243,6 +259,7 @@ }, { "$ref": "#/$defs/runtimeSnapshot" }, { "$ref": "#/$defs/compactRuntimeSnapshot" }, + { "$ref": "#/$defs/videoRecordingCapture" }, { "$ref": "#/$defs/runtimeSnapshotUnchanged" }, { "$ref": "#/$defs/compactRuntimeSnapshotUnchanged" } ] diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json index e38ead610..4d6b09e5f 100644 --- a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -70,6 +70,31 @@ }, "required": ["ref", "frame", "actions"] }, + "compactRuntimeSnapshot": 
{ + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "count": { "type": "integer", "minimum": 0 }, + "targets": { + "type": "array", + "items": { "type": "string" } + }, + "scroll": { + "type": "array", + "items": { "type": "string" } + }, + "text": { + "type": "array", + "items": { "type": "string" } + }, + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] + }, "direction": { "enum": ["up", "down", "left", "right"] }, @@ -231,6 +256,7 @@ "diagnostics": { "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/basicDiagnostics" }, + "capture": { "$ref": "#/$defs/compactRuntimeSnapshot" }, "uiError": { "$ref": "#/$defs/recoverableUiError" } }, "required": ["summary", "action", "artifacts"] diff --git a/src/daemon.ts b/src/daemon.ts index e397e991f..efcdfea06 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -223,7 +223,7 @@ async function main(): Promise { const xcodeIdeWorkflowEnabled = daemonWorkflows.includes('xcode-ide'); const axeBinary = resolveAxeBinary(); const axeAvailable = axeBinary !== null; - const axeSource: 'env' | 'bundled' | 'path' | 'unavailable' = + const axeSource: 'env' | 'source' | 'bundled' | 'path' | 'unavailable' = axeBinary?.source ?? 
'unavailable'; const xcodemakeAvailable = isXcodemakeBinaryAvailable(); const xcodemakeEnabled = isXcodemakeEnabled(); diff --git a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts index 0f5de5ff2..d6c84fea8 100644 --- a/src/mcp/tools/ui-automation/batch.ts +++ b/src/mcp/tools/ui-automation/batch.ts @@ -12,7 +12,9 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; -import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { createSemanticTapBatchSteps, createSemanticTapCommand } from './shared/semantic-tap.ts'; +import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -24,17 +26,30 @@ import { shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; -const batchSchema = z.object({ +const batchStepSchema = z.strictObject({ + action: z.literal('tap'), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), + preDelay: z + .number() + .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) + .optional() + .describe('Seconds before this step. Omit for switch elementRefs.'), + postDelay: z + .number() + .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) + .optional() + .describe('Seconds after this step. 
Omit for switch elementRefs.'), +}); + +const batchSchema = z.strictObject({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), steps: z - .array(z.string().min(1, { message: 'steps must not contain empty values' })) + .array(batchStepSchema) .min(1, { message: 'At least one batch step is required' }) .max(100, { message: 'At most 100 batch steps are supported' }), axCache: z.enum(['perBatch', 'perStep', 'none']).optional(), - typeSubmission: z.enum(['chunked', 'composite']).optional(), - typeChunkSize: z.number().int().min(1).optional(), - tapStyle: z.enum(['automatic', 'simulator', 'physical']).optional(), - continueOnError: z.boolean().optional(), waitTimeout: z.number().min(0, { message: 'waitTimeout must be non-negative' }).optional(), pollInterval: z.number().positive({ message: 'pollInterval must be greater than 0' }).optional(), }); @@ -44,26 +59,39 @@ type BatchResult = UiActionResultDomainResult; const LOG_PREFIX = '[AXe]'; -function buildBatchCommandArgs(params: BatchParams): string[] { +function compactBatchElementValue(value: string | undefined): string { + return value?.trim().toLowerCase() ?? 
''; +} + +function isSafeSameScreenBatchElement(element: { + role?: string; + state?: { selected?: boolean }; + value?: string; +}): boolean { + const value = compactBatchElementValue(element.value); + const isAlreadyActive = + element.state?.selected === true || value === 'selected' || value === '1' || value === 'on'; + if (isAlreadyActive || element.role === 'tab') { + return false; + } + + return ( + element.role === 'switch' || + element.state?.selected === false || + value === 'not selected' || + value === '0' || + value === 'off' + ); +} + +function buildBatchCommandArgs(params: BatchParams, resolvedSteps: readonly string[]): string[] { const commandArgs = ['batch']; - for (const step of params.steps) { + for (const step of resolvedSteps) { commandArgs.push('--step', step); } if (params.axCache !== undefined) { commandArgs.push('--ax-cache', params.axCache); } - if (params.typeSubmission !== undefined) { - commandArgs.push('--type-submission', params.typeSubmission); - } - if (params.typeChunkSize !== undefined) { - commandArgs.push('--type-chunk-size', String(params.typeChunkSize)); - } - if (params.tapStyle !== undefined) { - commandArgs.push('--tap-style', params.tapStyle); - } - if (params.continueOnError === true) { - commandArgs.push('--continue-on-error'); - } if (params.waitTimeout !== undefined) { commandArgs.push('--wait-timeout', String(params.waitTimeout)); } @@ -73,6 +101,70 @@ function buildBatchCommandArgs(params: BatchParams): string[] { return commandArgs; } +function resolveBatchSteps( + params: BatchParams, +): { ok: true; steps: string[]; preserveSnapshot: boolean } | { ok: false; result: BatchResult } { + const resolvedSteps: string[] = []; + let preserveSnapshot = true; + + for (const step of params.steps) { + const resolution = resolveElementRef(params.simulatorId, step.elementRef, 'tap'); + if (!resolution.ok) { + return { + ok: false, + result: createUiActionFailureResult( + { type: 'batch' as const, stepCount: params.steps.length }, 
+ params.simulatorId, + resolution.error.message, + { uiError: resolution.error }, + ), + }; + } + + const usesTouchActivation = resolution.element.publicElement.role === 'switch'; + preserveSnapshot &&= isSafeSameScreenBatchElement(resolution.element.publicElement); + if (usesTouchActivation && (step.preDelay !== undefined || step.postDelay !== undefined)) { + const message = + 'preDelay and postDelay are not supported for switch elementRefs because switches execute as touch down/up batch steps.'; + return { + ok: false, + result: createUiActionFailureResult( + { type: 'batch' as const, stepCount: params.steps.length }, + params.simulatorId, + message, + { + uiError: { + code: 'ACTION_FAILED', + message, + recoveryHint: + 'Remove preDelay/postDelay from switch steps, or wait between separate batch calls.', + elementRef: step.elementRef, + }, + }, + ), + }; + } + + const extraArgs: string[] = []; + if (step.preDelay !== undefined) { + extraArgs.push('--pre-delay', String(step.preDelay)); + } + if (step.postDelay !== undefined) { + extraArgs.push('--post-delay', String(step.postDelay)); + } + + const tapCommand = createSemanticTapCommand( + resolution.element, + step.elementRef, + extraArgs, + resolution.snapshot.elements, + ); + resolvedSteps.push(...createSemanticTapBatchSteps(tapCommand)); + } + + return { ok: true, steps: resolvedSteps, preserveSnapshot }; +} + export function createBatchExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, @@ -92,14 +184,26 @@ export function createBatchExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const commandArgs = buildBatchCommandArgs(params); + const resolvedSteps = resolveBatchSteps(params); + if (!resolvedSteps.ok) { + return resolvedSteps.result; + } + + const commandArgs = buildBatchCommandArgs(params, resolvedSteps.steps); log('info', `${LOG_PREFIX}/${toolName}: Starting ${steps.length} step batch on ${simulatorId}`); try { await 
executeAxeCommand(commandArgs, simulatorId, 'batch', executor, axeHelpers); - clearRuntimeSnapshot(simulatorId); + if (!resolvedSteps.preserveSnapshot) { + clearRuntimeSnapshot(simulatorId); + } + const capture = await captureRuntimeSnapshotAfterAction({ + simulatorId, + executor, + axeHelpers, + }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); diff --git a/src/mcp/tools/ui-automation/shared/domain-result.ts b/src/mcp/tools/ui-automation/shared/domain-result.ts index b2168dfbc..560709d2c 100644 --- a/src/mcp/tools/ui-automation/shared/domain-result.ts +++ b/src/mcp/tools/ui-automation/shared/domain-result.ts @@ -1,4 +1,5 @@ import type { RenderHints, ToolHandlerContext } from '../../../../rendering/types.ts'; +import type { NextStep } from '../../../../types/common.ts'; import type { BasicDiagnostics, CapturePayload, @@ -14,6 +15,7 @@ import type { import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { createBasicDiagnostics } from '../../../../utils/diagnostics.ts'; import { AxeError, DependencyError, SystemError } from '../../../../utils/errors.ts'; +import { createRuntimeSnapshotNextSteps } from './runtime-next-steps.ts'; const UI_ACTION_SCHEMA = 'xcodebuildmcp.output.ui-action-result'; const CAPTURE_SCHEMA = 'xcodebuildmcp.output.capture-result'; @@ -35,6 +37,20 @@ function compact(values: Array): string[] { return values.filter((value): value is string => typeof value === 'string' && value.length > 0); } +function createUiActionSuccessNextSteps(result: UiActionResultDomainResult): NextStep[] { + if (result.didError) { + return []; + } + + return [ + { + label: 'Refresh after UI action', + tool: 'snapshot_ui', + params: { 
simulatorId: result.artifacts.simulatorId }, + }, + ]; +} + export function createUiAutomationRecoverableError(params: { code: UiAutomationRecoverableErrorCode; message: string; @@ -53,7 +69,7 @@ export function createUiActionSuccessResult( action: UiAction, simulatorId: string, warnings: Array = [], - options: { uiError?: UiAutomationRecoverableError } = {}, + options: { capture?: CapturePayload; uiError?: UiAutomationRecoverableError } = {}, ): UiActionResultDomainResult { return { kind: 'ui-action-result', @@ -62,6 +78,7 @@ export function createUiActionSuccessResult( summary: { status: 'SUCCEEDED' }, action, artifacts: { simulatorId }, + ...(options.capture ? { capture: options.capture } : {}), diagnostics: createDiagnostics(compact(warnings), []), ...(options.uiError ? { uiError: options.uiError } : {}), }; @@ -193,6 +210,16 @@ export function setUiActionStructuredOutput( schema: UI_ACTION_SCHEMA, schemaVersion: '2', }; + if (result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot') { + ctx.nextSteps = createRuntimeSnapshotNextSteps({ + simulatorId: result.artifacts.simulatorId, + runtimeSnapshot: result.capture, + includeRefreshAndWait: false, + }); + return; + } + + ctx.nextSteps = createUiActionSuccessNextSteps(result); } export function setCaptureStructuredOutput( diff --git a/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts new file mode 100644 index 000000000..7abd643cb --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts @@ -0,0 +1,26 @@ +import type { CapturePayload } from '../../../../types/domain-results.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { executeAxeCommand } from './axe-command.ts'; +import type { AxeHelpers } from './axe-command.ts'; +import { parseRuntimeSnapshotResponse } from './runtime-snapshot.ts'; +import { recordRuntimeSnapshot } from 
'./snapshot-ui-state.ts'; + +export async function captureRuntimeSnapshotAfterAction(params: { + simulatorId: string; + executor: CommandExecutor; + axeHelpers: AxeHelpers; +}): Promise { + const responseText = await executeAxeCommand( + ['describe-ui'], + params.simulatorId, + 'describe-ui', + params.executor, + params.axeHelpers, + ); + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId: params.simulatorId, + responseText, + }); + recordRuntimeSnapshot(snapshot); + return snapshot.payload; +} diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts new file mode 100644 index 000000000..2c4632747 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -0,0 +1,419 @@ +import type { NextStep } from '../../../../types/common.ts'; +import type { + RuntimeElementV1, + RuntimeSnapshotElementRecord, + RuntimeSnapshotV1, +} from '../../../../types/ui-snapshot.ts'; +import { getRuntimeSnapshot } from './snapshot-ui-state.ts'; + +const HIDDEN_TAP_NEXT_STEP_LABELS = new Set(['sheet grabber']); + +const LOW_PRIORITY_TAP_NEXT_STEP_LABELS = new Set([ + 'close', + 'clear search', + 'remove', + 'delete', + 'clear', + 'c', + 'ac', + '±', + '%', + '÷', + '×', + '-', + '+', + '=', +]); + +const SCREEN_CHANGING_TAP_NEXT_STEP_LABELS = new Set([ + 'back', + 'cancel', + 'done', + 'settings', + 'menu', + 'home', + 'next', + 'previous', +]); + +const FOREGROUND_DISMISS_TAP_NEXT_STEP_LABELS = new Set(['back', 'cancel', 'close', 'done']); + +function compactTapNextStepText(value: string | undefined): string { + return (value ?? 
'').replace(/\s+/g, ' ').trim(); +} + +function isHiddenTapNextStepElement(label: string | undefined): boolean { + return HIDDEN_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +function isLowPriorityTapNextStepElement(label: string | undefined): boolean { + return LOW_PRIORITY_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +function isContentRichTapNextStepElement(element: { + label?: string; + identifier?: string; +}): boolean { + const label = compactTapNextStepText(element.label); + const identifier = compactTapNextStepText(element.identifier); + return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); +} + +function isScreenChangingTapNextStepElement(element: { + label?: string; + identifier?: string; + role?: string; +}): boolean { + const label = compactTapNextStepText(element.label).toLowerCase(); + const identifier = compactTapNextStepText(element.identifier).toLowerCase(); + return ( + element.role === 'tab' || + SCREEN_CHANGING_TAP_NEXT_STEP_LABELS.has(label) || + /(?:^|[._-])(back|navigation|tab|detail|details)(?:$|[._-])/i.test(identifier) + ); +} + +function isGenericRowTapNextStepElement(element: { identifier?: string; role?: string }): boolean { + const identifier = compactTapNextStepText(element.identifier).toLowerCase(); + return element.role === 'cell' || /(?:^|[._-])(row|cell|item)(?:$|[._-])/i.test(identifier); +} + +function isStateChangingTapNextStepElement(element: { + role?: string; + state?: { selected?: boolean }; + value?: string; +}): boolean { + const value = compactTapNextStepText(element.value).toLowerCase(); + return ( + element.role === 'switch' || + element.state?.selected === true || + element.state?.selected === false || + value === 'selected' || + value === 'not selected' || + value === '0' || + value === '1' || + value === 'off' || + value === 'on' + ); +} + +/** + * Ranks generic tap next-step candidates. 
+ * + * Business rules: + * - Prefer content-rich controls because they usually represent cards, rows, or details worth opening. + * - Prefer generic rows/cells/items over chrome when content-rich signals are absent. + * - Deprioritize navigation/screen-changing controls so agents do not immediately leave useful content. + * - Deprioritize utility/destructive controls such as close, clear, remove, and calculator operators. + * - State-changing controls are filtered out before ranking; they remain valid targets, but are not + * promoted as generic "try this next" suggestions because toggling state can be destructive. + */ +function getTapNextStepElementPriority(element: { + label?: string; + identifier?: string; + role?: string; + state?: { selected?: boolean }; + value?: string; +}): number { + if (isLowPriorityTapNextStepElement(element.label)) { + return 90; + } + if (isContentRichTapNextStepElement(element)) { + return 10; + } + if (isScreenChangingTapNextStepElement(element)) { + return 60; + } + if (isGenericRowTapNextStepElement(element)) { + return 30; + } + return 20; +} + +function isScrollableNextStepElement(element: { + actions: readonly string[]; + role?: string; +}): boolean { + return element.actions.includes('swipeWithin') && element.role === 'scroll-view'; +} + +/** + * Checks AX hierarchy ancestry using the snapshot metadata path. + * + * This is the strongest foreground/background signal because it comes from the raw accessibility + * tree. If a candidate path starts with the root path, it is structurally inside that root. + */ +function isSameOrDescendantPath(parentPath: string, candidatePath: string): boolean { + return candidatePath === parentPath || candidatePath.startsWith(`${parentPath}.`); +} + +/** + * Checks whether a candidate visually fits inside a potential foreground container. + * + * Business rules: + * - Use geometry as a fallback for AX layouts that flatten sheet/dialog children as siblings. 
+ * - The candidate center must be inside the parent frame. + * - The candidate must not be larger than the parent; this prevents full-screen/background scroll + * views from being pulled into a smaller foreground panel just because their center overlaps it. + */ +function isFrameInside(parent: RuntimeElementV1, candidate: RuntimeElementV1): boolean { + const candidateCenterX = candidate.frame.x + candidate.frame.width / 2; + const candidateCenterY = candidate.frame.y + candidate.frame.height / 2; + return ( + candidate.frame.width <= parent.frame.width && + candidate.frame.height <= parent.frame.height && + candidateCenterX >= parent.frame.x && + candidateCenterX <= parent.frame.x + parent.frame.width && + candidateCenterY >= parent.frame.y && + candidateCenterY <= parent.frame.y + parent.frame.height + ); +} + +/** + * Decides whether a candidate belongs to a foreground root. + * + * Business rules: + * - Prefer AX hierarchy membership when available. + * - Fall back to frame containment for flattened AX trees. + * - This is intentionally app-agnostic: it does not rely on app-specific identifiers or labels. + */ +function isForegroundCandidateForRoot( + root: RuntimeSnapshotElementRecord, + candidate: RuntimeSnapshotElementRecord, +): boolean { + return ( + isSameOrDescendantPath(root.metadata.path, candidate.metadata.path) || + isFrameInside(root.publicElement, candidate.publicElement) + ); +} + +/** + * Looks up the stored per-ref metadata for the exact runtime snapshot being rendered. + * + * Next-step generation receives the compact public snapshot, but foreground filtering needs private + * metadata such as hierarchy path and depth. We only use stored metadata when both screen hash and + * sequence match, so stale records from an older UI state cannot influence current next steps. 
+ */ +function findStoredSnapshotRecords(params: { + simulatorId: string; + runtimeSnapshot: RuntimeSnapshotV1; +}): Map { + const storedSnapshot = getRuntimeSnapshot(params.simulatorId); + if ( + storedSnapshot?.payload.screenHash !== params.runtimeSnapshot.screenHash || + storedSnapshot.payload.seq !== params.runtimeSnapshot.seq + ) { + return new Map(); + } + + return storedSnapshot.elementsByRef; +} + +/** + * Finds the most likely active foreground scroll container. + * + * Business rules: + * - Only scrollable elements can become foreground roots because next-step filtering is currently + * used to choose better tap/scroll guidance around scrollable panels, sheets, and detail views. + * - A foreground root must contain at least one generic foreground cue: + * - dismiss/navigation-out control: back, cancel, close, done + * - text-entry control + * - state-changing control such as a switch/selected segment + * - Dismiss controls score highest because they are strong sheet/dialog/detail indicators. + * - Text fields score next because search panels and forms often appear as foreground overlays. + * - State controls score lower because settings panels are foreground, but controls themselves + * should not become generic tap suggestions. + * - Depth and later snapshot order are tie-breakers for nested/later-presented UI. + * + * Limitations: + * - This does not yet rank competing foreground scroll views by identifier specificity or visible + * area. After filtering, scroll selection still chooses the first remaining scrollable element. 
+ */ +function findActiveForegroundRoot( + recordsByRef: Map, +): RuntimeSnapshotElementRecord | null { + const records = [...recordsByRef.values()]; + const indexByRef = new Map(records.map((record, index) => [record.publicElement.ref, index])); + const descendantsByRoot = new Map( + records.map((root) => [ + root, + records.filter((candidate) => isForegroundCandidateForRoot(root, candidate)), + ]), + ); + + function foregroundScore(record: RuntimeSnapshotElementRecord): number { + if (!isScrollableNextStepElement(record.publicElement)) { + return 0; + } + + const descendants = descendantsByRoot.get(record) ?? []; + const hasDismissControl = descendants.some((candidate) => + FOREGROUND_DISMISS_TAP_NEXT_STEP_LABELS.has( + compactTapNextStepText(candidate.publicElement.label).toLowerCase(), + ), + ); + const hasTextEntry = descendants.some((candidate) => + candidate.publicElement.actions.includes('typeText'), + ); + const hasStateControls = descendants.some((candidate) => + isStateChangingTapNextStepElement(candidate.publicElement), + ); + + if (!hasDismissControl && !hasTextEntry && !hasStateControls) { + return 0; + } + + return ( + (hasDismissControl ? 100 : 0) + + (hasTextEntry ? 60 : 0) + + (hasStateControls ? 30 : 0) + + record.metadata.depth + + (indexByRef.get(record.publicElement.ref) ?? 0) / 1000 + ); + } + + return records.reduce((best, candidate) => { + const candidateScore = foregroundScore(candidate); + if (candidateScore <= 0) { + return best; + } + if (!best || candidateScore > foregroundScore(best)) { + return candidate; + } + return best; + }, null); +} + +/** + * Filters public snapshot elements to the active foreground region when one can be detected. + * + * Business rules: + * - If foreground detection is confident, next-step examples should prefer controls in the active + * panel/sheet/detail instead of background controls that remain visible in the raw AX snapshot. 
+ * - If no foreground root is detected, keep all elements rather than guessing; conservative output + * is better than hiding valid controls. + */ +function filterToForegroundElements( + elements: RuntimeElementV1[], + recordsByRef: Map, +): RuntimeElementV1[] { + const foregroundRoot = findActiveForegroundRoot(recordsByRef); + if (!foregroundRoot) { + return elements; + } + + return elements.filter((element) => { + const record = recordsByRef.get(element.ref); + return record && isForegroundCandidateForRoot(foregroundRoot, record); + }); +} + +/** + * Creates human/model-facing next-step examples from a runtime snapshot. + * + * Business rules: + * - Refs in next steps must come from the current runtime snapshot only. + * - Prefer runtime tap/scroll guidance over screenshots; screenshots are only suggested when there + * is no useful tap, batch, or scroll action to try. + * - Tap examples skip text fields, hidden controls, and state-changing controls to avoid destructive + * generic suggestions. + * - Scroll examples currently use the first scrollable element left after foreground filtering. + * - Refresh/wait examples are included for fresh snapshot captures, but not after every action. 
+ */
+export function createRuntimeSnapshotNextSteps(params: {
+  simulatorId: string;
+  runtimeSnapshot: RuntimeSnapshotV1;
+  includeRefreshAndWait: boolean;
+}): NextStep[] {
+  const { simulatorId } = params;
+  const recordsByRef = findStoredSnapshotRecords(params);
+  const candidateElements = filterToForegroundElements(
+    params.runtimeSnapshot.elements,
+    recordsByRef,
+  );
+
+  // Keep only tap targets that pass the skip rules, then rank them; the position in the
+  // filtered list breaks priority ties.
+  const rankedTapElements = candidateElements
+    .map((element, index) => ({ element, index }))
+    .filter(({ element }) => {
+      if (!element.actions.includes('tap') || element.actions.includes('typeText')) {
+        return false;
+      }
+      return (
+        !isHiddenTapNextStepElement(element.label) && !isStateChangingTapNextStepElement(element)
+      );
+    })
+    .sort((left, right) => {
+      const priorityDelta =
+        getTapNextStepElementPriority(left.element) - getTapNextStepElementPriority(right.element);
+      if (priorityDelta !== 0) {
+        return priorityDelta;
+      }
+      return left.index - right.index;
+    })
+    .map(({ element }) => element);
+  const tapElement = rankedTapElements[0] ?? null;
+  // No batch candidates are selected here, so the batch example below is currently never emitted.
+  const batchElements: typeof rankedTapElements = [];
+  const scrollElement = candidateElements.find(isScrollableNextStepElement) ?? null;
+
+  let scrollNextStep: NextStep | null = null;
+  if (scrollElement) {
+    scrollNextStep = {
+      label: 'Scroll visible content',
+      tool: 'swipe',
+      params: {
+        simulatorId,
+        withinElementRef: scrollElement.ref,
+        direction: 'up',
+        distance: 0.5,
+      },
+    };
+  }
+
+  // Scroll is listed before the tap example only when that tap would change the screen.
+  const shouldPrioritizeScroll =
+    scrollNextStep !== null &&
+    tapElement !== null &&
+    batchElements.length === 0 &&
+    isScreenChangingTapNextStepElement(tapElement);
+  const hasUsefulRuntimeGuidance =
+    batchElements.length >= 2 || scrollNextStep !== null || tapElement !== null;
+
+  const nextSteps: NextStep[] = [];
+  if (params.includeRefreshAndWait) {
+    nextSteps.push(
+      {
+        label: 'Refresh after layout changes',
+        tool: 'snapshot_ui',
+        params: { simulatorId },
+      },
+      {
+        label: 'Wait for UI to settle',
+        tool: 'wait_for_ui',
+        params: { simulatorId, predicate: 'settled' },
+      },
+    );
+  }
+  if (batchElements.length >= 2) {
+    nextSteps.push({
+      label: 'Batch same-screen taps',
+      tool: 'batch',
+      params: {
+        simulatorId,
+        steps: batchElements.slice(0, 2).map((element) => ({
+          action: 'tap',
+          elementRef: element.ref,
+        })),
+      },
+    });
+  }
+  if (scrollNextStep && shouldPrioritizeScroll) {
+    nextSteps.push(scrollNextStep);
+  }
+  if (tapElement) {
+    nextSteps.push({
+      label: 'Tap an elementRef',
+      tool: 'tap',
+      params: { simulatorId, elementRef: tapElement.ref },
+    });
+  }
+  if (scrollNextStep && !shouldPrioritizeScroll) {
+    nextSteps.push(scrollNextStep);
+  }
+  if (!hasUsefulRuntimeGuidance) {
+    // Screenshot is the fallback when there is nothing to tap, batch, or scroll.
+    nextSteps.push({
+      label: 'Take screenshot for verification',
+      tool: 'screenshot',
+      params: { simulatorId },
+    });
+  }
+  return nextSteps;
+}
diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 4adba639e..f7df3cba5 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -256,7 +256,7 @@ function normalizeNode(input: NormalizedNodeInput, index: number): RuntimeSnapsh const role = deriveRole(node); const label = readText(node, ['AXLabel', 'title', 'help', 'label']); const value = readText(node, ['AXValue', 'value']); - const identifier = readText(node, ['AXUniqueId', 'identifier', 'id']); + const identifier = readText(node, ['AXUniqueId', 'AXIdentifier', 'identifier', 'id']); const enabled = node.enabled !== false; const customActions = normalizeCustomActions(node.custom_actions); const actions = deriveActions({ @@ -448,6 +448,42 @@ function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): vo publicElement.actions.push('swipeWithin'); } } + + pruneGenericFallbackSwipeTargets(elements); +} + +function 
isUnidentifiedOtherSwipeTarget(element: RuntimeSnapshotElementRecord): boolean {
+  // Generic fallback target: a swipeable 'other' node without label, value, or identifier.
+  const { role, actions, label, value, identifier } = element.publicElement;
+  if (role !== 'other' || !actions.includes('swipeWithin')) {
+    return false;
+  }
+  return !(label || value || identifier);
+}
+
+/** Reports whether the element supports `swipeWithin` and is not a generic fallback target. */
+function isPreferredSwipeTarget(element: RuntimeSnapshotElementRecord): boolean {
+  return (
+    element.publicElement.actions.includes('swipeWithin') &&
+    !isUnidentifiedOtherSwipeTarget(element)
+  );
+}
+
+/**
+ * Removes `swipeWithin` from generic fallback targets, but only when the snapshot contains at
+ * least one preferred swipe target; otherwise the fallbacks are left untouched.
+ */
+function pruneGenericFallbackSwipeTargets(elements: RuntimeSnapshotElementRecord[]): void {
+  if (elements.some(isPreferredSwipeTarget)) {
+    for (const element of elements) {
+      if (isUnidentifiedOtherSwipeTarget(element)) {
+        const { publicElement } = element;
+        publicElement.actions = publicElement.actions.filter(
+          (action) => action !== 'swipeWithin',
+        );
+      }
+    }
+  }
 }
 function flattenHierarchy(roots: AccessibilityNode[]): NormalizedNodeInput[] { @@ -494,17 +530,27 @@ export function extractAccessibilityHierarchy(responseText: string): Accessibili throw new RuntimeSnapshotParseError(`AXe describe-ui returned invalid JSON: ${message}`); } - if (Array.isArray(parsed)) { - return parsed as AccessibilityNode[]; - } + const hierarchy = (() => { + if (Array.isArray(parsed)) { + return parsed as AccessibilityNode[]; + } + + if (isRecord(parsed) && Array.isArray(parsed.elements)) { + return parsed.elements as AccessibilityNode[]; + } - if (isRecord(parsed) && Array.isArray(parsed.elements)) { - return parsed.elements as AccessibilityNode[]; + throw new RuntimeSnapshotParseError( + 'AXe describe-ui did not return an accessibility element array.', + ); + })(); + + if (hierarchy.length === 0) { + throw new RuntimeSnapshotParseError( + 'AXe describe-ui returned an empty accessibility element array.', + ); } - throw new RuntimeSnapshotParseError( - 'AXe describe-ui did not return an accessibility 
element array.', - ); + return hierarchy; } export function createRuntimeSnapshotRecord(params: { diff --git a/src/mcp/tools/ui-automation/shared/semantic-tap.ts b/src/mcp/tools/ui-automation/shared/semantic-tap.ts index 0ea5c2b6e..1c7990bfd 100644 --- a/src/mcp/tools/ui-automation/shared/semantic-tap.ts +++ b/src/mcp/tools/ui-automation/shared/semantic-tap.ts @@ -118,6 +118,25 @@ export function createSemanticTapCommand( }; }
+/**
+ * Converts a semantic tap command into batch step strings.
+ *
+ * Non-touch commands become a single step made of the space-joined coordinate args. A `touch`
+ * command is split into a `--down` step followed by an `--up` step that share the same args.
+ */
+export function createSemanticTapBatchSteps(command: SemanticTapCommand): string[] {
+  const { coordinateArgs } = command;
+  if (coordinateArgs[0] === 'touch') {
+    // Drop any existing phase flags so each emitted step ends with exactly one of them.
+    const touchCommand = coordinateArgs
+      .filter((arg) => !(arg === '--down' || arg === '--up'))
+      .join(' ');
+    return [`${touchCommand} --down`, `${touchCommand} --up`];
+  }
+
+  return [coordinateArgs.join(' ')];
+}
+
 export async function executeSemanticTapWithAmbiguityFallback(params: { command: SemanticTapCommand; simulatorId: string; diff --git a/src/mcp/tools/ui-automation/shared/wait-predicate.ts b/src/mcp/tools/ui-automation/shared/wait-predicate.ts index ffa838975..234573a26 100644 --- a/src/mcp/tools/ui-automation/shared/wait-predicate.ts +++ b/src/mcp/tools/ui-automation/shared/wait-predicate.ts @@ -222,7 +222,7 @@ function ambiguousSelectorError( code: 'TARGET_AMBIGUOUS', message: 'The wait selector matched multiple runtime UI elements.', recoveryHint: - 'Provide a more specific selector, or refresh with snapshot_ui and choose a stable elementRef.', + 'Retry with the intended candidate elementRef from this result, or narrow the selector with role, label, value, or identifier. Refresh with snapshot_ui only if the refs are stale.', ...(selector.sourceElementRef ? { elementRef: selector.sourceElementRef } : {}), candidates, }; @@ -278,7 +278,10 @@ export function evaluateElementPredicate(params: { } if (predicate === 'gone') { - return { matched: candidates.length === 0, candidates }; + const goneCandidates = text + ? 
candidates.filter((candidate) => elementTextContains(candidate, text)) + : candidates; + return { matched: goneCandidates.length === 0, candidates: goneCandidates }; } if (predicate === 'textContains') { diff --git a/src/mcp/tools/ui-automation/snapshot_ui.ts b/src/mcp/tools/ui-automation/snapshot_ui.ts index 9a197a4ee..e0eca4c80 100644 --- a/src/mcp/tools/ui-automation/snapshot_ui.ts +++ b/src/mcp/tools/ui-automation/snapshot_ui.ts @@ -11,12 +11,12 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import { recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; -import type { NextStep } from '../../../types/common.ts'; import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import { createRuntimeSnapshotNextSteps } from './shared/runtime-next-steps.ts'; import { createCaptureFailureResult, createCaptureSuccessResult, @@ -42,74 +42,6 @@ type SnapshotUiResult = CaptureResultDomainResult; const LOG_PREFIX = '[AXe]'; -const HIDDEN_TAP_NEXT_STEP_LABELS = new Set(['sheet grabber']); - -const LOW_PRIORITY_TAP_NEXT_STEP_LABELS = new Set([ - 'close', - 'clear search', - 'remove', - 'delete', - 'clear', - 'c', - 'ac', - 'Âą', - '%', - 'Ãˇ', - '×', - '-', - '+', - '=', -]); - -function compactTapNextStepText(value: string | undefined): string { - return (value ?? 
'').replace(/\s+/g, ' ').trim(); -} - -function isHiddenTapNextStepElement(label: string | undefined): boolean { - return HIDDEN_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); -} - -function isLowPriorityTapNextStepElement(label: string | undefined): boolean { - return LOW_PRIORITY_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); -} - -function isContentRichTapNextStepElement(element: { - label?: string; - identifier?: string; -}): boolean { - const label = compactTapNextStepText(element.label); - const identifier = compactTapNextStepText(element.identifier); - return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); -} - -function isAlreadySelectedTapNextStepElement(element: { - state?: { selected?: boolean }; - value?: string; -}): boolean { - return ( - element.state?.selected === true || - compactTapNextStepText(element.value).toLowerCase() === 'selected' - ); -} - -function getTapNextStepElementPriority(element: { - label?: string; - identifier?: string; - state?: { selected?: boolean }; - value?: string; -}): number { - if (isLowPriorityTapNextStepElement(element.label)) { - return 90; - } - if (isAlreadySelectedTapNextStepElement(element)) { - return 70; - } - if (isContentRichTapNextStepElement(element)) { - return 0; - } - return 20; -} - export function createSnapshotUiExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, @@ -126,8 +58,14 @@ export function createSnapshotUiExecutor( toolName, }); if (guard.blockedMessage) { - clearRuntimeSnapshot(simulatorId); - return createCaptureFailureResult(simulatorId, guard.blockedMessage); + return createCaptureFailureResult(simulatorId, guard.blockedMessage, { + uiError: { + code: 'ACTION_FAILED', + message: guard.blockedMessage, + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }, + }); } log('info', `${LOG_PREFIX}/${toolName}: 
Starting for ${simulatorId}`); @@ -163,8 +101,6 @@ export function createSnapshotUiExecutor( warnings: [guard.warningText], }); } catch (error) { - clearRuntimeSnapshot(simulatorId); - if (error instanceof RuntimeSnapshotParseError) { const message = 'Failed to parse runtime UI snapshot.'; log('error', `${LOG_PREFIX}/${toolName}: Failed - ${message}`); @@ -201,55 +137,17 @@ export async function snapshot_uiLogic( setCaptureStructuredOutput(ctx, result); - const runtimeSnapshot = - result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' - ? result.capture - : null; - const tapElement = runtimeSnapshot - ? (runtimeSnapshot.elements - .map((element, index) => ({ element, index })) - .filter( - ({ element }) => - element.actions.includes('tap') && - !element.actions.includes('typeText') && - !isHiddenTapNextStepElement(element.label), - ) - .sort((left, right) => { - const priorityDelta = - getTapNextStepElementPriority(left.element) - - getTapNextStepElementPriority(right.element); - return priorityDelta === 0 ? left.index - right.index : priorityDelta; - })[0]?.element ?? null) - : null; - - if (!result.didError) { - const nextSteps: NextStep[] = [ - { - label: 'Refresh after layout changes', - tool: 'snapshot_ui', - params: { simulatorId: params.simulatorId }, - }, - { - label: 'Wait for UI to settle', - tool: 'wait_for_ui', - params: { simulatorId: params.simulatorId, predicate: 'settled' }, - }, - ...(tapElement - ? 
[ - { - label: 'Tap an elementRef', - tool: 'tap', - params: { simulatorId: params.simulatorId, elementRef: tapElement.ref }, - }, - ] - : []), - { - label: 'Take screenshot for verification', - tool: 'screenshot', - params: { simulatorId: params.simulatorId }, - }, - ]; - ctx.nextSteps = nextSteps; + if ( + !result.didError && + result.capture && + 'type' in result.capture && + result.capture.type === 'runtime-snapshot' + ) { + ctx.nextSteps = createRuntimeSnapshotNextSteps({ + simulatorId: params.simulatorId, + runtimeSnapshot: result.capture, + includeRefreshAndWait: true, + }); } } diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index f0167de27..35648c255 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -20,6 +20,7 @@ import { import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; import { getRuntimeElementSwipePoints } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -139,8 +140,13 @@ export function createSwipeExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'swipe', executor, axeHelpers); clearRuntimeSnapshot(simulatorId); + const capture = await captureRuntimeSnapshotAfterAction({ + simulatorId, + executor, + axeHelpers, + }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { 
clearRuntimeSnapshot(simulatorId); diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index 245d7ead2..ba023f2c6 100644 --- a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -17,6 +17,7 @@ import { createSemanticTapCommand, executeSemanticTapWithAmbiguityFallback, } from './shared/semantic-tap.ts'; +import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -119,9 +120,13 @@ export function createTapExecutor( if (usesTouchActivation && postDelay !== undefined) { await delayMs(postDelay * 1000); } - clearRuntimeSnapshot(simulatorId); + const capture = await captureRuntimeSnapshotAfterAction({ + simulatorId, + executor, + axeHelpers, + }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); diff --git a/src/mcp/tools/ui-automation/type_text.ts b/src/mcp/tools/ui-automation/type_text.ts index d4999df97..5a46384f9 100644 --- a/src/mcp/tools/ui-automation/type_text.ts +++ b/src/mcp/tools/ui-automation/type_text.ts @@ -23,6 +23,7 @@ import { createSemanticTapCommand, executeSemanticTapWithAmbiguityFallback, } from './shared/semantic-tap.ts'; +import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -153,8 +154,13 @@ export function 
createTypeTextExecutor( } await executeAxeCommand(typeCommandArgs, simulatorId, 'type', executor, axeHelpers); clearRuntimeSnapshot(simulatorId); + const capture = await captureRuntimeSnapshotAfterAction({ + simulatorId, + executor, + axeHelpers, + }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); diff --git a/src/mcp/tools/ui-automation/wait_for_ui.ts b/src/mcp/tools/ui-automation/wait_for_ui.ts index 3fb55ccbc..ae2e2c6df 100644 --- a/src/mcp/tools/ui-automation/wait_for_ui.ts +++ b/src/mcp/tools/ui-automation/wait_for_ui.ts @@ -21,7 +21,7 @@ import type { } from '../../../types/ui-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; -import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import { recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import { parseRuntimeSnapshotResponse, RuntimeSnapshotParseError, @@ -106,6 +106,7 @@ const waitForUiSchema = z.strictObject(waitForUiSchemaShape).superRefine((value, if ( value.predicate !== 'settled' && value.predicate !== 'textContains' && + !(value.predicate === 'gone' && value.text !== undefined) && !hasSelectorFields(value) ) { ctx.addIssue({ @@ -123,11 +124,15 @@ const waitForUiSchema = z.strictObject(waitForUiSchemaShape).superRefine((value, }); } - if (value.predicate !== 'textContains' && value.text !== undefined) { + if ( + value.predicate !== 'textContains' && + value.predicate !== 'gone' && + value.text !== undefined + ) { ctx.addIssue({ code: 'custom', path: ['text'], - message: 'text is only supported for textContains waits', + message: 'text is only 
supported for textContains and gone waits', }); } }); @@ -185,7 +190,7 @@ export function createWaitForUiExecutor( } } - if (predicate !== 'settled' && predicate !== 'textContains' && !selector) { + if (predicate !== 'settled' && predicate !== 'textContains' && !selector && !text) { const message = `${predicate} waits require at least one selector field.`; return createCaptureFailureResult(simulatorId, message, { uiError: { @@ -203,8 +208,14 @@ export function createWaitForUiExecutor( toolName, }); if (guard.blockedMessage) { - clearRuntimeSnapshot(simulatorId); - return createCaptureFailureResult(simulatorId, guard.blockedMessage); + return createCaptureFailureResult(simulatorId, guard.blockedMessage, { + uiError: { + code: 'ACTION_FAILED', + message: guard.blockedMessage, + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }, + }); } let latestSnapshot: RuntimeSnapshotRecord | null = null; @@ -241,7 +252,16 @@ export function createWaitForUiExecutor( }) : predicate === 'textContains' && !selector ? evaluateTextContainsPredicate({ snapshot, text: text! }) - : evaluateElementPredicate({ predicate, selector: selector!, snapshot, text }); + : predicate === 'gone' && !selector && text + ? (() => { + const textMatch = evaluateTextContainsPredicate({ snapshot, text }); + return { + matched: (textMatch.candidates ?? []).length === 0, + candidates: textMatch.candidates ?? 
[], + uiError: undefined, + }; + })() + : evaluateElementPredicate({ predicate, selector: selector!, snapshot, text }); if (typeof matched === 'boolean') { if (matched) { @@ -302,7 +322,6 @@ export function createWaitForUiExecutor( }); } - clearRuntimeSnapshot(simulatorId); if (lastParseError) { const message = 'Failed to parse runtime UI snapshot while waiting for UI.'; return createCaptureFailureResult(simulatorId, message, { diff --git a/src/types/common.ts b/src/types/common.ts index 3534c2c6d..64ecd325f 100644 --- a/src/types/common.ts +++ b/src/types/common.ts
@@ -15,6 +15,18 @@
 /**
+ * JSON-like value accepted as a next-step parameter: a string, number, boolean, or null, or an
+ * arbitrarily nested array/object of those.
+ */
+export type NextStepParamValue =
+  | string
+  | number
+  | boolean
+  | null
+  | NextStepParamValue[]
+  | { [key: string]: NextStepParamValue };
+
+/**
  * Represents a suggested next step that can be rendered for CLI or MCP.
  */
 export interface NextStep { /** Optional MCP tool name (e.g., "boot_sim") */ tool?: string; @@ -25,14 +37,14 @@ export interface NextStep { /** Human-readable description of the action (optional when manifest template provides it) */ label?: string; /** Optional parameters to pass to the tool */ - params?: Record; + params?: Record; /** Optional ordering hint for merged steps */ priority?: number; /** When to show this step: 'always' (default), 'success', or 'failure' */ when?: 'always' | 'success' | 'failure'; } -export type NextStepParams = Record; +export type NextStepParams = Record; export type NextStepParamsMap = Record; /** diff --git a/src/types/domain-results.ts b/src/types/domain-results.ts index 1ad1f0a47..ea2ea0ff2 100644 --- a/src/types/domain-results.ts +++ b/src/types/domain-results.ts @@ -641,6 +641,7 @@ export type UiActionResultDomainResult = ToolDomainResultBase & { summary: StatusSummary; action: UiAction; artifacts: { simulatorId: string }; + capture?: CapturePayload; diagnostics?: BasicDiagnostics; uiError?: UiAutomationRecoverableError; }; diff --git a/src/utils/axe-helpers.ts b/src/utils/axe-helpers.ts index 3ae6e4ae1..436b5fca5 100644 --- 
a/src/utils/axe-helpers.ts +++ b/src/utils/axe-helpers.ts @@ -5,14 +5,14 @@ * Prefers bundled AXe when present, but allows env and PATH fallback. */ -import { accessSync, constants, existsSync } from 'fs'; +import { accessSync, constants, existsSync, readdirSync, statSync } from 'fs'; import { delimiter, join, resolve } from 'path'; import type { CommandExecutor } from './execution/index.ts'; import { getDefaultCommandExecutor } from './execution/index.ts'; import { getConfig } from './config-store.ts'; import { getBundledAxePath, getBundledFrameworksDir } from '../core/resource-root.ts'; -export type AxeBinarySource = 'env' | 'bundled' | 'path'; +export type AxeBinarySource = 'env' | 'source' | 'bundled' | 'path'; export type AxeBinary = { path: string; @@ -35,6 +35,61 @@ function resolveAxePathFromConfig(): string | null { return isExecutable(resolved) ? resolved : null; }
+/** Returns true when `path` can be stat'ed and is a directory; any stat failure yields false. */
+function isDirectory(path: string): boolean {
+  try {
+    const stats = statSync(path);
+    return stats.isDirectory();
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Lists the release `axe` binary locations to probe inside an AXe source checkout: the default
+ * `.build/release/axe`, followed by `.build/<dir>/release/axe` for each `*-apple-macosx` directory.
+ */
+function getAxeSourceBuildCandidates(sourcePath: string): string[] {
+  const defaultCandidate = join(sourcePath, '.build', 'release', 'axe');
+  const swiftBuildDir = join(sourcePath, '.build');
+  if (!isDirectory(swiftBuildDir)) {
+    return [defaultCandidate];
+  }
+
+  const tripleCandidates = readdirSync(swiftBuildDir, { withFileTypes: true })
+    .filter((entry) => entry.isDirectory() && entry.name.endsWith('-apple-macosx'))
+    .map((entry) => join(swiftBuildDir, entry.name, 'release', 'axe'));
+  return [defaultCandidate, ...tripleCandidates];
+}
+
+/**
+ * Resolves the AXe binary from the configured `axeSourcePath`.
+ *
+ * Returns null when no source path is configured. Throws when the configured path is not a
+ * directory or when none of the candidate release binaries is executable.
+ */
+function resolveAxePathFromSourceConfig(): string | null {
+  const configuredPath = getConfig().axeSourcePath;
+  if (!configuredPath) {
+    return null;
+  }
+
+  const sourcePath = resolve(configuredPath);
+  if (!isDirectory(sourcePath)) {
+    throw new Error(`Configured axeSourcePath does not exist or is not a directory: ${sourcePath}`);
+  }
+
+  const candidates = getAxeSourceBuildCandidates(sourcePath);
+  const executable = candidates.find((candidate) => isExecutable(candidate));
+  if (executable !== undefined) {
+    return executable;
+  }
+
+  throw new Error(
+    `Configured axeSourcePath does not contain an executable release AXe build. Expected one of: ${candidates.join(', ')}`,
+  );
+}
+
 function resolveBundledAxePath(): string | null { const candidates = new Set(); candidates.add(getBundledAxePath()); @@ -66,6 +121,11 @@ export function resolveAxeBinary(): AxeBinary | null { return { path: configPath, source: 'env' }; } + const sourcePath = resolveAxePathFromSourceConfig(); + if (sourcePath) { + return { path: sourcePath, source: 'source' }; + } + const bundledPath = resolveBundledAxePath(); if (bundledPath) { return { path: bundledPath, source: 'bundled' }; @@ -118,6 +178,7 @@ export function areAxeToolsAvailable(): boolean { export const AXE_NOT_AVAILABLE_MESSAGE = 'AXe tool not found. UI automation features are not available.\n\n' + 'Install AXe (brew tap cameroncooke/axe && brew install axe) or set XCODEBUILDMCP_AXE_PATH.\n' + + 'For local source validation, set XCODEBUILDMCP_AXE_SOURCE_PATH to an AXe checkout with a release build.\n' + 'Ensure bundled artifacts are included or PATH is configured.'; /** diff --git a/src/utils/config-store.ts b/src/utils/config-store.ts index dafd35759..c7e0aadc5 100644 --- a/src/utils/config-store.ts +++ b/src/utils/config-store.ts @@ -28,6 +28,7 @@ export type RuntimeConfigOverrides = Partial<{ dapLogEvents: boolean; launchJsonWaitMs: number; axePath: string; + axeSourcePath: string; iosTemplatePath: string; iosTemplateVersion: string; macosTemplatePath: string; @@ -54,6 +55,7 @@ export type ResolvedRuntimeConfig = { dapLogEvents: boolean; launchJsonWaitMs: number; axePath?: string; + axeSourcePath?: string; iosTemplatePath?: string; iosTemplateVersion?: string; macosTemplatePath?: string; @@ -227,6 +229,9 @@ function readEnvConfig(env: NodeJS.ProcessEnv): RuntimeConfigOverrides { const axePath = env.XCODEBUILDMCP_AXE_PATH ?? env.AXE_PATH; if (axePath) config.axePath = axePath; + const axeSourcePath = env.XCODEBUILDMCP_AXE_SOURCE_PATH ?? 
env.AXE_SOURCE_PATH; + if (axeSourcePath) config.axeSourcePath = axeSourcePath; + const iosTemplatePath = env.XCODEBUILDMCP_IOS_TEMPLATE_PATH; if (iosTemplatePath) config.iosTemplatePath = iosTemplatePath; @@ -557,6 +562,12 @@ function resolveConfig(opts: { fileConfig: opts.fileConfig, envConfig, }), + axeSourcePath: resolveFromLayers({ + key: 'axeSourcePath', + overrides: opts.overrides, + fileConfig: opts.fileConfig, + envConfig, + }), iosTemplatePath: resolveFromLayers({ key: 'iosTemplatePath', overrides: opts.overrides, diff --git a/src/utils/project-config.ts b/src/utils/project-config.ts index ce291bb9d..155397161 100644 --- a/src/utils/project-config.ts +++ b/src/utils/project-config.ts @@ -206,7 +206,7 @@ function normalizeCustomWorkflows(value: unknown): Record { function resolveRelativeTopLevelPaths(config: ProjectConfig, cwd: string): ProjectConfig { const resolved: ProjectConfig = { ...config }; - const pathKeys = ['axePath', 'iosTemplatePath', 'macosTemplatePath'] as const; + const pathKeys = ['axePath', 'axeSourcePath', 'iosTemplatePath', 'macosTemplatePath'] as const; for (const key of pathKeys) { const value = resolved[key]; diff --git a/src/utils/responses/next-step-formatting.ts b/src/utils/responses/next-step-formatting.ts index a149e580c..5e2084bc1 100644 --- a/src/utils/responses/next-step-formatting.ts +++ b/src/utils/responses/next-step-formatting.ts @@ -1,5 +1,5 @@ import type { RuntimeKind } from '../../runtime/types.ts'; -import type { NextStep } from '../../types/common.ts'; +import type { NextStep, NextStepParamValue } from '../../types/common.ts'; import { toKebabCase } from '../../runtime/naming.ts'; import { shellEscapeArg } from '../shell-escape.ts'; @@ -23,6 +23,22 @@ function formatCliArg(value: string): string { : shellEscapeArg(value); }
+/** Reports whether a param value is a non-null object, i.e. an array or a plain object. */
+function hasComplexCliParamValue(value: NextStepParamValue): boolean {
+  return typeof value === 'object' && value !== null;
+}
+
+/**
+ * Formats one non-boolean param value for the CLI: strings and numbers go through `formatCliArg`,
+ * anything else is JSON-serialized and shell-escaped.
+ */
+function formatCliParamValue(value: Exclude<NextStepParamValue, boolean>): string {
+  if (typeof value === 'string' || typeof value === 'number') {
+    return formatCliArg(String(value));
+  }
+  return shellEscapeArg(JSON.stringify(value));
+}
+
 function formatNextStepForCli(step: NextStep): string { const commandName = step.cliTool ?? (step.tool ? toKebabCase(step.tool) : undefined); if (!commandName) { @@ -35,24 +51,33 @@ function formatNextStepForCli(step: NextStep): string { } parts.push(commandName); - for (const [key, value] of Object.entries(step.params ?? {})) { + const params = step.params ?? {}; + if (Object.values(params).some(hasComplexCliParamValue)) { + parts.push('--json', formatCliParamValue(params)); + return parts.join(' '); + } + + for (const [key, value] of Object.entries(params)) { const flagName = toKebabCase(key); if (typeof value === 'boolean') { if (value) { parts.push(`--${flagName}`); } } else { - parts.push(`--${flagName}`, formatCliArg(String(value))); + parts.push(`--${flagName}`, formatCliParamValue(value)); } } return parts.join(' '); } -function formatMcpValue(value: string | number | boolean): string { +function formatMcpValue(value: NextStepParamValue): string { if (typeof value === 'string') { return JSON.stringify(value); } + if (typeof value === 'object' && value !== null) { + return JSON.stringify(value); + } return String(value); } diff --git a/src/utils/runtime-config-schema.ts b/src/utils/runtime-config-schema.ts index 7f4766bc7..9fb123c83 100644 --- a/src/utils/runtime-config-schema.ts +++ b/src/utils/runtime-config-schema.ts @@ -19,6 +19,7 @@ export const runtimeConfigFileSchema = z dapLogEvents: z.boolean().optional(), launchJsonWaitMs: z.number().int().nonnegative().optional(), axePath: z.string().optional(), + axeSourcePath: z.string().optional(), iosTemplatePath: z.string().optional(), iosTemplateVersion: z.string().optional(), macosTemplatePath: z.string().optional(), diff --git a/src/utils/sentry.ts b/src/utils/sentry.ts index c31af55b0..747b8e841 100644 --- a/src/utils/sentry.ts +++ b/src/utils/sentry.ts 
@@ -31,7 +31,7 @@ export interface SentryRuntimeContext { uiDebuggerGuardMode?: string; xcodeIdeWorkflowEnabled?: boolean; axeAvailable?: boolean; - axeSource?: 'env' | 'bundled' | 'path' | 'unavailable'; + axeSource?: 'env' | 'source' | 'bundled' | 'path' | 'unavailable'; axeVersion?: string; xcodeDeveloperDir?: string; xcodebuildPath?: string; diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts index 68417ea54..3c0a05299 100644 --- a/src/utils/structured-output-envelope.ts +++ b/src/utils/structured-output-envelope.ts @@ -32,6 +32,7 @@ type RuntimeSnapshotCompactCapture = { count: number; targets: string[]; scroll: string[]; + text?: string[]; udid: string; }; @@ -152,12 +153,28 @@ function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElem .map((element, index) => ({ element, index })) .sort((left, right) => { const priorityDelta = - getRuntimeTargetDisplayPriority(left.element) - getRuntimeTargetDisplayPriority(right.element); + getRuntimeTargetDisplayPriority(left.element) - + getRuntimeTargetDisplayPriority(right.element); return priorityDelta === 0 ? left.index - right.index : priorityDelta; }) .map(({ element }) => element); } +function sortRuntimeTextForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const yDelta = left.element.frame.y - right.element.frame.y; + if (yDelta !== 0) { + return yDelta; + } + + const xDelta = left.element.frame.x - right.element.frame.x; + return xDelta === 0 ? 
left.index - right.index : xDelta; + }) + .map(({ element }) => element); +} + function compactRuntimeElementRow(element: RuntimeElementV1, action: string): string { return [ element.ref, @@ -178,6 +195,15 @@ function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNa ); } +function isRuntimeTextSummaryElement(element: RuntimeElementV1): boolean { + return ( + element.role === 'text' && + element.state?.visible !== false && + (compactRuntimeSnapshotText(element.label).length > 0 || + compactRuntimeSnapshotText(element.value).length > 0) + ); +} + function toRuntimeSnapshotCompactCapture( snapshot: RuntimeSnapshotV1, ): RuntimeSnapshotCompactCapture { @@ -199,6 +225,9 @@ function toRuntimeSnapshotCompactCapture( !element.actions.includes('typeText'), ) .map((element) => compactRuntimeElementRow(element, 'swipe')); + const text = sortRuntimeTextForDisplay(snapshot.elements.filter(isRuntimeTextSummaryElement)) + .slice(0, 64) + .map((element) => compactRuntimeElementRow(element, 'text')); return { type: 'runtime-snapshot', @@ -208,6 +237,7 @@ function toRuntimeSnapshotCompactCapture( count: snapshot.elements.length, targets, scroll, + ...(text.length > 0 ? 
{ text } : {}), udid: snapshot.simulatorId, }; } @@ -262,7 +292,10 @@ function toRuntimeSnapshotUnchangedCompactCapture( }; } -function projectRuntimeSnapshotData(data: TData, options: StructuredEnvelopeOptions): unknown { +function projectRuntimeSnapshotData( + data: TData, + options: StructuredEnvelopeOptions, +): unknown { if (options.runtimeSnapshot === 'full' || typeof data !== 'object' || data === null) { return data; } diff --git a/src/utils/typed-tool-factory.ts b/src/utils/typed-tool-factory.ts index fe96ae9da..f409cd058 100644 --- a/src/utils/typed-tool-factory.ts +++ b/src/utils/typed-tool-factory.ts @@ -101,6 +101,37 @@ function missingFromMerged( return keys.filter((k) => merged[k] == null); } +function getObjectSchemaKeys(schema: z.ZodType): Set | null { + if (typeof schema !== 'object' || schema === null || !('shape' in schema)) { + return null; + } + + const shape = (schema as { shape?: unknown }).shape; + if (typeof shape !== 'object' || shape === null) { + return null; + } + + return new Set(Object.keys(shape)); +} + +function filterSessionDefaultsForSchema( + defaults: SessionDefaults, + schema: z.ZodType, +): Record { + const schemaKeys = getObjectSchemaKeys(schema); + if (!schemaKeys) { + return defaults; + } + + const filteredDefaults: Record = {}; + for (const [key, value] of Object.entries(defaults)) { + if (schemaKeys.has(key)) { + filteredDefaults[key] = value; + } + } + return filteredDefaults; +} + function formatRequirementError(opts: { message: string; setHint?: string; @@ -201,7 +232,7 @@ function createSessionAwareHandler(opts: { } } - const sessionDefaults = sessionStore.getAll(); + const sessionDefaults = filterSessionDefaultsForSchema(sessionStore.getAll(), internalSchema); const merged = mergeSessionDefaultArgs({ defaults: sessionDefaults, explicitArgs: sanitizedArgs, From 7d1c713e6f20d1905abebfa887e0d117abb89116 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Mon, 18 May 2026 23:10:43 +0100 Subject: [PATCH 03/35] 
test(ui-automation): Cover runtime snapshot guidance Add coverage for runtime captures, post-action snapshot behavior, compact next-step rendering, and session-aware structured output handling. Co-Authored-By: Codex --- .../ui-automation/__tests__/batch.test.ts | 228 ++++++-- .../__tests__/non_streaming_progress.test.ts | 8 +- .../__tests__/runtime-snapshot.test.ts | 104 +++- .../__tests__/snapshot_ui.test.ts | 507 +++++++++++++++++- .../ui-automation/__tests__/swipe.test.ts | 26 + .../tools/ui-automation/__tests__/tap.test.ts | 77 +-- .../ui-automation/__tests__/touch.test.ts | 7 +- .../ui-automation/__tests__/type_text.test.ts | 53 +- .../__tests__/ui-action-test-helpers.ts | 17 + .../__tests__/wait_for_ui.test.ts | 206 ++++++- src/runtime/__tests__/tool-invoker.test.ts | 4 +- src/utils/__tests__/axe-helpers.test.ts | 98 +++- src/utils/__tests__/config-store.test.ts | 27 +- src/utils/__tests__/project-config.test.ts | 6 + .../session-aware-tool-factory.test.ts | 120 +++++ .../structured-output-envelope.test.ts | 11 +- .../__tests__/next-steps-renderer.test.ts | 40 ++ 17 files changed, 1399 insertions(+), 140 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/batch.test.ts b/src/mcp/tools/ui-automation/__tests__/batch.test.ts index 1bd022c27..5c0957660 100644 --- a/src/mcp/tools/ui-automation/__tests__/batch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/batch.test.ts @@ -1,6 +1,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import * as z from 'zod'; import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; @@ -18,6 +19,10 @@ import { simulatorId, } from 
'./ui-action-test-helpers.ts'; +function actionCommands(calls: Array<{ command: string[] }>): string[][] { + return calls.map((call) => call.command).filter((command) => command[1] !== 'describe-ui'); +} + async function runBatch( params: Parameters[0], executor = createTrackingExecutor().executor, @@ -36,42 +41,57 @@ describe('Batch UI Automation Tool', () => { }); describe('Schema Validation', () => { - it('exposes batch steps and AXe batch options', () => { + it('exposes structured tap steps and rejects raw AXe strings', () => { expect(typeof handler).toBe('function'); expect(schema).toHaveProperty('steps'); expect(schema).toHaveProperty('axCache'); - expect(schema).toHaveProperty('tapStyle'); + expect(schema).not.toHaveProperty('tapStyle'); const schemaObject = z.object(schema); - expect(schemaObject.safeParse({ steps: ['tap --id login'] }).success).toBe(true); + expect(schemaObject.safeParse({ steps: [{ action: 'tap', elementRef: 'e1' }] }).success).toBe( + true, + ); expect( schemaObject.safeParse({ - steps: ['tap --id login', 'type user@example.com'], + steps: [ + { action: 'tap', elementRef: 'e1', preDelay: 0.25, postDelay: 0.5 }, + { action: 'tap', elementRef: 'e2' }, + ], axCache: 'perBatch', - typeSubmission: 'chunked', - typeChunkSize: 8, - tapStyle: 'automatic', - continueOnError: true, waitTimeout: 2, pollInterval: 0.25, }).success, ).toBe(true); + expect(schemaObject.safeParse({ steps: ['tap --id login'] }).success).toBe(false); expect(schemaObject.safeParse({ steps: [] }).success).toBe(false); - expect(schemaObject.safeParse({ steps: [''] }).success).toBe(false); - expect(schemaObject.safeParse({ steps: ['tap --id login'], pollInterval: 0 }).success).toBe( + expect(schemaObject.safeParse({ steps: [{ action: 'tap', elementRef: '' }] }).success).toBe( false, ); + expect( + schemaObject.safeParse({ steps: [{ action: 'swipe', elementRef: 'e1' }] }).success, + ).toBe(false); + expect( + schemaObject.safeParse({ steps: [{ action: 'tap', elementRef: 'e1' 
}], pollInterval: 0 }) + .success, + ).toBe(false); }); }); describe('Command Generation', () => { - it('builds repeated AXe --step arguments', async () => { + it('pre-resolves element refs into AXe coordinate batch steps', async () => { + recordSnapshot([ + createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } }), + createNode({ frame: { x: 200, y: 300, width: 80, height: 60 }, AXLabel: 'Next' }), + ]); const { calls, executor } = createTrackingExecutor(); const result = await runBatch( { simulatorId, - steps: ['tap --id username-field', 'type user@example.com'], + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2', preDelay: 0.25, postDelay: 0.5 }, + ], }, executor, ); @@ -80,32 +100,80 @@ describe('Batch UI Automation Tool', () => { didError: false, action: { type: 'batch', stepCount: 2 }, }); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'batch', '--step', - 'tap --id username-field', + 'tap -x 60 -y 40', '--step', - 'type user@example.com', + 'tap -x 240 -y 330 --pre-delay 0.25 --post-delay 0.5', '--udid', simulatorId, ], ]); }); - it('passes AXe batch options through unchanged', async () => { + it('uses touch down/up batch steps for switch refs', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + AXLabel: 'Reduce transparency', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + await runBatch({ simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, executor); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'batch', + '--step', + 'touch -x 307 -y 903 --down', + '--step', + 'touch -x 307 -y 903 --up', + '--udid', + simulatorId, + ]); + }); + + it('rejects delays for switch refs before AXe execution', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 
889.68, width: 316.87, height: 26.89 }, + AXLabel: 'Reduce transparency', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1', postDelay: 0.5 }] }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: + 'Remove preDelay/postDelay from switch steps, or wait between separate batch calls.', + }); + expect(calls).toEqual([]); + }); + + it('passes supported AXe batch options through unchanged', async () => { + recordSnapshot([createNode()]); const { calls, executor } = createTrackingExecutor(); await runBatch( { simulatorId, - steps: ['tap --id login'], + steps: [{ action: 'tap', elementRef: 'e1' }], axCache: 'perStep', - typeSubmission: 'composite', - typeChunkSize: 4, - tapStyle: 'physical', - continueOnError: true, waitTimeout: 3, pollInterval: 0.5, }, @@ -116,16 +184,9 @@ describe('Batch UI Automation Tool', () => { '/mocked/axe/path', 'batch', '--step', - 'tap --id login', + 'tap -x 60 -y 40', '--ax-cache', 'perStep', - '--type-submission', - 'composite', - '--type-chunk-size', - '4', - '--tap-style', - 'physical', - '--continue-on-error', '--wait-timeout', '3', '--poll-interval', @@ -137,34 +198,73 @@ describe('Batch UI Automation Tool', () => { }); describe('Runtime snapshot invalidation', () => { - it('clears the cached runtime snapshot after a successful batch', async () => { + it('preserves the cached runtime snapshot after a successful safe same-screen batch', async () => { + recordSnapshot([ + createNode({ type: 'Switch', role: 'AXSwitch', AXValue: '0' }), + createNode({ type: 'Switch', role: 'AXSwitch', AXValue: 'off' }), + ]); + + const result = await runBatch({ + simulatorId, + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }); + + expect(result.didError).toBe(false); + 
expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('records a fresh runtime snapshot after a successful arbitrary batch', async () => { recordSnapshot([createNode()]); - const result = await runBatch({ simulatorId, steps: ['tap --id login'] }); + const result = await runBatch({ simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }); expect(result.didError).toBe(false); - expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(result.capture).toMatchObject({ type: 'runtime-snapshot', simulatorId }); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('pre-resolves all refs and fails before execution if any ref is invalid', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { + simulatorId, + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e404' }, + ], + }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); it('clears the cached runtime snapshot when AXe runs and reports batch failure', async () => { recordSnapshot([createNode()]); const result = await runBatch( - { simulatorId, steps: ['type Secret123'] }, - createFailingExecutor('step failed: type Secret123'), + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, + createFailingExecutor('step failed'), ); expect(result.didError).toBe(true); - expect(JSON.stringify(result)).not.toContain('Secret123'); expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); it('preserves the cached runtime snapshot when AXe is unavailable before execution', async () => { - recordSnapshot([createNode()]); + recordSnapshot([createNode({ type: 'Switch', role: 'AXSwitch', AXValue: '0' })]); const { executor } = createTrackingExecutor(); const result = await runBatch( - { 
simulatorId, steps: ['tap --id login'] }, + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, executor, createMockAxeHelpers({ getAxePathReturn: null }), ); @@ -174,7 +274,7 @@ describe('Batch UI Automation Tool', () => { }); it('preserves the cached runtime snapshot when the debugger guard blocks before AXe runs', async () => { - recordSnapshot([createNode()]); + recordSnapshot([createNode({ type: 'Switch', role: 'AXSwitch', AXValue: '0' })]); const { calls, executor } = createTrackingExecutor(); const debuggerManager = new DebuggerManager(); vi.spyOn(debuggerManager, 'findSessionForSimulator').mockReturnValue({ @@ -191,7 +291,10 @@ describe('Batch UI Automation Tool', () => { }); const executeBatch = createBatchExecutor(executor, createMockAxeHelpers(), debuggerManager); - const result = await executeBatch({ simulatorId, steps: ['tap --id login'] }); + const result = await executeBatch({ + simulatorId, + steps: [{ action: 'tap', elementRef: 'e1' }], + }); expect(result.didError).toBe(true); expect(calls).toEqual([]); @@ -201,11 +304,56 @@ describe('Batch UI Automation Tool', () => { describe('Handler Behavior', () => { it('requires simulatorId session default', async () => { - const result = await callHandler(handler, { steps: ['tap --id login'] }); + const result = await callHandler(handler, { steps: [{ action: 'tap', elementRef: 'e1' }] }); expect(result.isError).toBe(true); expect(result.content[0].text).toContain('Missing required session defaults'); expect(result.content[0].text).toContain('simulatorId is required'); }); + + it('ignores unrelated project session defaults before strict validation', async () => { + sessionStore.setDefaults({ + simulatorId, + projectPath: '/tmp/App.xcodeproj', + scheme: 'App', + simulatorName: 'iPhone 17 Pro', + simulatorPlatform: 'iOS Simulator', + }); + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + + const { ctx, run } = createMockToolHandlerContext(); + await run(() => 
+ ( + handler as unknown as ( + args: Record, + executor: CommandExecutor, + ) => Promise + )({ steps: [{ action: 'tap', elementRef: 'e1' }] }, executor), + ); + + expect(ctx.structuredOutput?.result.didError).toBe(false); + expect(calls[0]?.command.slice(1)).toEqual([ + 'batch', + '--step', + 'tap -x 60 -y 40', + '--udid', + simulatorId, + ]); + }); + + it('rejects removed legacy top-level fields', async () => { + sessionStore.setDefaults({ simulatorId }); + + const result = await callHandler(handler, { + steps: [{ action: 'tap', elementRef: 'e1' }], + tapStyle: 'physical', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Parameter validation failed'); + expect(result.content[0].text).toContain('Unrecognized key'); + expect(result.content[0].text).toContain('tapStyle'); + }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts index 914a4f0df..551aa9913 100644 --- a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts @@ -20,6 +20,8 @@ import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state import { createNode, recordSnapshot } from './ui-action-test-helpers.ts'; const simulatorId = '12345678-1234-4234-8234-123456789012'; +const runtimeSnapshotOutput = + '{"elements":[{"type":"Button","role":"AXButton","frame":{"x":100,"y":200,"width":50,"height":30},"enabled":true,"children":[],"custom_actions":[]}]}'; function createMockAxeHelpers() { return { @@ -91,7 +93,7 @@ describe('ui automation non-streaming tools', () => { recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); return swipeLogic( { simulatorId, withinElementRef: 'e1', direction: 'up' }, - createMockExecutor({ success: true }), + createMockExecutor({ success: true, output: runtimeSnapshotOutput }), axeHelpers, ); }, @@ -103,7 
+105,7 @@ describe('ui automation non-streaming tools', () => { recordSnapshot([createNode()]); return tapLogic( { simulatorId, elementRef: 'e1' }, - createMockExecutor({ success: true }), + createMockExecutor({ success: true, output: runtimeSnapshotOutput }), axeHelpers, ); }, @@ -127,7 +129,7 @@ describe('ui automation non-streaming tools', () => { recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); return type_textLogic( { simulatorId, elementRef: 'e1', text: 'Hello' }, - createMockExecutor({ success: true }), + createMockExecutor({ success: true, output: runtimeSnapshotOutput }), axeHelpers, ); }, diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index e8bb69054..982dc4869 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -81,6 +81,18 @@ describe('runtime snapshot normalization', () => { expect(snapshot.elementsByRef.get('e2')?.rawNode).toBe(child); }); + it('reads AXIdentifier as a stable runtime element identifier', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXIdentifier: 'weather.detailsButton' })], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ identifier: 'weather.detailsButton' }), + ); + }); + it('derives deterministic screen hashes from normalized UI content', () => { const uiHierarchy = [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Cancel' })]; @@ -107,11 +119,17 @@ describe('runtime snapshot normalization', () => { expect(hierarchy[0]?.AXLabel).toBe('Continue'); }); - it('throws typed parse errors for invalid describe-ui responses', () => { + it('throws typed parse errors for invalid or empty describe-ui responses', () => { expect(() => extractAccessibilityHierarchy('not json')).toThrow(RuntimeSnapshotParseError); 
expect(() => extractAccessibilityHierarchy(JSON.stringify({ value: [] }))).toThrow( RuntimeSnapshotParseError, ); + expect(() => extractAccessibilityHierarchy(JSON.stringify([]))).toThrow( + RuntimeSnapshotParseError, + ); + expect(() => extractAccessibilityHierarchy(JSON.stringify({ elements: [] }))).toThrow( + RuntimeSnapshotParseError, + ); }); it('selects the primary element for semantic next steps', () => { @@ -491,6 +509,90 @@ describe('runtime snapshot normalization', () => { ); }); + it('keeps an unlabeled other swipe target as fallback when no better scroll ref exists', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXUniqueId: undefined, + frame: { x: 0, y: 0, width: 200, height: 200 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + frame: { x: 10, y: 260, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('removes unlabeled other swipe targets when better scroll refs exist', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXUniqueId: undefined, + frame: { x: 0, y: 0, width: 300, height: 300 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Generic overflow', + frame: { x: 10, y: 360, width: 120, height: 20 }, + }), + ], + }), + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'weather.locationsSheet', + frame: { x: 0, y: 400, width: 390, height: 300 }, + 
}), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['swipeWithin']), + }), + ); + expect(snapshot.payload.elements[3]).toEqual( + expect.objectContaining({ + role: 'scroll-view', + identifier: 'weather.locationsSheet', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + it('derives trailing activation points for wide switch rows', () => { const snapshot = createRuntimeSnapshotRecord({ simulatorId, diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index 4abea4060..c7e054f6d 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -2,6 +2,8 @@ import { describe, it, expect } from 'vitest'; import * as z from 'zod'; import { createMockExecutor, createNoopExecutor } from '../../../../test-utils/mock-executors.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import type { DebuggerBackend } from '../../../../utils/debugger/backends/DebuggerBackend.ts'; +import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; import { schema, handler, snapshot_uiLogic } from '../snapshot_ui.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { @@ -15,6 +17,26 @@ import { getRuntimeSnapshot, } from '../shared/snapshot-ui-state.ts'; +async function createStoppedDebuggerManager(simulatorId: string): Promise { + const backend: DebuggerBackend = { + kind: 'lldb-cli', + attach: async () => {}, + detach: async () => {}, + runCommand: async () => '', + resume: async () => {}, + addBreakpoint: async (spec) => ({ id: 1, spec, rawOutput: '' }), + removeBreakpoint: async () => '', + getStack: async () => '', + getVariables: async () => '', + getExecutionState: async () => ({ status: 'stopped', reason: 
'breakpoint' }), + dispose: async () => {}, + }; + const manager = new DebuggerManager({ backendFactory: async () => backend }); + const session = await manager.createSession({ simulatorId, pid: 12345 }); + manager.setCurrentSession(session.id); + return manager; +} + describe('Snapshot UI Plugin', () => { describe('Export Field Validation (Literal)', () => { it('should have handler function', () => { @@ -159,11 +181,6 @@ describe('Snapshot UI Plugin', () => { elementRef, }, }, - { - label: 'Take screenshot for verification', - tool: 'screenshot', - params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, - }, ]); }); @@ -274,7 +291,7 @@ describe('Snapshot UI Plugin', () => { { type: 'StaticText', role: 'AXStaticText', - AXLabel: 'Loading weather...', + AXLabel: 'Loading content...', frame: { x: 20, y: 100, width: 200, height: 44 }, }, ], @@ -322,6 +339,370 @@ describe('Snapshot UI Plugin', () => { ]); }); + it('should include scroll guidance before screenshots when scrollable content is present', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 120, width: 390, height: 600 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Open Details', + frame: { x: 20, y: 180, width: 200, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + 
withinElementRef: 'e1', + direction: 'up', + distance: 0.5, + }); + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'tap', + 'swipe', + ]); + }); + + it('should prioritize scroll guidance over screen-changing tap guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 120, width: 390, height: 600 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + AXIdentifier: 'app.settingsButton', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'swipe', + 'tap', + ]); + }); + + it('should prefer foreground container guidance over background controls', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Background item, older screen content', + frame: { x: 20, y: 100, width: 300, height: 80 }, + }, + ], + }, + { + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.foregroundPanel', + frame: { x: 0, y: 320, width: 390, 
height: 524 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 320, y: 340, width: 44, height: 44 }, + }, + { + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 390, width: 300, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Foreground result, current panel content', + frame: { x: 20, y: 450, width: 320, height: 80 }, + }, + ], + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e7', + }); + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + withinElementRef: 'e4', + direction: 'up', + distance: 0.5, + }); + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'tap', + 'swipe', + ]); + }); + + it('should keep state-changing controls in targets without promoting them as generic next steps', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce Motion', + AXValue: '0', + frame: { x: 20, y: 40, width: 300, height: 44 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce Transparency', + AXValue: '0', + frame: { x: 20, y: 100, width: 300, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + 
process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + const targets = + capture && 'type' in capture && capture.type === 'runtime-snapshot' ? capture.actions : []; + expect(targets).toContainEqual(expect.objectContaining({ action: 'tap', elementRef: 'e1' })); + expect(targets).toContainEqual(expect.objectContaining({ action: 'tap', elementRef: 'e2' })); + }); + + it('should not promote state-changing controls into batch or tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + frame: { x: 20, y: 40, width: 100, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '°F', + AXValue: 'selected', + frame: { x: 20, y: 100, width: 100, height: 44 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Already Enabled', + AXValue: '1', + AXUniqueId: 'settings.enabledRowSwitch', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + AXUniqueId: 'app.contentRow', + frame: { x: 20, y: 210, width: 300, height: 80 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Use Celsius', + AXValue: '0', + AXUniqueId: 'settings.useCelsiusRowSwitch', + frame: { x: 20, y: 310, width: 300, height: 44 }, + }, + { + type: 'Switch', + role: 'AXSwitch', + 
AXLabel: 'Severe Weather Alerts', + AXValue: '0', + AXUniqueId: 'settings.alertsRowSwitch', + frame: { x: 20, y: 370, width: 300, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should keep single tap guidance without batch when only one safe batch target exists', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + frame: { x: 20, y: 40, width: 100, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland', + frame: { x: 20, y: 100, width: 100, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: 
'12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + it('should prefer a non-text-field tap target in next steps', async () => { const uiHierarchy = JSON.stringify({ elements: [ @@ -422,7 +803,7 @@ describe('Snapshot UI Plugin', () => { }); }); - it('should prefer an unselected segmented choice over an already-selected choice for tap next-step guidance', async () => { + it('should not promote segmented choices as generic tap next-step guidance', async () => { const uiHierarchy = JSON.stringify({ elements: [ { @@ -462,10 +843,7 @@ describe('Snapshot UI Plugin', () => { ), ); - expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ - simulatorId: '12345678-1234-4234-8234-123456789012', - elementRef: 'e2', - }); + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); }); it('should skip low-value controls for tap next-step guidance when another tap target exists', async () => { @@ -613,30 +991,37 @@ describe('Snapshot UI Plugin', () => { }); }); - it('should prefer content-rich targets for tap next-step guidance', async () => { + it('should prefer content-rich cards over navigation and state-changing controls for tap next-step guidance', async () => { const uiHierarchy = JSON.stringify({ elements: [ { type: 'Button', role: 'AXButton', AXLabel: 'Portland', - AXIdentifier: 'weather.locationButton', + AXIdentifier: 'app.navigationButton', frame: { x: 20, y: 40, width: 160, height: 44 }, }, { type: 'Button', role: 'AXButton', AXLabel: 'Settings', - AXIdentifier: 'weather.settingsButton', + AXIdentifier: 'app.settingsButton', frame: { x: 320, y: 40, width: 44, height: 44 }, }, { type: 'Button', role: 'AXButton', AXLabel: 'PRECIP., 78%, Next 24 hours', - AXIdentifier: 'weather.precipitationCard', + AXIdentifier: 'app.summaryCard', frame: { x: 20, y: 260, width: 340, height: 140 }, }, + { + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Severe Weather Alerts', + AXValue: '0', + frame: { x: 20, y: 440, width: 
300, height: 44 }, + }, ], }); const mockExecutor = createMockExecutor({ @@ -664,9 +1049,10 @@ describe('Snapshot UI Plugin', () => { simulatorId: '12345678-1234-4234-8234-123456789012', elementRef: 'e3', }); + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); }); - it('should clear runtime snapshot store when AXe output cannot be parsed', async () => { + it('should preserve runtime snapshot store when AXe output cannot be parsed', async () => { __resetRuntimeSnapshotStoreForTests(); const simulatorId = '12345678-1234-4234-8234-123456789012'; const seededExecutor = createMockExecutor({ @@ -682,7 +1068,8 @@ describe('Snapshot UI Plugin', () => { }; await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); - expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + const previousSnapshot = getRuntimeSnapshot(simulatorId); + expect(previousSnapshot).not.toBeNull(); const invalidJsonExecutor = createMockExecutor({ success: true, @@ -694,7 +1081,7 @@ describe('Snapshot UI Plugin', () => { await run(() => snapshot_uiLogic({ simulatorId }, invalidJsonExecutor, mockAxeHelpers)); expect(result.isError()).toBe(true); - expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); expect(ctx.structuredOutput?.schemaVersion).toBe('2'); expect( ctx.structuredOutput?.result.kind === 'capture-result' @@ -703,10 +1090,94 @@ describe('Snapshot UI Plugin', () => { ).toEqual( expect.objectContaining({ code: 'SNAPSHOT_PARSE_FAILED', + recoveryHint: 'Run snapshot_ui again after the app is fully launched and responsive.', }), ); }); + it('should reject empty AXe payloads without replacing a prior runtime snapshot', async () => { + __resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + const seededExecutor = 
createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + const previousSnapshot = getRuntimeSnapshot(simulatorId); + expect(previousSnapshot?.payload.elements).toHaveLength(1); + + for (const output of ['[]', '{"elements": []}', '{}']) { + const emptyExecutor = createMockExecutor({ + success: true, + output, + error: undefined, + process: { pid: 12345 }, + }); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic({ simulatorId }, emptyExecutor, mockAxeHelpers)); + + expect(result.isError()).toBe(true); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.uiError?.code + : undefined, + ).toBe('SNAPSHOT_PARSE_FAILED'); + expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); + } + }); + + it('should preserve runtime snapshot store when the debugger guard blocks before AXe runs', async () => { + __resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + const previousSnapshot = getRuntimeSnapshot(simulatorId); + const stoppedDebugger = await createStoppedDebuggerManager(simulatorId); + const guardedExecutor: CommandExecutor = async () => { + throw new Error('AXe should not run when debugger guard blocks'); + }; + + try { + const { ctx, result, run } = createMockToolHandlerContext(); + 
await run(() => + snapshot_uiLogic({ simulatorId }, guardedExecutor, mockAxeHelpers, stoppedDebugger), + ); + + expect(result.isError()).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.uiError + : undefined, + ).toEqual( + expect.objectContaining({ + code: 'ACTION_FAILED', + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }), + ); + } finally { + await stoppedDebugger.disposeAll(); + } + }); + it('should handle DependencyError when axe is not available', async () => { // Create mock axe helpers that return null for axe path const mockAxeHelpers = { diff --git a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts index c85265a46..fa5130fcc 100644 --- a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts @@ -301,5 +301,31 @@ describe('Swipe Tool', () => { expect(result.uiError).not.toHaveProperty('withinElementRef'); expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); + + it('suggests the next action from the post-swipe runtime snapshot', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + swipeLogic( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + createTrackingExecutor().executor, + createMockAxeHelpers(), + ), + ); + + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + expect(result.capture).toMatchObject({ + type: 'runtime-snapshot', + simulatorId, + }); + expect(ctx.nextSteps).toEqual([ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: 'e1' }, + }, + ]); + }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/tap.test.ts 
b/src/mcp/tools/ui-automation/__tests__/tap.test.ts index 0bb82a9ab..34a3c6c5b 100644 --- a/src/mcp/tools/ui-automation/__tests__/tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/tap.test.ts @@ -18,6 +18,10 @@ import { simulatorId, } from './ui-action-test-helpers.ts'; +function actionCommands(calls: Array<{ command: string[] }>): string[][] { + return calls.map((call) => call.command).filter((command) => command[1] !== 'describe-ui'); +} + async function runTap( params: Parameters[0], executor = createTrackingExecutor().executor, @@ -63,7 +67,7 @@ describe('Tap Plugin', () => { const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); expect(result).toMatchObject({ didError: false, action: { type: 'tap', elementRef: 'e1' } }); - expect(calls).toHaveLength(1); + expect(actionCommands(calls)).toHaveLength(1); expect(calls[0]).toEqual({ command: [ '/mocked/axe/path', @@ -81,14 +85,14 @@ describe('Tap Plugin', () => { }); }); - it('clears the cached runtime snapshot after a successful tap', async () => { + it('preserves the cached runtime snapshot after a successful tap', async () => { recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); const { executor } = createTrackingExecutor(); const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); expect(result.didError).toBe(false); - expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); it('includes element type when tapping a referenced element with a shared identifier', async () => { @@ -112,7 +116,7 @@ describe('Tap Plugin', () => { const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -148,7 +152,7 @@ describe('Tap Plugin', () => { const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); 
expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ ['/mocked/axe/path', 'tap', '-x', '325', '-y', '440', '--udid', simulatorId], ]); }); @@ -162,15 +166,18 @@ describe('Tap Plugin', () => { AXUniqueId: 'shared-action', }), ]); - const { calls, executor } = createSequencedExecutor([ - { success: false, error: 'Multiple accessibility elements matched selector' }, - { success: true, output: 'tapped by coordinate' }, - ]); + const { calls, executor } = createSequencedExecutor( + [ + { success: false, error: 'Multiple accessibility elements matched selector' }, + { success: true, output: 'tapped by coordinate' }, + ], + { describeUiAfterSequence: true }, + ); const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -195,19 +202,22 @@ describe('Tap Plugin', () => { AXLabel: 'Clear search', }), ]); - const { calls, executor } = createSequencedExecutor([ - { - success: false, - error: - "Multiple (2) accessibility elements matched --id 'weather.locationsSheet'. No tap performed.", - }, - { success: true, output: 'tapped by coordinate' }, - ]); + const { calls, executor } = createSequencedExecutor( + [ + { + success: false, + error: + "Multiple (2) accessibility elements matched --id 'weather.locationsSheet'. 
No tap performed.", + }, + { success: true, output: 'tapped by coordinate' }, + ], + { describeUiAfterSequence: true }, + ); const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -233,19 +243,22 @@ describe('Tap Plugin', () => { AXLabel: 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°', }), ]); - const { calls, executor } = createSequencedExecutor([ - { - success: false, - error: - "No accessibility element matched --label 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°'. No tap performed.", - }, - { success: true, output: 'tapped by coordinate' }, - ]); + const { calls, executor } = createSequencedExecutor( + [ + { + success: false, + error: + "No accessibility element matched --label 'Portland, 1:24 PM ¡ Light Rain, 52°, H:55° L:48°'. No tap performed.", + }, + { success: true, output: 'tapped by coordinate' }, + ], + { describeUiAfterSequence: true }, + ); const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -277,8 +290,8 @@ describe('Tap Plugin', () => { const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); expect(result.didError).toBe(true); - expect(calls).toHaveLength(1); - expect(calls[0]?.command).toEqual([ + expect(actionCommands(calls)).toHaveLength(1); + expect(actionCommands(calls)[0]).toEqual([ '/mocked/axe/path', 'tap', '--id', @@ -298,8 +311,8 @@ describe('Tap Plugin', () => { await runTap({ simulatorId, elementRef: 'e1', preDelay: 0.25, postDelay: 0.5 }, executor); - expect(calls).toHaveLength(1); - expect(calls[0]?.command).toEqual([ + expect(actionCommands(calls)).toHaveLength(1); + expect(actionCommands(calls)[0]).toEqual([ '/mocked/axe/path', 'tap', '-x', 
diff --git a/src/mcp/tools/ui-automation/__tests__/touch.test.ts b/src/mcp/tools/ui-automation/__tests__/touch.test.ts index bb440fd61..c114c6bd8 100644 --- a/src/mcp/tools/ui-automation/__tests__/touch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/touch.test.ts @@ -203,7 +203,12 @@ describe('Touch Plugin', () => { describe('Handler Behavior', () => { it('rejects delay unless both down and up are true before AXe runs', async () => { - const result = await callHandler(handler, { simulatorId, elementRef: 'e1', down: true, delay: 1 }); + const result = await callHandler(handler, { + simulatorId, + elementRef: 'e1', + down: true, + delay: 1, + }); expect(result.isError).toBe(true); expect(result.content[0].text).toContain( diff --git a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts index e7a01fc4c..ba32c4d4d 100644 --- a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts @@ -14,6 +14,10 @@ import { simulatorId, } from './ui-action-test-helpers.ts'; +function actionCommands(calls: Array<{ command: string[] }>): string[][] { + return calls.map((call) => call.command).filter((command) => command[1] !== 'describe-ui'); +} + async function runTypeText( params: Parameters[0], executor = createTrackingExecutor().executor, @@ -70,7 +74,7 @@ describe('Type Text Tool', () => { didError: false, action: { type: 'type-text', elementRef: 'e1', textLength: 16 }, }); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -102,7 +106,7 @@ describe('Type Text Tool', () => { didError: false, action: { type: 'type-text', elementRef: 'e1', textLength: text.length }, }); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -168,7 +172,7 @@ describe('Type Text Tool', () => { const result = await runTypeText({ 
simulatorId, elementRef: 'e2', text: 'London' }, executor); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -205,7 +209,7 @@ describe('Type Text Tool', () => { const result = await runTypeText({ simulatorId, elementRef: 'e2', text: 'London' }, executor); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ ['/mocked/axe/path', 'tap', '-x', '130', '-y', '220', '--udid', simulatorId], ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); @@ -220,16 +224,19 @@ describe('Type Text Tool', () => { AXUniqueId: 'locationSearchField', }), ]); - const { calls, executor } = createSequencedExecutor([ - { success: false, error: 'Multiple 2 accessibility elements matched selector' }, - { success: true, output: 'focused by coordinate' }, - { success: true, output: 'typed' }, - ]); + const { calls, executor } = createSequencedExecutor( + [ + { success: false, error: 'Multiple 2 accessibility elements matched selector' }, + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ], + { describeUiAfterSequence: true }, + ); const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'London' }, executor); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -256,14 +263,18 @@ describe('Type Text Tool', () => { AXLabel: 'Search for a city', }), ]); - const { calls, executor } = createSequencedExecutor([ - { - success: false, - error: "No accessibility element matched --label 'Search for a city'. 
No tap performed.", - }, - { success: true, output: 'focused by coordinate' }, - { success: true, output: 'typed' }, - ]); + const { calls, executor } = createSequencedExecutor( + [ + { + success: false, + error: + "No accessibility element matched --label 'Search for a city'. No tap performed.", + }, + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ], + { describeUiAfterSequence: true }, + ); const result = await runTypeText( { simulatorId, elementRef: 'e1', text: 'Portland' }, @@ -271,7 +282,7 @@ describe('Type Text Tool', () => { ); expect(result.didError).toBe(false); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -304,7 +315,7 @@ describe('Type Text Tool', () => { executor, ); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ [ '/mocked/axe/path', 'tap', @@ -342,7 +353,7 @@ describe('Type Text Tool', () => { await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(calls.map((call) => call.command)).toEqual([ + expect(actionCommands(calls)).toEqual([ ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], ['/mocked/axe/path', 'type', 'Hello', '--udid', simulatorId], ]); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts index b5ab08e07..e07a0a9fc 100644 --- a/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts @@ -35,6 +35,14 @@ export function createTrackingExecutor(): { const calls: CapturedCommandCall[] = []; const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { calls.push({ command, logPrefix, useShell, opts }); + if (command[1] === 'describe-ui') { + return { + success: true, + output: JSON.stringify({ elements: [createNode()] }), + 
error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'ok', error: undefined, process: mockProcess }; }; @@ -47,6 +55,7 @@ export function createFailingExecutor(error: string): CommandExecutor { export function createSequencedExecutor( results: Array<{ success: boolean; output?: string; error?: string }>, + options: { describeUiAfterSequence?: boolean } = {}, ): { calls: CapturedCommandCall[]; executor: CommandExecutor; @@ -55,6 +64,14 @@ export function createSequencedExecutor( let index = 0; const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { calls.push({ command, logPrefix, useShell, opts }); + if (options.describeUiAfterSequence === true && command[1] === 'describe-ui') { + return { + success: true, + output: JSON.stringify({ elements: [createNode()] }), + error: undefined, + process: mockProcess, + }; + } const result = results[index] ?? results.at(-1) ?? { success: true }; index += 1; return { diff --git a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts index 13e9fcac2..17be123c0 100644 --- a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts @@ -2,6 +2,8 @@ import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; import type { CaptureResultDomainResult } from '../../../../types/domain-results.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import type { DebuggerBackend } from '../../../../utils/debugger/backends/DebuggerBackend.ts'; +import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; import { @@ -37,6 +39,26 @@ function createTiming(startMs = 0): { }; } +async function createStoppedDebuggerManager(): 
Promise { + const backend: DebuggerBackend = { + kind: 'lldb-cli', + attach: async () => {}, + detach: async () => {}, + runCommand: async () => '', + resume: async () => {}, + addBreakpoint: async (spec) => ({ id: 1, spec, rawOutput: '' }), + removeBreakpoint: async () => '', + getStack: async () => '', + getVariables: async () => '', + getExecutionState: async () => ({ status: 'stopped', reason: 'breakpoint' }), + dispose: async () => {}, + }; + const manager = new DebuggerManager({ backendFactory: async () => backend }); + const session = await manager.createSession({ simulatorId, pid: 12345 }); + manager.setCurrentSession(session.id); + return manager; +} + async function runWaitForUi( params: Parameters[0], executor: CommandExecutor, @@ -116,16 +138,22 @@ describe('Wait for UI Plugin', () => { expect(result.content[0].text).toContain('textContains waits require text'); }); - it('rejects text on non-textContains predicates instead of ignoring it', async () => { - const result = await callHandler(handler, { - simulatorId, - predicate: 'gone', - role: 'text', - text: 'Loading', - }); + it('allows text on gone waits for loading messages', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + ( + handler as unknown as ( + args: Record, + executor: CommandExecutor, + ) => Promise + )({ simulatorId, predicate: 'gone', text: 'Loading', timeoutMs: 0 }, executor), + ); - expect(result.isError).toBe(true); - expect(result.content[0].text).toContain('text is only supported for textContains waits'); + expect(ctx.structuredOutput?.result.didError).toBe(false); }); it('rejects unknown fields instead of silently broadening wait selectors', async () => { @@ -139,6 +167,32 @@ describe('Wait for UI Plugin', () => { expect(result.isError).toBe(true); expect(result.content[0].text).toContain('Unrecognized key: 
"selector"'); }); + + it('ignores unrelated project session defaults before strict validation', async () => { + sessionStore.setDefaults({ + simulatorId, + projectPath: '/tmp/App.xcodeproj', + scheme: 'App', + simulatorName: 'iPhone 17 Pro', + simulatorPlatform: 'iOS Simulator', + }); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + ( + handler as unknown as ( + args: Record, + executor: CommandExecutor, + ) => Promise + )({ predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, executor), + ); + + expect(ctx.structuredOutput?.result.didError).toBe(false); + expect(calls[0]?.command.slice(1)).toEqual(['describe-ui', '--udid', simulatorId]); + }); }); it('uses the resolved simulatorId in next-step params', async () => { @@ -330,6 +384,80 @@ describe('Wait for UI Plugin', () => { expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); }); + it('succeeds for selector-free gone when no element contains text', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', text: 'Loading weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + }); + + it('times out for selector-free gone while an element contains text', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Loading weather...' 
})]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', text: 'Loading weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + candidates: [expect.objectContaining({ label: 'Loading weather...' })], + }); + }); + + it('succeeds for gone when selector matches remain but none contain text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Loading weather...', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Ready', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', role: 'text', text: 'Searching weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + }); + + it('times out for gone when selector matches contain text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Loading weather...', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Ready', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', role: 'text', text: 'Loading weather', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + candidates: [expect.objectContaining({ label: 'Loading weather...' 
})], + }); + }); + it('returns TARGET_AMBIGUOUS when focused selector matches multiple elements', async () => { const { executor } = createSequencedExecutor([ { @@ -652,8 +780,9 @@ describe('Wait for UI Plugin', () => { }); }); - it('clears the runtime store when every poll returns unparsable UI', async () => { + it('preserves the runtime store when every poll returns unparsable UI', async () => { recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); + const previousSnapshot = getRuntimeSnapshot(simulatorId, 0); const { executor } = createSequencedExecutor([{ success: true, output: 'not json' }]); const result = await runWaitForUi( @@ -661,9 +790,64 @@ describe('Wait for UI Plugin', () => { executor, ); + expect(result.didError).toBe(true); + expect(result.uiError).toEqual( + expect.objectContaining({ + code: 'SNAPSHOT_PARSE_FAILED', + recoveryHint: 'Retry after the app is fully launched and responsive.', + }), + ); + expect(getRuntimeSnapshot(simulatorId, 0)).toBe(previousSnapshot); + }); + + it('preserves the runtime store when every poll returns an empty UI payload', async () => { + recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); + const previousSnapshot = getRuntimeSnapshot(simulatorId, 0); + const { executor } = createSequencedExecutor([{ success: true, output: '[]' }]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'settled', timeoutMs: 0 }, + executor, + ); + expect(result.didError).toBe(true); expect(result.uiError?.code).toBe('SNAPSHOT_PARSE_FAILED'); - expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(getRuntimeSnapshot(simulatorId, 0)).toBe(previousSnapshot); + }); + + it('preserves the runtime store when the debugger guard blocks before polling', async () => { + recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); + const previousSnapshot = getRuntimeSnapshot(simulatorId, 0); + const stoppedDebugger = await createStoppedDebuggerManager(); + const guardedExecutor: CommandExecutor = 
async () => { + throw new Error('AXe should not run when debugger guard blocks'); + }; + + try { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + wait_for_uiLogic( + { simulatorId, predicate: 'settled', timeoutMs: 0 }, + guardedExecutor, + createMockAxeHelpers(), + stoppedDebugger, + createTiming().timing, + ), + ); + + const result = ctx.structuredOutput?.result as CaptureResultDomainResult; + expect(result.didError).toBe(true); + expect(result.uiError).toEqual( + expect.objectContaining({ + code: 'ACTION_FAILED', + recoveryHint: + 'Resume execution with debug_continue, remove breakpoints, or detach with debug_detach before retrying UI automation.', + }), + ); + expect(getRuntimeSnapshot(simulatorId, 0)).toBe(previousSnapshot); + } finally { + await stoppedDebugger.disposeAll(); + } }); it('waits until runtime snapshot element signatures remain settled', async () => { diff --git a/src/runtime/__tests__/tool-invoker.test.ts b/src/runtime/__tests__/tool-invoker.test.ts index 3beee0172..2e7c47cfd 100644 --- a/src/runtime/__tests__/tool-invoker.test.ts +++ b/src/runtime/__tests__/tool-invoker.test.ts @@ -704,8 +704,8 @@ describe('DefaultToolInvoker next steps post-processing', () => { const response = await invokeAndFinalize(invoker, 'snapshot-ui', {}, { runtime: 'cli' }); const text = response.content.map((c) => (c.type === 'text' ? 
c.text : '')).join('\n'); - expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id "123"'); - expect(text).not.toContain('xcodebuildmcp simulator screenshot --simulator-id "123"'); + expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id 123'); + expect(text).not.toContain('xcodebuildmcp simulator screenshot --simulator-id 123'); }); it('injects manifest template next steps from dynamic nextStepParams when response omits nextSteps', async () => { diff --git a/src/utils/__tests__/axe-helpers.test.ts b/src/utils/__tests__/axe-helpers.test.ts index 468a48e45..e2b5465ec 100644 --- a/src/utils/__tests__/axe-helpers.test.ts +++ b/src/utils/__tests__/axe-helpers.test.ts @@ -1,19 +1,39 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { chmodSync, mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { getBundledAxeEnvironment } from '../axe-helpers.ts'; +import { dirname, join } from 'node:path'; +import { getBundledAxeEnvironment, resolveAxeBinary } from '../axe-helpers.ts'; import { resetResourceRootCacheForTests } from '../../core/resource-root.ts'; +import { __resetConfigStoreForTests } from '../config-store.ts'; + +function writeExecutable(path: string): void { + mkdirSync(dirname(path), { recursive: true }); + writeFileSync(path, ''); + chmodSync(path, 0o755); +} describe('axe-helpers', () => { let originalResourceRoot: string | undefined; let originalDyldFrameworkPath: string | undefined; + let originalAxePath: string | undefined; + let originalLegacyAxePath: string | undefined; + let originalAxeSourcePath: string | undefined; + let originalLegacyAxeSourcePath: string | undefined; let tempDir: string; beforeEach(() => { originalResourceRoot = process.env.XCODEBUILDMCP_RESOURCE_ROOT; originalDyldFrameworkPath = process.env.DYLD_FRAMEWORK_PATH; + originalAxePath = process.env.XCODEBUILDMCP_AXE_PATH; + 
originalLegacyAxePath = process.env.AXE_PATH; + originalAxeSourcePath = process.env.XCODEBUILDMCP_AXE_SOURCE_PATH; + originalLegacyAxeSourcePath = process.env.AXE_SOURCE_PATH; tempDir = mkdtempSync(join(tmpdir(), 'xbmcp-axe-helpers-')); + delete process.env.XCODEBUILDMCP_AXE_PATH; + delete process.env.AXE_PATH; + delete process.env.XCODEBUILDMCP_AXE_SOURCE_PATH; + delete process.env.AXE_SOURCE_PATH; + __resetConfigStoreForTests(); resetResourceRootCacheForTests(); }); @@ -30,7 +50,32 @@ describe('axe-helpers', () => { process.env.DYLD_FRAMEWORK_PATH = originalDyldFrameworkPath; } + if (originalAxePath === undefined) { + delete process.env.XCODEBUILDMCP_AXE_PATH; + } else { + process.env.XCODEBUILDMCP_AXE_PATH = originalAxePath; + } + + if (originalLegacyAxePath === undefined) { + delete process.env.AXE_PATH; + } else { + process.env.AXE_PATH = originalLegacyAxePath; + } + + if (originalAxeSourcePath === undefined) { + delete process.env.XCODEBUILDMCP_AXE_SOURCE_PATH; + } else { + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = originalAxeSourcePath; + } + + if (originalLegacyAxeSourcePath === undefined) { + delete process.env.AXE_SOURCE_PATH; + } else { + process.env.AXE_SOURCE_PATH = originalLegacyAxeSourcePath; + } + rmSync(tempDir, { recursive: true, force: true }); + __resetConfigStoreForTests(); resetResourceRootCacheForTests(); }); @@ -39,8 +84,7 @@ describe('axe-helpers', () => { const axePath = join(resourceRoot, 'bundled', 'axe'); const frameworksDir = join(resourceRoot, 'bundled', 'Frameworks'); mkdirSync(frameworksDir, { recursive: true }); - writeFileSync(axePath, ''); - chmodSync(axePath, 0o755); + writeExecutable(axePath); process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; delete process.env.DYLD_FRAMEWORK_PATH; @@ -55,8 +99,7 @@ describe('axe-helpers', () => { const axePath = join(resourceRoot, 'bundled', 'axe'); const frameworksDir = join(resourceRoot, 'bundled', 'Frameworks'); mkdirSync(frameworksDir, { recursive: true }); - writeFileSync(axePath, 
''); - chmodSync(axePath, 0o755); + writeExecutable(axePath); process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; process.env.DYLD_FRAMEWORK_PATH = '/existing/frameworks'; @@ -65,4 +108,47 @@ describe('axe-helpers', () => { DYLD_FRAMEWORK_PATH: `${frameworksDir}:/existing/frameworks`, }); }); + + it('resolves an explicit AXe source checkout before bundled and PATH fallback', () => { + const sourceRoot = join(tempDir, 'AXe'); + const sourceAxePath = join(sourceRoot, '.build', 'arm64-apple-macosx', 'release', 'axe'); + const resourceRoot = join(tempDir, 'portable-root'); + const bundledAxePath = join(resourceRoot, 'bundled', 'axe'); + writeExecutable(sourceAxePath); + writeExecutable(bundledAxePath); + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = sourceRoot; + process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; + + expect(resolveAxeBinary()).toEqual({ path: sourceAxePath, source: 'source' }); + }); + + it('keeps explicit axePath precedence over axeSourcePath', () => { + const configuredAxePath = join(tempDir, 'configured', 'axe'); + writeExecutable(configuredAxePath); + process.env.XCODEBUILDMCP_AXE_PATH = configuredAxePath; + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = join(tempDir, 'missing-source'); + + expect(resolveAxeBinary()).toEqual({ path: configuredAxePath, source: 'env' }); + }); + + it('preserves existing invalid axePath fallback behavior', () => { + const sourceRoot = join(tempDir, 'AXe'); + const sourceAxePath = join(sourceRoot, '.build', 'arm64-apple-macosx', 'release', 'axe'); + writeExecutable(sourceAxePath); + process.env.XCODEBUILDMCP_AXE_PATH = join(tempDir, 'missing', 'axe'); + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = sourceRoot; + + expect(resolveAxeBinary()).toEqual({ path: sourceAxePath, source: 'source' }); + }); + + it('fails loudly for invalid explicit axeSourcePath instead of falling back', () => { + const resourceRoot = join(tempDir, 'portable-root'); + writeExecutable(join(resourceRoot, 'bundled', 'axe')); + 
process.env.XCODEBUILDMCP_RESOURCE_ROOT = resourceRoot; + process.env.XCODEBUILDMCP_AXE_SOURCE_PATH = join(tempDir, 'missing-source'); + + expect(() => resolveAxeBinary()).toThrow( + 'Configured axeSourcePath does not exist or is not a directory', + ); + }); }); diff --git a/src/utils/__tests__/config-store.test.ts b/src/utils/__tests__/config-store.test.ts index e0f6307bd..8d953de4c 100644 --- a/src/utils/__tests__/config-store.test.ts +++ b/src/utils/__tests__/config-store.test.ts @@ -56,6 +56,7 @@ describe('config-store', () => { XCODEBUILDMCP_UI_DEBUGGER_GUARD_MODE: 'warn', XCODEBUILDMCP_DEBUGGER_BACKEND: 'lldb', XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list', + XCODEBUILDMCP_AXE_SOURCE_PATH: '/Volumes/Developer/AXe', }; await initConfigStore({ cwd, fs: createFs(), env }); @@ -71,6 +72,7 @@ describe('config-store', () => { expect(config.uiDebuggerGuardMode).toBe('warn'); expect(config.debuggerBackend).toBe('lldb-cli'); expect(config.filePathRenderStyle).toBe('list'); + expect(config.axeSourcePath).toBe('/Volumes/Developer/AXe'); }); it('prefers overrides over config file values and config over env', async () => { @@ -79,18 +81,25 @@ describe('config-store', () => { 'debug: false', 'dapRequestTimeoutMs: 4000', 'filePathRenderStyle: tree', + 'axeSourcePath: /file/AXe', '', ].join('\n'); const env = { XCODEBUILDMCP_DEBUG: 'true', XCODEBUILDMCP_DAP_REQUEST_TIMEOUT_MS: '999', XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list', + XCODEBUILDMCP_AXE_SOURCE_PATH: '/env/AXe', }; await initConfigStore({ cwd, fs: createFs(yaml), - overrides: { debug: true, dapRequestTimeoutMs: 12345, filePathRenderStyle: 'list' }, + overrides: { + debug: true, + dapRequestTimeoutMs: 12345, + filePathRenderStyle: 'list', + axeSourcePath: '/override/AXe', + }, env, }); @@ -98,15 +107,25 @@ describe('config-store', () => { expect(config.debug).toBe(true); expect(config.dapRequestTimeoutMs).toBe(12345); expect(config.filePathRenderStyle).toBe('list'); + 
expect(config.axeSourcePath).toBe('/override/AXe'); }); - it('uses filePathRenderStyle from config before env when no override is provided', async () => { - const yaml = ['schemaVersion: 1', 'filePathRenderStyle: tree', ''].join('\n'); - const env = { XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list' }; + it('uses file config before env when no override is provided', async () => { + const yaml = [ + 'schemaVersion: 1', + 'filePathRenderStyle: tree', + 'axeSourcePath: /file/AXe', + '', + ].join('\n'); + const env = { + XCODEBUILDMCP_FILE_PATH_RENDER_STYLE: 'list', + XCODEBUILDMCP_AXE_SOURCE_PATH: '/env/AXe', + }; await initConfigStore({ cwd, fs: createFs(yaml), env }); expect(getConfig().filePathRenderStyle).toBe('tree'); + expect(getConfig().axeSourcePath).toBe('/file/AXe'); }); it('reads sentryDisabled from config file', async () => { diff --git a/src/utils/__tests__/project-config.test.ts b/src/utils/__tests__/project-config.test.ts index c72483cd6..130d8724e 100644 --- a/src/utils/__tests__/project-config.test.ts +++ b/src/utils/__tests__/project-config.test.ts @@ -68,6 +68,7 @@ describe('project-config', () => { ' - SCREENSHOT', 'debug: true', 'axePath: "./bin/axe"', + 'axeSourcePath: "../AXe"', 'sessionDefaults:', ' projectPath: "./App.xcodeproj"', ' workspacePath: "./App.xcworkspace"', @@ -89,6 +90,7 @@ describe('project-config', () => { }); expect(result.config.debug).toBe(true); expect(result.config.axePath).toBe(path.join(cwd, 'bin', 'axe')); + expect(result.config.axeSourcePath).toBe(path.join(cwd, '..', 'AXe')); expect(defaults.workspacePath).toBe(path.join(cwd, 'App.xcworkspace')); expect(defaults.projectPath).toBeUndefined(); expect(defaults.simulatorId).toBe('SIM-1'); @@ -154,6 +156,7 @@ describe('project-config', () => { const yaml = [ 'schemaVersion: 1', 'axePath: "file:///repo/bin/axe"', + 'axeSourcePath: "file:///repo/AXe"', 'sessionDefaults:', ' workspacePath: "file:///repo/App.xcworkspace"', ' derivedDataPath: "file:///repo/.derivedData"', @@ -166,6 
+169,7 @@ describe('project-config', () => { if (!result.found) throw new Error('expected config to be found'); expect(result.config.axePath).toBe('/repo/bin/axe'); + expect(result.config.axeSourcePath).toBe('/repo/AXe'); const defaults = result.config.sessionDefaults ?? {}; expect(defaults.workspacePath).toBe('/repo/App.xcworkspace'); expect(defaults.derivedDataPath).toBe('/repo/.derivedData'); @@ -193,6 +197,7 @@ describe('project-config', () => { const yaml = [ 'schemaVersion: 1', 'axePath: "~/tools/axe"', + 'axeSourcePath: "~/Code/AXe"', 'iosTemplatePath: "~/templates/ios"', '', ].join('\n'); @@ -202,6 +207,7 @@ describe('project-config', () => { if (!result.found) throw new Error('expected config to be found'); expect(result.config.axePath).toBe(path.join(homedir(), 'tools/axe')); + expect(result.config.axeSourcePath).toBe(path.join(homedir(), 'Code/AXe')); expect(result.config.iosTemplatePath).toBe(path.join(homedir(), 'templates/ios')); }); diff --git a/src/utils/__tests__/session-aware-tool-factory.test.ts b/src/utils/__tests__/session-aware-tool-factory.test.ts index b28f87086..0e89f941c 100644 --- a/src/utils/__tests__/session-aware-tool-factory.test.ts +++ b/src/utils/__tests__/session-aware-tool-factory.test.ts @@ -376,6 +376,126 @@ describe('createSessionAwareTool', () => { expect(parsed).toEqual({ API_KEY: 'abc123', DEBUG: 'true', VERBOSE: '0' }); }); + it('only merges session defaults that exist in the schema before strict validation', async () => { + const strictSchema = z.strictObject({ + bundleId: z.string(), + }); + + const strictHandler = createSessionAwareTool<z.infer<typeof strictSchema>>({ + internalSchema: strictSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['bundleId'] }], + }); + + sessionStore.setDefaults({ + scheme: 'App', + projectPath: '/a.xcodeproj', + simulatorId: 'SIM-123', + 
bundleId: 'com.example.app', + }); + + const result = await invokeAndCollect(strictHandler, {}); + expect(result.isError).toBe(false); + + const parsed = JSON.parse(result.text.replace(/\n/g, '').replace(/^.*?(\{.*\}).*$/, '$1')); + expect(parsed).toEqual({ bundleId: 'com.example.app' }); + }); + + it('uses filtered session defaults to satisfy required fields on strict schemas', async () => { + const strictSchema = z.strictObject({ + scheme: z.string(), + projectPath: z.string(), + }); + + const strictHandler = createSessionAwareTool<z.infer<typeof strictSchema>>({ + internalSchema: strictSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['scheme', 'projectPath'] }], + }); + + sessionStore.setDefaults({ + scheme: 'App', + projectPath: '/a.xcodeproj', + simulatorId: 'SIM-123', + }); + + const result = await invokeAndCollect(strictHandler, {}); + expect(result.isError).toBe(false); + + const parsed = JSON.parse(result.text.replace(/\n/g, '').replace(/^.*?(\{.*\}).*$/, '$1')); + expect(parsed).toEqual({ scheme: 'App', projectPath: '/a.xcodeproj' }); + }); + + it('rejects explicit unknown args on strict schemas after filtering session defaults', async () => { + const strictSchema = z.strictObject({ + bundleId: z.string(), + }); + + const strictHandler = createSessionAwareTool<z.infer<typeof strictSchema>>({ + internalSchema: strictSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['bundleId'] }], + }); + + sessionStore.setDefaults({ + bundleId: 'com.example.app', + simulatorId: 'SIM-123', + }); + + const result = await invokeAndCollect(strictHandler, { simulatorName: 'iPhone 17' }); + expect(result.isError).toBe(true); + expect(result.text).toContain('Parameter 
validation failed'); + expect(result.text).toContain('simulatorName'); + }); + + it('applies refinements after filtering unrelated session defaults', async () => { + const refinedSchema = z + .strictObject({ + scheme: z.string(), + projectPath: z.string().optional(), + workspacePath: z.string().optional(), + }) + .refine((params) => !!params.projectPath !== !!params.workspacePath, { + message: 'provide exactly one projectPath or workspacePath', + path: ['projectPath'], + }); + + const refinedHandler = createSessionAwareTool<z.infer<typeof refinedSchema>>({ + internalSchema: refinedSchema, + logicFunction: async (params) => { + const ctx = getHandlerContext(); + ctx.emit(statusFragment('success', JSON.stringify(params))); + }, + getExecutor: () => createMockExecutor({ success: true }), + requirements: [{ allOf: ['scheme'] }], + }); + + sessionStore.setDefaults({ + scheme: 'App', + projectPath: '/a.xcodeproj', + workspacePath: '/a.xcworkspace', + simulatorId: 'SIM-123', + }); + + const result = await invokeAndCollect(refinedHandler, {}); + expect(result.isError).toBe(true); + expect(result.text).toContain('Parameter validation failed'); + expect(result.text).toContain('provide exactly one projectPath or workspacePath'); + expect(result.text).not.toContain('simulatorId'); + }); + it('rejects array passed as env instead of deep-merging it', async () => { const envSchema = z.object({ scheme: z.string(), diff --git a/src/utils/__tests__/structured-output-envelope.test.ts b/src/utils/__tests__/structured-output-envelope.test.ts index c440a6587..b378bc590 100644 --- a/src/utils/__tests__/structured-output-envelope.test.ts +++ b/src/utils/__tests__/structured-output-envelope.test.ts @@ -95,6 +95,14 @@ describe('toStructuredEnvelope', () => { frame: { x: 12, y: 81, width: 178, height: 33 }, actions: ['tap', 'longPress', 'touch'], }, + { + ref: 'e3', + role: 'text', + label: '10.7 mm', + frame: { x: 24, y: 140, width: 80, height: 24 }, + state: { visible: true }, + actions: ['longPress', 'touch'], + }, ], 
actions: [ { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, @@ -116,9 +124,10 @@ describe('toStructuredEnvelope', () => { rs: '1', screenHash: 'screen-one', seq: 1, - count: 2, + count: 3, targets: ['e2|tap|button|San Francisco||weather.locationButton'], scroll: ['e1|swipe|application|Weather||'], + text: ['e3|text|text|10.7 mm||'], udid: 'SIMULATOR-1', }, waitMatch: { diff --git a/src/utils/responses/__tests__/next-steps-renderer.test.ts b/src/utils/responses/__tests__/next-steps-renderer.test.ts index 4903fb0b0..35b2a255a 100644 --- a/src/utils/responses/__tests__/next-steps-renderer.test.ts +++ b/src/utils/responses/__tests__/next-steps-renderer.test.ts @@ -130,6 +130,27 @@ describe('next-steps-renderer', () => { ); }); + it('should format complex CLI params through json', () => { + const step: NextStep = { + tool: 'batch', + cliTool: 'batch', + workflow: 'ui-automation', + label: 'Batch same-screen taps', + params: { + simulatorId: 'ABC123', + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + }; + + const result = renderNextStep(step, 'cli'); + expect(result).toBe( + 'Batch same-screen taps: xcodebuildmcp ui-automation batch --json \'{"simulatorId":"ABC123","steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}\'', + ); + }); + it('should format step for MCP with no params', () => { const step: NextStep = { tool: 'open_sim', @@ -175,6 +196,25 @@ describe('next-steps-renderer', () => { expect(result).toBe('Do something: some_tool({ verbose: true })'); }); + it('should format complex MCP params as JSON instead of object string coercions', () => { + const step: NextStep = { + tool: 'batch', + label: 'Batch same-screen taps', + params: { + simulatorId: 'ABC123', + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + }; + + const result = renderNextStep(step, 'mcp'); + expect(result).toBe( + 'Batch same-screen taps: batch({ 
simulatorId: "ABC123", steps: [{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}] })', + ); + }); + it('should handle daemon runtime same as MCP', () => { const step: NextStep = { tool: 'open_sim', From 15edeec9920354295b536bdfb8675db06899c5b5 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Mon, 18 May 2026 23:11:01 +0100 Subject: [PATCH 04/35] docs: Update UI automation changelog Record the runtime snapshot and next-step guidance improvements for the upcoming release. Co-Authored-By: Codex --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66091b316..d16353709 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged. - Added `batch` for executing multiple AXe UI automation steps in one simulator session. - Added `wait_for_ui` for polling rs/1 runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. +- Added structured rs/1 element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions. Validated the flow with a source-built AXe binary and the full Weather/Safari Claude Code task, with no raw-string batch attempts. ### Fixed @@ -52,6 +53,10 @@ - Fixed runtime snapshot tips so compact output names all target-ref action tools, including `long_press` and `touch`. - Clarified key press and key sequence tool descriptions so agents know key codes are AXe/macOS virtual key codes and should prefer `type_text` for text entry. 
- Clarified `wait_for_ui` timeout recovery hints so agents know selector fields match exact values and should use `textContains` for partial visible text. +- Fixed UI action success next steps so agents are prompted to refresh runtime snapshots before reusing element refs after actions such as swipes. +- Fixed `snapshot_ui` next-step guidance so state-changing controls such as segmented units and switches remain available in targets without being promoted as generic tap or batch suggestions. +- Fixed `snapshot_ui` tap next-step priority so content-rich cards are suggested before navigation controls like Settings. +- Fixed successful UI action results so they include a fresh runtime snapshot and actionable next steps, reducing follow-up refresh calls after taps, typing, swipes, and batches. ## [2.5.2] @@ -689,4 +694,3 @@ Please note that the UI automation features are an early preview and currently i - Initial release of XcodeBuildMCP - Basic support for building iOS and macOS applications - From 3a771959958ac95e75bd74eec62e936ca31c7b7f Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Mon, 18 May 2026 23:42:28 +0100 Subject: [PATCH 05/35] fix(ui-automation): Align structured output schemas Allow existing coordinate-based UI action payloads and UI hierarchy captures in the v2 schemas. Avoid publishing wait-for-ui next-step params when the wait fails, and remove an unused simulator test import. 
Co-Authored-By: Codex --- .../2.schema.json | 16 ++++ .../2.schema.json | 77 +++++++++++++++++++ .../__tests__/launch_app_sim.test.ts | 5 +- .../__tests__/wait_for_ui.test.ts | 20 +++++ src/mcp/tools/ui-automation/wait_for_ui.ts | 10 ++- 5 files changed, 120 insertions(+), 8 deletions(-) diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json index cbb6ff18f..3a1ca36a5 100644 --- a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -20,6 +20,21 @@ }, "required": ["x", "y", "width", "height"] }, + "uiHierarchyNode": { + "type": "object" + }, + "uiHierarchyCapture": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "ui-hierarchy" }, + "uiHierarchy": { + "type": "array", + "items": { "$ref": "#/$defs/uiHierarchyNode" } + } + }, + "required": ["type", "uiHierarchy"] + }, "runtimeActionName": { "enum": ["tap", "typeText", "longPress", "touch", "swipeWithin"] }, @@ -257,6 +272,7 @@ }, "required": ["format", "width", "height"] }, + { "$ref": "#/$defs/uiHierarchyCapture" }, { "$ref": "#/$defs/runtimeSnapshot" }, { "$ref": "#/$defs/compactRuntimeSnapshot" }, { "$ref": "#/$defs/videoRecordingCapture" }, diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json index 4d6b09e5f..286c15c9e 100644 --- a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -95,6 +95,15 @@ }, "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] }, + "point": { + "type": "object", + "additionalProperties": false, + "properties": { + "x": { "type": "number" }, + "y": { "type": "number" 
} + }, + "required": ["x", "y"] + }, "direction": { "enum": ["up", "down", "left", "right"] }, @@ -154,6 +163,16 @@ }, "required": ["type", "elementRef"] }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "tap" }, + "x": { "type": "number" }, + "y": { "type": "number" } + }, + "required": ["type", "x", "y"] + }, { "type": "object", "additionalProperties": false, @@ -165,6 +184,25 @@ }, "required": ["type", "withinElementRef", "direction"] }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "swipe" }, + "from": { "$ref": "#/$defs/point" }, + "to": { "$ref": "#/$defs/point" }, + "durationSeconds": { "type": "number", "minimum": 0 } + }, + "required": ["type", "from", "to"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "swipe" } + }, + "required": ["type"] + }, { "type": "object", "additionalProperties": false, @@ -175,6 +213,25 @@ }, "required": ["type", "elementRef"] }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "touch" }, + "event": { "type": "string" }, + "x": { "type": "number" }, + "y": { "type": "number" } + }, + "required": ["type", "x", "y"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "touch" } + }, + "required": ["type"] + }, { "type": "object", "additionalProperties": false, @@ -185,6 +242,17 @@ }, "required": ["type", "elementRef", "durationMs"] }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "long-press" }, + "x": { "type": "number" }, + "y": { "type": "number" }, + "durationMs": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "x", "y", "durationMs"] + }, { "type": "object", "additionalProperties": false, @@ -213,6 +281,15 @@ }, "required": ["type", "elementRef"] }, + { + "type": "object", + "additionalProperties": false, + 
"properties": { + "type": { "const": "type-text" }, + "textLength": { "type": "integer", "minimum": 0 } + }, + "required": ["type"] + }, { "type": "object", "additionalProperties": false, diff --git a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts index 73bbe4b65..a7633257f 100644 --- a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts @@ -1,9 +1,6 @@ import { describe, it, expect, beforeEach } from 'vitest'; import * as z from 'zod'; -import { - createMockCommandResponse, - createMockExecutor, -} from '../../../../test-utils/mock-executors.ts'; +import { createMockCommandResponse } from '../../../../test-utils/mock-executors.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; import { schema, handler, launch_app_simLogic, type SimulatorLauncher } from '../launch_app_sim.ts'; import type { LaunchWithLoggingResult } from '../../../../utils/simulator-steps.ts'; diff --git a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts index 17be123c0..cdf474769 100644 --- a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts @@ -217,6 +217,26 @@ describe('Wait for UI Plugin', () => { }); }); + it('does not suggest follow-up steps when the wait fails', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Loading' })]) }, + ]); + const { result, ctx, run } = createMockToolHandlerContext(); + + await run(() => + wait_for_uiLogic( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + createMockAxeHelpers(), + undefined, + createTiming().timing, + ), + ); + + expect(ctx.structuredOutput?.result.didError).toBe(true); + expect(result.nextStepParams).toBeUndefined(); + }); + it('converts 
elementRef to identifier before polling', async () => { recordSnapshot([createNode({ AXUniqueId: 'continue-button', AXLabel: 'Continue' })], 0); const { calls, executor } = createSequencedExecutor([ diff --git a/src/mcp/tools/ui-automation/wait_for_ui.ts b/src/mcp/tools/ui-automation/wait_for_ui.ts index ae2e2c6df..5b0160f51 100644 --- a/src/mcp/tools/ui-automation/wait_for_ui.ts +++ b/src/mcp/tools/ui-automation/wait_for_ui.ts @@ -360,10 +360,12 @@ export async function wait_for_uiLogic( setCaptureStructuredOutput(ctx, result, { headerTitle: 'Wait for UI' }); - ctx.nextStepParams = { - snapshot_ui: { simulatorId: params.simulatorId }, - wait_for_ui: { simulatorId: params.simulatorId, predicate: 'settled' }, - }; + if (!result.didError) { + ctx.nextStepParams = { + snapshot_ui: { simulatorId: params.simulatorId }, + wait_for_ui: { simulatorId: params.simulatorId, predicate: 'settled' }, + }; + } } const publicSchemaObject = z.strictObject( From fb0cded3c450ef89c92514ac1056459fa196aae6 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 00:05:34 +0100 Subject: [PATCH 06/35] fix(ui-automation): Remove static wait next steps Remove the wait_for_ui manifest nextSteps block so the tool relies on dynamic next-step params instead of leaking SIMULATOR_UUID placeholders. 
Co-Authored-By: Codex --- manifests/tools/wait_for_ui.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/manifests/tools/wait_for_ui.yaml b/manifests/tools/wait_for_ui.yaml index f4d29b19f..543288129 100644 --- a/manifests/tools/wait_for_ui.yaml +++ b/manifests/tools/wait_for_ui.yaml @@ -9,18 +9,6 @@ outputSchema: version: '2' routing: stateful: true -nextSteps: - - label: Refresh runtime snapshot - toolId: snapshot_ui - params: - simulatorId: SIMULATOR_UUID - when: success - - label: Wait again - toolId: wait_for_ui - params: - simulatorId: SIMULATOR_UUID - predicate: settled - when: success annotations: title: Wait for UI readOnlyHint: true From 8206d7b2c23a8309af727108c6e848d07eb396b4 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 00:10:11 +0100 Subject: [PATCH 07/35] fix(schema): Preserve video fps constraints Restore the capture-result schema fps constraint to a positive integer while keeping the UI hierarchy capture additions intact. Co-Authored-By: Codex --- .../xcodebuildmcp.output.capture-result/2.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json index 3a1ca36a5..654589ee6 100644 --- a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -183,7 +183,7 @@ "properties": { "type": { "const": "video-recording" }, "state": { "enum": ["started", "stopped"] }, - "fps": { "type": "number" }, + "fps": { "type": "integer", "minimum": 1 }, "outputFile": { "type": "string" }, "sessionId": { "type": "string" } }, From 0eead99f3616c5deb16dcdf0c6ec863ba3142248 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 00:36:10 +0100 Subject: [PATCH 08/35] fix(cli): Reuse output style for next-step rendering Remove the redundant argv.style 
alias and use the normalized outputStyle value when deciding whether to include CLI next steps. Co-Authored-By: Codex --- src/cli/register-tool-commands.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cli/register-tool-commands.ts b/src/cli/register-tool-commands.ts index 16fe678f5..fa5fde6c4 100644 --- a/src/cli/register-tool-commands.ts +++ b/src/cli/register-tool-commands.ts @@ -298,7 +298,6 @@ function registerToolSubcommand( const outputStyle: OutputStyle = argv.style === 'minimal' ? 'minimal' : 'normal'; const socketPath = argv.socket as string; const logLevel = argv['log-level'] as string | undefined; - const style = argv.style as string | undefined; const filePathRenderStyle = argv.filePathRenderStyle as FilePathRenderStyle | undefined; const verboseOutput = argv.verbose === true; @@ -409,7 +408,7 @@ function registerToolSubcommand( runtime: 'cli', outputStyle, filePathRenderStyle, - includeNextSteps: style !== 'minimal', + includeNextSteps: outputStyle !== 'minimal', }); const writeJsonlFragment = outputFormat === 'jsonl' From 16c363a59c354c08acc2faf99606cd1a5dfb518b Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 01:15:31 +0100 Subject: [PATCH 09/35] fix(ui-automation): Resolve review feedback Route key_press and key_sequence through the daemon in CLI mode so they match the other stateful UI automation tools. Keep successful UI actions successful when the post-action snapshot refresh fails, and surface the refresh problem as a recoverable snapshot diagnostic. Also tighten runtime next-step ranking, role classification, and snapshot fixture coverage for the elementRef-based UI automation contract. 
Co-Authored-By: Codex --- manifests/tools/key_press.yaml | 4 +- manifests/tools/key_sequence.yaml | 4 +- .../tools/ui-automation/__tests__/tap.test.ts | 22 ++++++++ src/mcp/tools/ui-automation/batch.ts | 23 +++++--- .../shared/post-action-snapshot.ts | 40 +++++++++++++- .../shared/runtime-next-steps.ts | 31 +++++++---- .../ui-automation/shared/runtime-snapshot.ts | 2 +- src/mcp/tools/ui-automation/swipe.ts | 23 +++++--- src/mcp/tools/ui-automation/tap.ts | 23 +++++--- src/mcp/tools/ui-automation/type_text.ts | 23 +++++--- .../ui-automation/key-press--success.json | 5 +- .../ui-automation/key-sequence--success.json | 5 +- .../swipe--error-no-simulator.json | 17 +++--- .../swipe--error-not-actionable.json | 27 ++++++++++ .../type-text--error-no-simulator.json | 17 +++--- .../type-text--error-not-actionable.json | 27 ++++++++++ .../ui-automation/type-text--success.json | 17 ------ .../ui-automation/wait-for-ui--success.json | 53 +++++++++++++++++++ .../__tests__/json-normalize.test.ts | 8 +-- .../suites/ui-automation-suite.ts | 1 + 20 files changed, 285 insertions(+), 87 deletions(-) create mode 100644 src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-not-actionable.json create mode 100644 src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-not-actionable.json delete mode 100644 src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--success.json create mode 100644 src/snapshot-tests/__fixtures__/cli/json/ui-automation/wait-for-ui--success.json diff --git a/manifests/tools/key_press.yaml b/manifests/tools/key_press.yaml index b282b3fb4..56336773e 100644 --- a/manifests/tools/key_press.yaml +++ b/manifests/tools/key_press.yaml @@ -6,7 +6,9 @@ names: description: Press one hardware key using an AXe HID key code. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "2" + version: '2' +routing: + stateful: true annotations: title: Key Press readOnlyHint: true diff --git a/manifests/tools/key_sequence.yaml b/manifests/tools/key_sequence.yaml index 9b2cb8bf3..8550b6396 100644 --- a/manifests/tools/key_sequence.yaml +++ b/manifests/tools/key_sequence.yaml @@ -6,7 +6,9 @@ names: description: Press hardware keys using AXe HID key codes. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "2" + version: '2' +routing: + stateful: true annotations: title: Key Sequence readOnlyHint: true diff --git a/src/mcp/tools/ui-automation/__tests__/tap.test.ts b/src/mcp/tools/ui-automation/__tests__/tap.test.ts index 34a3c6c5b..e118c4a06 100644 --- a/src/mcp/tools/ui-automation/__tests__/tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/tap.test.ts @@ -95,6 +95,28 @@ describe('Tap Plugin', () => { expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); + it('reports post-action snapshot parse failures without failing the tap action', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: 'tap succeeded' }, + { success: true, output: 'not json' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(result.uiError).toMatchObject({ + code: 'SNAPSHOT_PARSE_FAILED', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.diagnostics?.warnings?.[0]?.message).toContain( + 'UI action succeeded, but the refreshed runtime snapshot could not be parsed.', + ); + expect(result.capture).toBeUndefined(); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(actionCommands(calls)).toHaveLength(1); + }); + it('includes element type when 
tapping a referenced element with a shared identifier', async () => { recordSnapshot([ createNode({ diff --git a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts index d6c84fea8..ad28e6ef3 100644 --- a/src/mcp/tools/ui-automation/batch.ts +++ b/src/mcp/tools/ui-automation/batch.ts @@ -14,7 +14,7 @@ import { import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; import { createSemanticTapBatchSteps, createSemanticTapCommand } from './shared/semantic-tap.ts'; -import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -197,13 +197,7 @@ export function createBatchExecutor( if (!resolvedSteps.preserveSnapshot) { clearRuntimeSnapshot(simulatorId); } - const capture = await captureRuntimeSnapshotAfterAction({ - simulatorId, - executor, - axeHelpers, - }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); @@ -214,6 +208,21 @@ export function createBatchExecutor( log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? 
{ capture: captureResult.capture } : {}), + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts index 7abd643cb..8ab5b0dc9 100644 --- a/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts @@ -1,9 +1,13 @@ import type { CapturePayload } from '../../../../types/domain-results.ts'; +import type { UiAutomationRecoverableError } from '../../../../types/ui-snapshot.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { executeAxeCommand } from './axe-command.ts'; import type { AxeHelpers } from './axe-command.ts'; -import { parseRuntimeSnapshotResponse } from './runtime-snapshot.ts'; -import { recordRuntimeSnapshot } from './snapshot-ui-state.ts'; +import { RuntimeSnapshotParseError, parseRuntimeSnapshotResponse } from './runtime-snapshot.ts'; +import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './snapshot-ui-state.ts'; + +const POST_ACTION_SNAPSHOT_RECOVERY_HINT = + 'Run snapshot_ui again before reusing elementRefs from the previous snapshot.'; export async function captureRuntimeSnapshotAfterAction(params: { simulatorId: string; @@ -24,3 +28,35 @@ export async function captureRuntimeSnapshotAfterAction(params: { recordRuntimeSnapshot(snapshot); return snapshot.payload; } + +export async function captureRuntimeSnapshotAfterActionSafely(params: { + simulatorId: string; + executor: CommandExecutor; + axeHelpers: AxeHelpers; +}): Promise< + | { capture: CapturePayload; warning?: never; uiError?: never } + | { capture?: never; warning: string; uiError: UiAutomationRecoverableError } +> { + try { + return { + capture: await captureRuntimeSnapshotAfterAction(params), + }; + } catch (error) { + clearRuntimeSnapshot(params.simulatorId); + + const isParseFailure = error instanceof 
RuntimeSnapshotParseError; + const message = isParseFailure + ? 'UI action succeeded, but the refreshed runtime snapshot could not be parsed.' + : 'UI action succeeded, but the refreshed runtime snapshot could not be captured.'; + const detail = error instanceof Error ? error.message : String(error); + + return { + warning: `${message} ${POST_ACTION_SNAPSHOT_RECOVERY_HINT}`, + uiError: { + code: isParseFailure ? 'SNAPSHOT_PARSE_FAILED' : 'ACTION_FAILED', + message: `${message} ${detail}`, + recoveryHint: POST_ACTION_SNAPSHOT_RECOVERY_HINT, + }, + }; + } +} diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index 2c4632747..47ed634aa 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -234,19 +234,21 @@ function findActiveForegroundRoot( ): RuntimeSnapshotElementRecord | null { const records = [...recordsByRef.values()]; const indexByRef = new Map(records.map((record, index) => [record.publicElement.ref, index])); - const descendantsByRoot = new Map( - records.map((root) => [ - root, - records.filter((candidate) => isForegroundCandidateForRoot(root, candidate)), - ]), - ); + const scoreByRef = new Map(); function foregroundScore(record: RuntimeSnapshotElementRecord): number { + const cachedScore = scoreByRef.get(record.publicElement.ref); + if (cachedScore !== undefined) { + return cachedScore; + } if (!isScrollableNextStepElement(record.publicElement)) { + scoreByRef.set(record.publicElement.ref, 0); return 0; } - const descendants = descendantsByRoot.get(record) ?? 
[]; + const descendants = records.filter((candidate) => + isForegroundCandidateForRoot(record, candidate), + ); const hasDismissControl = descendants.some((candidate) => FOREGROUND_DISMISS_TAP_NEXT_STEP_LABELS.has( compactTapNextStepText(candidate.publicElement.label).toLowerCase(), @@ -260,16 +262,18 @@ function findActiveForegroundRoot( ); if (!hasDismissControl && !hasTextEntry && !hasStateControls) { + scoreByRef.set(record.publicElement.ref, 0); return 0; } - return ( + const score = (hasDismissControl ? 100 : 0) + (hasTextEntry ? 60 : 0) + (hasStateControls ? 30 : 0) + record.metadata.depth + - (indexByRef.get(record.publicElement.ref) ?? 0) / 1000 - ); + (indexByRef.get(record.publicElement.ref) ?? 0) / 1000; + scoreByRef.set(record.publicElement.ref, score); + return score; } return records.reduce((best, candidate) => { @@ -346,7 +350,12 @@ export function createRuntimeSnapshotNextSteps(params: { }) .map(({ element }) => element); const tapElement = tapElements[0] ?? null; - const batchElements: typeof tapElements = []; + const batchElements = tapElements.filter( + (element) => + !isContentRichTapNextStepElement(element) && + !isScreenChangingTapNextStepElement(element) && + !isLowPriorityTapNextStepElement(element.label), + ); const scrollElement = nextStepElements.find(isScrollableNextStepElement) ?? null; const scrollNextStep: NextStep | null = scrollElement ? 
{ diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index f7df3cba5..5e81afcba 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -120,10 +120,10 @@ function deriveRole(node: AccessibilityNode): RuntimeElementRoleV1 | undefined { if (/image/.test(roleText)) return 'image'; if (/switch|checkbox|check box/.test(roleText)) return 'switch'; if (/slider/.test(roleText)) return 'slider'; - if (/tab/.test(roleText)) return 'tab'; if (/cell|row/.test(roleText)) return 'cell'; if (/scroll/.test(roleText)) return 'scroll-view'; if (/table|list|outline|collection/.test(roleText)) return 'list'; + if (/(^|\b|ax)tab(\b|group|$)/.test(roleText)) return 'tab'; if (/menu/.test(roleText)) return 'menu'; return 'other'; } diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index 35648c255..b964d9bd8 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -20,7 +20,7 @@ import { import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; import { getRuntimeElementSwipePoints } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; -import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -140,13 +140,7 @@ export function createSwipeExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'swipe', executor, axeHelpers); clearRuntimeSnapshot(simulatorId); - const capture = await captureRuntimeSnapshotAfterAction({ - simulatorId, - executor, - 
axeHelpers, - }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); @@ -165,6 +159,21 @@ export function createSwipeExecutor( }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index ba023f2c6..8d527c198 100644 --- a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -17,7 +17,7 @@ import { createSemanticTapCommand, executeSemanticTapWithAmbiguityFallback, } from './shared/semantic-tap.ts'; -import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -120,13 +120,7 @@ export function createTapExecutor( if (usesTouchActivation && postDelay !== undefined) { await delayMs(postDelay * 1000); } - const capture = await captureRuntimeSnapshotAfterAction({ - simulatorId, - executor, - axeHelpers, - }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); @@ 
-144,6 +138,21 @@ export function createTapExecutor( }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/mcp/tools/ui-automation/type_text.ts b/src/mcp/tools/ui-automation/type_text.ts index 5a46384f9..c6f54b3d7 100644 --- a/src/mcp/tools/ui-automation/type_text.ts +++ b/src/mcp/tools/ui-automation/type_text.ts @@ -23,7 +23,7 @@ import { createSemanticTapCommand, executeSemanticTapWithAmbiguityFallback, } from './shared/semantic-tap.ts'; -import { captureRuntimeSnapshotAfterAction } from './shared/post-action-snapshot.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -154,13 +154,7 @@ export function createTypeTextExecutor( } await executeAxeCommand(typeCommandArgs, simulatorId, 'type', executor, axeHelpers); clearRuntimeSnapshot(simulatorId); - const capture = await captureRuntimeSnapshotAfterAction({ - simulatorId, - executor, - axeHelpers, - }); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText], { capture }); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); @@ -177,6 +171,21 @@ export function createTypeTextExecutor( }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return 
createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json index bbfc05f0e..4b8c807a1 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-press--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id " + ] } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json index 4ef507f23..f17368243 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/key-sequence--success.json @@ -18,5 +18,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id " + ] } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json index 6968362b0..29c92f1c9 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-no-simulator.json @@ -2,24 +2,23 @@ "schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate swipe.", + "error": "No runtime UI snapshot is available for this simulator.", 
"data": { "summary": { "status": "FAILED" }, "action": { - "type": "swipe" + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-not-actionable.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-not-actionable.json new file mode 100644 index 000000000..d7d3c80b9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/swipe--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'swipeWithin'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'swipeWithin'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json index 30d9ab14d..ffb164bb3 100644 --- a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-no-simulator.json @@ -2,24 +2,23 @@ 
"schema": "xcodebuildmcp.output.ui-action-result", "schemaVersion": "2", "didError": true, - "error": "Failed to simulate text typing.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "type-text" + "type": "type-text", + "elementRef": "e3", + "textLength": 5 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-not-actionable.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-not-actionable.json new file mode 100644 index 000000000..e403f06f9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'typeText'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "type-text", + "elementRef": "e3", + "textLength": 5 + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'typeText'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--success.json deleted file mode 100644 index a2686f68c..000000000 --- 
a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/type-text--success.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "2", - "didError": false, - "error": null, - "data": { - "summary": { - "status": "SUCCEEDED" - }, - "action": { - "type": "type-text" - }, - "artifacts": { - "simulatorId": "" - } - } -} diff --git a/src/snapshot-tests/__fixtures__/cli/json/ui-automation/wait-for-ui--success.json b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/wait-for-ui--success.json new file mode 100644 index 000000000..b677ec15a --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/json/ui-automation/wait-for-ui--success.json @@ -0,0 +1,53 @@ +{ + "schema": "xcodebuildmcp.output.capture-result", + "schemaVersion": "2", + "didError": false, + "error": null, + "data": { + "summary": { + "status": "SUCCEEDED" + }, + "artifacts": { + "simulatorId": "" + }, + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|±||", + "e5|tap|button|%||", + "e6|tap|button|÷||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", + "e21|tap|button|=||" + ], + "scroll": [], + "text": [ + "e2|text|text|0||" + ], + "udid": "" + }, + "waitMatch": { + "predicate": "exists", + "matches": [ + "e3|tap|button|C||" + ] + } + } +} diff --git a/src/snapshot-tests/__tests__/json-normalize.test.ts b/src/snapshot-tests/__tests__/json-normalize.test.ts index ce826a230..629a5a2b8 100644 --- a/src/snapshot-tests/__tests__/json-normalize.test.ts +++ b/src/snapshot-tests/__tests__/json-normalize.test.ts @@ -26,11 +26,7 @@ 
describe('normalizeStructuredEnvelope', () => { error: 'Tests failed', data: { summary: { target: 'simulator' }, - testCases: [ - { test: 'Swift Testing failure', status: 'failed', durationMs: 0 }, - { test: 'Volatile Swift Testing pass', status: 'passed', durationMs: 0 }, - { suite: 'XCTestSuite', test: 'testStablePass', status: 'passed', durationMs: 0 }, - ], + testCases: [{ test: 'Swift Testing failure', status: 'failed', durationMs: 0 }], }, }); }); @@ -156,7 +152,7 @@ describe('normalizeStructuredEnvelope', () => { error: null, data: { entries: [ - { key: 'SDKROOT', value: 'iphoneos' }, + { key: 'SDKROOT', value: '' }, { key: 'PATH', value: '' }, ], }, diff --git a/src/snapshot-tests/suites/ui-automation-suite.ts b/src/snapshot-tests/suites/ui-automation-suite.ts index ee2ebaa3c..d2acb9064 100644 --- a/src/snapshot-tests/suites/ui-automation-suite.ts +++ b/src/snapshot-tests/suites/ui-automation-suite.ts @@ -229,6 +229,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('type-text', () => { it('error - target not actionable', async () => { + snapshotCaptured = false; await refreshRuntimeSnapshot(); const { text, isError } = await harness.invoke('ui-automation', 'type-text', { From 89358faf7c137aa15942b62a6eaf4bc059bde09d Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 01:34:46 +0100 Subject: [PATCH 10/35] fix(ui-automation): Tighten runtime snapshot review issues Keep next-step guidance available for unchanged runtime snapshots, cap compact runtime snapshot rows, and simplify runtime snapshot rendering branches called out by review. Add direct semantic tap helper coverage for selector choice, duplicate selector fallback, switch touch batching, and recoverable AXe fallback. 
Co-Authored-By: Codex --- .../__tests__/semantic-tap.test.ts | 138 ++++++++++++++++++ .../__tests__/snapshot_ui.test.ts | 24 ++- src/mcp/tools/ui-automation/snapshot_ui.ts | 29 ++-- .../structured-output-envelope.test.ts | 52 +++++++ src/utils/renderers/domain-result-text.ts | 45 +++--- src/utils/structured-output-envelope.ts | 40 +++-- 6 files changed, 279 insertions(+), 49 deletions(-) create mode 100644 src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts diff --git a/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts b/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts new file mode 100644 index 000000000..47b57ee39 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, it } from 'vitest'; +import { mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { + createSemanticTapBatchSteps, + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, + isRecoverableAxeSelectorError, +} from '../shared/semantic-tap.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, + simulatorId, +} from './ui-action-test-helpers.ts'; + +function createElements(nodes = [createNode()]) { + return createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: 1_000 }).elements; +} + +describe('semantic tap helpers', () => { + it('recognizes recoverable AXe selector failures', () => { + expect( + isRecoverableAxeSelectorError( + new Error('Multiple (2) accessibility elements matched selector'), + ), + ).toBe(true); + expect( + isRecoverableAxeSelectorError({ + axeOutput: 'No accessibility element matched --label Continue', + }), + ).toBe(true); + expect(isRecoverableAxeSelectorError(new Error('Simulator is not booted'))).toBe(false); + }); + + it('uses a unique semantic selector 
before coordinates', () => { + const [element] = createElements([ + createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }), + ]); + + const command = createSemanticTapCommand(element!, 'e1', ['--duration', '0.1'], [element!]); + + expect(command.selectorArgs).toEqual([ + 'tap', + '--id', + 'continue.button', + '--element-type', + 'Button', + '--duration', + '0.1', + ]); + expect(command.primaryArgs).toBe(command.selectorArgs); + expect(command.usedSelector).toBe(true); + }); + + it('falls back to coordinates when semantic selectors are duplicated', () => { + const elements = createElements([ + createNode({ AXUniqueId: 'duplicate.button', AXLabel: 'Duplicate' }), + createNode({ + AXUniqueId: 'duplicate.button', + AXLabel: 'Duplicate', + frame: { x: 20, y: 80, width: 100, height: 40 }, + }), + ]); + + const command = createSemanticTapCommand(elements[0]!, 'e1', [], elements); + + expect(command.selectorArgs).toBeNull(); + expect(command.primaryArgs).toEqual(['tap', '-x', '60', '-y', '40']); + expect(command.usedSelector).toBe(false); + }); + + it('represents switch taps as down/up touch batch steps', () => { + const [element] = createElements([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Alerts', + frame: { x: 10, y: 20, width: 200, height: 40 }, + }), + ]); + + const command = createSemanticTapCommand(element!, 'e1'); + + expect(command.selectorArgs).toBeNull(); + expect(command.coordinateArgs).toEqual(['touch', '-x', '158', '-y', '40', '--down', '--up']); + expect(createSemanticTapBatchSteps(command)).toEqual([ + 'touch -x 158 -y 40 --down', + 'touch -x 158 -y 40 --up', + ]); + }); + + it('retries recoverable selector failures with coordinates', async () => { + const [element] = createElements([ + createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }), + ]); + const command = createSemanticTapCommand(element!, 'e1', [], [element!]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 
'Multiple (2) accessibility elements matched selector' }, + { success: true, output: 'ok' }, + ]); + + await executeSemanticTapWithAmbiguityFallback({ + command, + simulatorId, + executor, + axeHelpers: createMockAxeHelpers(), + }); + + expect(calls.map((call) => call.command.slice(1, -2))).toEqual([ + ['tap', '--id', 'continue.button', '--element-type', 'Button'], + ['tap', '-x', '60', '-y', '40'], + ]); + }); + + it('does not retry unrecoverable selector failures', async () => { + const [element] = createElements([ + createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }), + ]); + const command = createSemanticTapCommand(element!, 'e1', [], [element!]); + const calls: string[][] = []; + const executor: CommandExecutor = async (commandArgs) => { + calls.push(commandArgs); + return { success: false, output: '', error: 'Simulator is not booted', process: mockProcess }; + }; + + await expect( + executeSemanticTapWithAmbiguityFallback({ + command, + simulatorId, + executor, + axeHelpers: createMockAxeHelpers(), + }), + ).rejects.toThrow("axe command 'tap' failed."); + expect(calls).toHaveLength(1); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index c7e054f6d..63120a5e3 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -239,7 +239,29 @@ describe('Snapshot UI Plugin', () => { seq: 2, }); expect(getRuntimeSnapshot('12345678-1234-4234-8234-123456789012')?.seq).toBe(2); - expect(second.ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); + expect(second.ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', 
+ }, + }, + { + label: 'Tap an elementRef', + tool: 'tap', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e1', + }, + }, + ]); }); it('should return full runtime snapshot when sinceScreenHash differs from the current screen hash', async () => { diff --git a/src/mcp/tools/ui-automation/snapshot_ui.ts b/src/mcp/tools/ui-automation/snapshot_ui.ts index e0eca4c80..7d8dceeaa 100644 --- a/src/mcp/tools/ui-automation/snapshot_ui.ts +++ b/src/mcp/tools/ui-automation/snapshot_ui.ts @@ -11,11 +11,12 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { RuntimeSnapshotV1 } from '../../../types/ui-snapshot.ts'; import { createRuntimeSnapshotNextSteps } from './shared/runtime-next-steps.ts'; import { createCaptureFailureResult, @@ -137,17 +138,21 @@ export async function snapshot_uiLogic( setCaptureStructuredOutput(ctx, result); - if ( - !result.didError && - result.capture && - 'type' in result.capture && - result.capture.type === 'runtime-snapshot' - ) { - ctx.nextSteps = createRuntimeSnapshotNextSteps({ - simulatorId: params.simulatorId, - runtimeSnapshot: result.capture, - includeRefreshAndWait: true, - }); + if (!result.didError && result.capture && 'type' in result.capture) { + let runtimeSnapshot: RuntimeSnapshotV1 | undefined; + if (result.capture.type === 'runtime-snapshot') { + runtimeSnapshot = result.capture; + } else if (result.capture.type === 'runtime-snapshot-unchanged') { + runtimeSnapshot = 
getRuntimeSnapshot(params.simulatorId)?.payload; + } + + if (runtimeSnapshot) { + ctx.nextSteps = createRuntimeSnapshotNextSteps({ + simulatorId: params.simulatorId, + runtimeSnapshot, + includeRefreshAndWait: true, + }); + } } } diff --git a/src/utils/__tests__/structured-output-envelope.test.ts b/src/utils/__tests__/structured-output-envelope.test.ts index b378bc590..1e7ff263e 100644 --- a/src/utils/__tests__/structured-output-envelope.test.ts +++ b/src/utils/__tests__/structured-output-envelope.test.ts @@ -138,6 +138,58 @@ describe('toStructuredEnvelope', () => { }); }); + it('caps compact runtime snapshot rows by category', () => { + const targets = Array.from({ length: 80 }, (_, index) => ({ + ref: `e${index + 1}`, + role: 'button' as const, + label: `Target ${index + 1}`, + frame: { x: 0, y: index, width: 100, height: 40 }, + actions: ['tap' as const], + })); + const scroll = Array.from({ length: 40 }, (_, index) => ({ + ref: `e${index + 81}`, + role: 'scroll-view' as const, + label: `Scroll ${index + 1}`, + frame: { x: 0, y: index, width: 390, height: 600 }, + actions: ['swipeWithin' as const], + })); + const text = Array.from({ length: 70 }, (_, index) => ({ + ref: `e${index + 121}`, + role: 'text' as const, + label: `Text ${index + 1}`, + frame: { x: 0, y: index, width: 100, height: 20 }, + state: { visible: true }, + actions: ['touch' as const], + })); + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'large-screen', + seq: 4, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [...targets, ...scroll, ...text], + actions: [], + }, + }; + + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2'); + const data = envelope.data as { + capture: { targets: string[]; scroll: string[]; 
text?: string[] }; + }; + + expect(data.capture.targets).toHaveLength(64); + expect(data.capture.scroll).toHaveLength(32); + expect(data.capture.text).toHaveLength(64); + }); + it('compacts unchanged runtime snapshot captures by default', () => { const result: CaptureResultDomainResult = { kind: 'capture-result', diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index 9df4a4ddb..2c7c02b46 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -1249,16 +1249,24 @@ function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElem .map(({ element }) => element); } +function getPrimaryRuntimeElementAction(element: RuntimeElementV1, action?: string): string { + if (action) { + return action; + } + if (element.actions.includes('typeText')) { + return 'typeText'; + } + if (element.actions.includes('tap')) { + return 'tap'; + } + if (element.actions.includes('swipeWithin')) { + return 'swipe'; + } + return 'none'; +} + function formatRuntimeElementLine(element: RuntimeElementV1, action?: string): string { - const primaryAction = - action ?? - (element.actions.includes('typeText') - ? 'typeText' - : element.actions.includes('tap') - ? 'tap' - : element.actions.includes('swipeWithin') - ? 'swipe' - : 'none'); + const primaryAction = getPrimaryRuntimeElementAction(element, action); return [ element.ref, primaryAction, @@ -1466,17 +1474,14 @@ function createCaptureResultItems( if (result.didError) { items.push(...createStandardDiagnosticSections(result.diagnostics)); items.push(...createUiErrorItems(result.uiError)); - items.push( - createStatus( - 'error', - result.error ?? - (isUiHierarchy - ? isRuntimeSnapshot - ? 'Failed to get runtime UI snapshot.' - : 'Failed to get accessibility hierarchy.' 
- : 'Failed to capture screenshot.'), - ), - ); + let fallbackError = 'Failed to capture screenshot.'; + if (isRuntimeSnapshot) { + fallbackError = 'Failed to get runtime UI snapshot.'; + } else if (isUiHierarchy) { + fallbackError = 'Failed to get accessibility hierarchy.'; + } + + items.push(createStatus('error', result.error ?? fallbackError)); return items; } diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts index 3c0a05299..3e4b45a97 100644 --- a/src/utils/structured-output-envelope.ts +++ b/src/utils/structured-output-envelope.ts @@ -46,6 +46,9 @@ type RuntimeSnapshotUnchangedCompactCapture = { }; const MINIMAL_DATA_PRUNE_KEYS = ['request'] as const; +const COMPACT_RUNTIME_TARGET_LIMIT = 64; +const COMPACT_RUNTIME_SCROLL_LIMIT = 32; +const COMPACT_RUNTIME_TEXT_LIMIT = 64; const HIDDEN_RUNTIME_TARGET_LABELS = new Set(['sheet grabber']); const LOW_PRIORITY_RUNTIME_TARGET_LABELS = new Set([ 'sheet grabber', @@ -213,10 +216,12 @@ function toRuntimeSnapshotCompactCapture( !isHiddenRuntimeTarget(element) && (element.actions.includes('tap') || element.actions.includes('typeText')), ), - ).map((element) => { - const action = element.actions.includes('typeText') ? 'typeText' : 'tap'; - return compactRuntimeElementRow(element, action); - }); + ) + .slice(0, COMPACT_RUNTIME_TARGET_LIMIT) + .map((element) => { + const action = element.actions.includes('typeText') ? 
'typeText' : 'tap'; + return compactRuntimeElementRow(element, action); + }); const scroll = snapshot.elements .filter( (element) => @@ -224,9 +229,10 @@ function toRuntimeSnapshotCompactCapture( !element.actions.includes('tap') && !element.actions.includes('typeText'), ) + .slice(0, COMPACT_RUNTIME_SCROLL_LIMIT) .map((element) => compactRuntimeElementRow(element, 'swipe')); const text = sortRuntimeTextForDisplay(snapshot.elements.filter(isRuntimeTextSummaryElement)) - .slice(0, 64) + .slice(0, COMPACT_RUNTIME_TEXT_LIMIT) .map((element) => compactRuntimeElementRow(element, 'text')); return { @@ -301,17 +307,19 @@ function projectRuntimeSnapshotData( } const dataWithCapture = data as TData & { capture?: unknown }; - const projectedData = isRuntimeSnapshotCapture(dataWithCapture.capture) - ? { - ...dataWithCapture, - capture: toRuntimeSnapshotCompactCapture(dataWithCapture.capture), - } - : isRuntimeSnapshotUnchangedCapture(dataWithCapture.capture) - ? { - ...dataWithCapture, - capture: toRuntimeSnapshotUnchangedCompactCapture(dataWithCapture.capture), - } - : dataWithCapture; + let projectedData: typeof dataWithCapture = dataWithCapture; + + if (isRuntimeSnapshotCapture(dataWithCapture.capture)) { + projectedData = { + ...dataWithCapture, + capture: toRuntimeSnapshotCompactCapture(dataWithCapture.capture), + }; + } else if (isRuntimeSnapshotUnchangedCapture(dataWithCapture.capture)) { + projectedData = { + ...dataWithCapture, + capture: toRuntimeSnapshotUnchangedCompactCapture(dataWithCapture.capture), + }; + } const dataWithRuntimeRows = projectedData as typeof projectedData & { uiError?: { candidates?: unknown[] }; From a2d8777cd7eec6c726f8b683925a012765fcb995 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 01:37:43 +0100 Subject: [PATCH 11/35] fix(ui-automation): Preserve runtime snapshot visibility contracts Keep non-streaming UI action tests anchored to explicit success text for every action case instead of accepting any non-empty output. 
Avoid re-adding swipe actions to containers already marked offscreen by viewport visibility filtering, and cover that ordering with a regression test. Co-Authored-By: Codex --- .../__tests__/non_streaming_progress.test.ts | 10 ++--- .../__tests__/runtime-snapshot.test.ts | 44 +++++++++++++++++++ .../ui-automation/shared/runtime-snapshot.ts | 1 + 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts index 551aa9913..65e47dc54 100644 --- a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts @@ -85,6 +85,7 @@ describe('ui automation non-streaming tools', () => { axeHelpers, ); }, + expectedText: 'Long press on elementRef e1 for 1500ms simulated successfully.', }, { name: 'swipe', @@ -97,6 +98,7 @@ describe('ui automation non-streaming tools', () => { axeHelpers, ); }, + expectedText: 'Swipe up within elementRef e1 simulated successfully.', }, { name: 'tap', @@ -109,6 +111,7 @@ describe('ui automation non-streaming tools', () => { axeHelpers, ); }, + expectedText: 'Tap on elementRef e1 simulated successfully.', }, { name: 'touch', @@ -121,6 +124,7 @@ describe('ui automation non-streaming tools', () => { axeHelpers, ); }, + expectedText: 'Touch event (touch down) on elementRef e1 executed successfully.', }, { name: 'type_text', @@ -141,11 +145,7 @@ describe('ui automation non-streaming tools', () => { const { result } = await runToolLogic(testCase.run); expect(result.events, `${testCase.name} should not emit progress events`).toEqual([]); expect(result.isError()).toBe(false); - if (testCase.expectedText) { - expect(result.text()).toContain(testCase.expectedText); - } else { - expect(result.text().trim().length).toBeGreaterThan(0); - } + expect(result.text()).toContain(testCase.expectedText); } }); diff --git 
a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index 982dc4869..540d962fd 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -316,6 +316,50 @@ describe('runtime snapshot normalization', () => { ); }); + it('does not re-add swipeWithin to offscreen containers', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Offscreen panel', + frame: { x: 0, y: 900, width: 300, height: 200 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflowing child', + frame: { x: 10, y: 1160, width: 100, height: 20 }, + }), + ], + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Offscreen panel', + state: expect.objectContaining({ visible: false }), + actions: [], + }), + ); + expect(snapshot.payload.actions).not.toContainEqual({ + action: 'swipeWithin', + elementRef: 'e2', + label: 'Offscreen panel', + }); + }); + it('removes point-based actions from clipped elements with offscreen activation points', () => { const root = createNode({ type: 'Application', diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 5e81afcba..7b8a44d9d 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -407,6 +407,7 @@ function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): vo const { publicElement, metadata } = element; if ( !isContainerRole(publicElement.role) || + 
publicElement.state?.visible === false || !isVisible(publicElement.frame) || !isLargeEnoughInferredScrollContainer(publicElement.role, publicElement.frame) ) { From 3a740c62ba06bf0921d21e0b3736834b852c8851 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 01:54:15 +0100 Subject: [PATCH 12/35] fix(ui-automation): close remaining snapshot review gaps Keep sheet swipe frames large enough to produce reliable swipe points, clear tap snapshots immediately after successful execution, and restore swipe success snapshot coverage for runtime refs. Update UI automation fixtures to match the current next-step output contract. --- .../__tests__/runtime-snapshot.test.ts | 30 +++++++++++ .../ui-automation/shared/runtime-snapshot.ts | 12 +++-- src/mcp/tools/ui-automation/tap.ts | 1 + .../text/ui-automation/button--success.txt | 3 ++ .../text/ui-automation/gesture--success.txt | 3 ++ .../text/ui-automation/key-press--success.txt | 3 ++ .../ui-automation/key-sequence--success.txt | 3 ++ .../ui-automation/long-press--success.txt | 3 ++ .../ui-automation/snapshot-ui--success.txt | 52 +++++++++---------- .../cli/text/ui-automation/swipe--success.txt | 10 ++++ .../cli/text/ui-automation/tap--success.txt | 4 ++ .../cli/text/ui-automation/touch--success.txt | 3 ++ .../ui-automation/wait-for-ui--success.txt | 51 +++++++++--------- .../json/ui-automation/button--success.json | 5 +- .../json/ui-automation/gesture--success.json | 5 +- .../ui-automation/key-press--success.json | 5 +- .../ui-automation/key-sequence--success.json | 5 +- .../ui-automation/long-press--success.json | 5 +- .../ui-automation/snapshot-ui--success.json | 25 ++++++--- .../json/ui-automation/swipe--success.json | 47 +++++++++++++++++ .../mcp/json/ui-automation/tap--success.json | 39 +++++++++++++- .../json/ui-automation/touch--success.json | 5 +- .../ui-automation/wait-for-ui--success.json | 23 +++++--- .../text/ui-automation/button--success.txt | 3 ++ .../text/ui-automation/gesture--success.txt | 3 ++ 
.../text/ui-automation/key-press--success.txt | 3 ++ .../ui-automation/key-sequence--success.txt | 3 ++ .../ui-automation/long-press--success.txt | 3 ++ .../ui-automation/snapshot-ui--success.txt | 50 +++++++++--------- .../mcp/text/ui-automation/swipe--success.txt | 8 +++ .../mcp/text/ui-automation/tap--success.txt | 4 ++ .../mcp/text/ui-automation/touch--success.txt | 3 ++ .../ui-automation/wait-for-ui--success.txt | 51 +++++++++--------- .../suites/ui-automation-suite.ts | 35 +++++++++++++ 34 files changed, 378 insertions(+), 130 deletions(-) create mode 100644 src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt create mode 100644 src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json create mode 100644 src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index 540d962fd..775ddd5c3 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -283,6 +283,36 @@ describe('runtime snapshot normalization', () => { }); }); + it('keeps sheet host swipe frames non-degenerate when the grabber is near the bottom', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Expanded', + frame: { x: 157, y: 620, width: 76, height: 5 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toEqual({ + ok: true, + from: { x: 195, y: 693 }, + to: { x: 195, y: 621 }, + }); + }); + it('removes actions from elements outside the viewport', () => { const 
root = createNode({ type: 'Application', diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 7b8a44d9d..772831262 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -345,19 +345,21 @@ function findSheetGrabberDescendant( } function createSheetSwipeFrame(containerFrame: Frame, grabberFrame: Frame): Frame { - const top = Math.round( + const minimumHeight = Math.min(120, Math.max(2, containerFrame.height * 0.3)); + const bottom = Math.round(containerFrame.y + containerFrame.height * 0.85); + const preferredTop = Math.round( Math.max( grabberFrame.y + grabberFrame.height + 120, containerFrame.y + containerFrame.height * 0.35, ), ); - const bottom = Math.round(containerFrame.y + containerFrame.height * 0.85); - const height = Math.max(2, bottom - top); + const top = Math.round(Math.min(preferredTop, bottom - minimumHeight)); + return normalizeFrame({ x: containerFrame.x, - y: Math.min(top, bottom - 2), + y: top, width: containerFrame.width, - height, + height: bottom - top, }); } diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index 8d527c198..f3a2ed119 100644 --- a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -117,6 +117,7 @@ export function createTapExecutor( executor, axeHelpers, }); + clearRuntimeSnapshot(simulatorId); if (usesTouchActivation && postDelay !== undefined) { await delayMs(postDelay * 1000); } diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt index 58a78296a..5afe9edbc 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/button--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Hardware button 'home' pressed successfully. 
+ +Next steps: +1. Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt index f7cbf6735..322f385a9 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/gesture--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Gesture 'scroll-down' executed successfully. + +Next steps: +1. Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt index c687f6b65..9782d138b 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-press--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Key press (code: 4) simulated successfully. + +Next steps: +1. Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt index 6950454c4..7fa5eedd2 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/key-sequence--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Key sequence [4,5,6] executed successfully. + +Next steps: +1. 
Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt index dfce2922b..2bd35e372 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Long press on elementRef e3 for 500ms simulated successfully. + +Next steps: +1. Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt index 0dc63b95f..02c8fe280 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/snapshot-ui--success.txt @@ -3,36 +3,36 @@ Simulator: -Targets (19) — ref|action|role|label|id - e3|tap|button|C| - e4|tap|button|Âą| - e5|tap|button|%| - e6|tap|button|Ãˇ| - e7|tap|button|7| - e8|tap|button|8| - e9|tap|button|9| - e10|tap|button|×| - e11|tap|button|4| - e12|tap|button|5| - e13|tap|button|6| - e14|tap|button|-| - e15|tap|button|1| - e16|tap|button|2| - e17|tap|button|3| - e18|tap|button|+| - e19|tap|button|0| - e20|tap|button|.| - e21|tap|button|=| +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| + e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|Âą|| + e5|tap|button|%|| + e6|tap|button|Ãˇ|| + e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| Tips - - Use target refs with tap/type_text. - - Use scroll refs with swipe. 
+ - Use target refs with tap, type_text, long_press, and touch. + - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. - Use wait_for_ui for text/assertions or changing UI. ✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: -1. Refresh after layout changes: xcodebuildmcp simulator snapshot-ui --simulator-id "" -2. Wait for UI to settle: xcodebuildmcp ui-automation wait-for-ui --simulator-id "SIMULATOR_UUID" --predicate "settled" -3. Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id "" --element-ref "e3" -4. Take screenshot for verification: xcodebuildmcp simulator screenshot --simulator-id "" +1. Refresh after layout changes: xcodebuildmcp ui-automation snapshot-ui --simulator-id +2. Wait for UI to settle: xcodebuildmcp ui-automation wait-for-ui --simulator-id --predicate settled +3. Batch same-screen taps: xcodebuildmcp ui-automation batch --json '{"simulatorId":"","steps":[{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}]}' +4. Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id --element-ref e7 diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt new file mode 100644 index 000000000..b8d804265 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--success.txt @@ -0,0 +1,10 @@ + +👆 Swipe + + Simulator: + +✅ Swipe up within elementRef e3 simulated successfully. + +Next steps: +1. Batch same-screen taps: xcodebuildmcp ui-automation batch --json '{"simulatorId":"","steps":[{"action":"tap","elementRef":"e4"},{"action":"tap","elementRef":"e6"}]}' +2. 
Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id --element-ref e5 diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt index bc58f3e30..90db0d93e 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--success.txt @@ -4,3 +4,7 @@ Simulator: ✅ Tap on elementRef e3 simulated successfully. + +Next steps: +1. Batch same-screen taps: xcodebuildmcp ui-automation batch --json '{"simulatorId":"","steps":[{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}]}' +2. Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id --element-ref e7 diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt index ea972a7a7..83a80a6cb 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--success.txt @@ -4,3 +4,6 @@ Simulator: ✅ Touch event (touch down+up) on elementRef e3 executed successfully. + +Next steps: +1. 
Refresh after UI action: xcodebuildmcp ui-automation snapshot-ui --simulator-id diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt index 1e74f961e..4801e6851 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/wait-for-ui--success.txt @@ -3,34 +3,33 @@ Simulator: -Targets (19) — ref|action|role|label|id - e3|tap|button|C| - e4|tap|button|Âą| - e5|tap|button|%| - e6|tap|button|Ãˇ| - e7|tap|button|7| - e8|tap|button|8| - e9|tap|button|9| - e10|tap|button|×| - e11|tap|button|4| - e12|tap|button|5| - e13|tap|button|6| - e14|tap|button|-| - e15|tap|button|1| - e16|tap|button|2| - e17|tap|button|3| - e18|tap|button|+| - e19|tap|button|0| - e20|tap|button|.| - e21|tap|button|=| +Matched exists (1) — ref|action|role|label|value|id + e3|tap|button|C|| + +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| + e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|Âą|| + e5|tap|button|%|| + e6|tap|button|Ãˇ|| + e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| Tips - - Use target refs with tap/type_text. - - Use scroll refs with swipe. + - Use target refs with tap, type_text, long_press, and touch. + - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. - Use wait_for_ui for text/assertions or changing UI. ✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. - -Next steps: -1. Refresh runtime snapshot: xcodebuildmcp simulator snapshot-ui --simulator-id "SIMULATOR_UUID" -2. 
Wait again: xcodebuildmcp ui-automation wait-for-ui --simulator-id "SIMULATOR_UUID" --predicate "settled" diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json index 7df9f68e4..8b3c5cbd3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/button--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json index 39cb6883a..153b5dec4 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/gesture--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json index bbfc05f0e..6251ea993 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-press--success.json @@ -14,5 +14,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json index 4ef507f23..6a480c985 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json +++ 
b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/key-sequence--success.json @@ -18,5 +18,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json index 8621e1a67..62d4fd08f 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/long-press--success.json @@ -15,5 +15,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json index de6328de9..d4beeeb25 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/snapshot-ui--success.json @@ -17,28 +17,37 @@ "seq": 1, "count": 21, "targets": [ - "e3|tap|button|C||", - "e4|tap|button|Âą||", - "e5|tap|button|%||", - "e6|tap|button|Ãˇ||", "e7|tap|button|7||", "e8|tap|button|8||", "e9|tap|button|9||", - "e10|tap|button|×||", "e11|tap|button|4||", "e12|tap|button|5||", "e13|tap|button|6||", - "e14|tap|button|-||", "e15|tap|button|1||", "e16|tap|button|2||", "e17|tap|button|3||", - "e18|tap|button|+||", "e19|tap|button|0||", "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|Âą||", + "e5|tap|button|%||", + "e6|tap|button|Ãˇ||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", "e21|tap|button|=||" ], "scroll": [], + "text": [ + "e2|text|text|0||" + ], "udid": "" } - } + }, + "nextSteps": [ + "Refresh after layout changes: snapshot_ui({ simulatorId: \"\" })", + "Wait for UI to settle: wait_for_ui({ 
simulatorId: \"\", predicate: \"settled\" })", + "Batch same-screen taps: batch({ simulatorId: \"\", steps: [{\"action\":\"tap\",\"elementRef\":\"e7\"},{\"action\":\"tap\",\"elementRef\":\"e8\"}] })", + "Tap an elementRef: tap({ simulatorId: \"\", elementRef: \"e7\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json new file mode 100644 index 000000000..c74601169 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/swipe--success.json @@ -0,0 +1,47 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": false, + "error": null, + "data": { + "summary": { + "status": "SUCCEEDED" + }, + "action": { + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" + }, + "artifacts": { + "simulatorId": "" + }, + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 16, + "targets": [ + "e6|tap|button|Home Screen & App Library||com.apple.settings.homeScreen", + "e5|tap|button|Camera||com.apple.settings.camera", + "e7|tap|button|Search||com.apple.settings.search", + "e8|tap|button|StandBy||com.apple.settings.standBy", + "e9|tap|button|Screen Time||com.apple.settings.screenTime", + "e10|tap|button|Passcode||com.apple.settings.passcodeAndBiometrics", + "e11|tap|button|Privacy & Security||com.apple.settings.privacyAndSecurity", + "e12|tap|button|Game Center||com.apple.settings.gameCenter", + "e13|tap|button|iCloud||com.apple.settings.iCloud", + "e14|tap|button|Apps||com.apple.settings.apps", + "e15|tap|button|Developer||com.apple.settings.developer" + ], + "scroll": [ + "e3|swipe|other|||" + ], + "udid": "" + } + }, + "nextSteps": [ + "Batch same-screen taps: batch({ simulatorId: \"\", steps: [{\"action\":\"tap\",\"elementRef\":\"e5\"},{\"action\":\"tap\",\"elementRef\":\"e7\"}] })", + "Tap an elementRef: tap({ simulatorId: \"\", elementRef: 
\"e6\" })" + ] +} diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json index 7b215e9c7..1143d393c 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/tap--success.json @@ -13,6 +13,43 @@ }, "artifacts": { "simulatorId": "" + }, + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|Âą||", + "e5|tap|button|%||", + "e6|tap|button|Ãˇ||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", + "e21|tap|button|=||" + ], + "scroll": [], + "text": [ + "e2|text|text|0||" + ], + "udid": "" } - } + }, + "nextSteps": [ + "Batch same-screen taps: batch({ simulatorId: \"\", steps: [{\"action\":\"tap\",\"elementRef\":\"e7\"},{\"action\":\"tap\",\"elementRef\":\"e8\"}] })", + "Tap an elementRef: tap({ simulatorId: \"\", elementRef: \"e7\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json index 365d4ca63..c00c1c649 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/touch--success.json @@ -15,5 +15,8 @@ "artifacts": { "simulatorId": "" } - } + }, + "nextSteps": [ + "Refresh after UI action: snapshot_ui({ simulatorId: \"\" })" + ] } diff --git a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json 
b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json index de6328de9..b677ec15a 100644 --- a/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json +++ b/src/snapshot-tests/__fixtures__/mcp/json/ui-automation/wait-for-ui--success.json @@ -17,28 +17,37 @@ "seq": 1, "count": 21, "targets": [ - "e3|tap|button|C||", - "e4|tap|button|±||", - "e5|tap|button|%||", - "e6|tap|button|÷||", "e7|tap|button|7||", "e8|tap|button|8||", "e9|tap|button|9||", - "e10|tap|button|×||", "e11|tap|button|4||", "e12|tap|button|5||", "e13|tap|button|6||", - "e14|tap|button|-||", "e15|tap|button|1||", "e16|tap|button|2||", "e17|tap|button|3||", - "e18|tap|button|+||", "e19|tap|button|0||", "e20|tap|button|.||", + "e3|tap|button|C||", + "e4|tap|button|±||", + "e5|tap|button|%||", + "e6|tap|button|÷||", + "e10|tap|button|×||", + "e14|tap|button|-||", + "e18|tap|button|+||", "e21|tap|button|=||" ], "scroll": [], + "text": [ + "e2|text|text|0||" + ], "udid": "" + }, + "waitMatch": { + "predicate": "exists", + "matches": [ + "e3|tap|button|C||" + ] } } } diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt index 18de11b04..162a402b7 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/button--success.txt @@ -2,3 +2,6 @@ 👆 Button ✅ Hardware button 'home' pressed successfully. + +Next steps: +1.
Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt index c693a2f5d..cf1e9175d 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/gesture--success.txt @@ -2,3 +2,6 @@ 👆 Gesture ✅ Gesture 'scroll-down' executed successfully. + +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt index c886800b4..456c5cce9 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-press--success.txt @@ -2,3 +2,6 @@ ⌨️ Key Press ✅ Key press (code: 4) simulated successfully. + +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt index 6faf04241..84b66caa3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/key-sequence--success.txt @@ -2,3 +2,6 @@ ⌨️ Key Sequence ✅ Key sequence [4,5,6] executed successfully. + +Next steps: +1.
Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt index 46711e63c..f0df9dad8 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--success.txt @@ -2,3 +2,6 @@ 👆 Long Press ✅ Long press on elementRef e3 for 500ms simulated successfully. + +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt index 014b6969e..7c6323932 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/snapshot-ui--success.txt @@ -1,36 +1,36 @@ 📷 Snapshot UI -Targets (19) — ref|action|role|label|id - e3|tap|button|C| - e4|tap|button|±| - e5|tap|button|%| - e6|tap|button|÷| - e7|tap|button|7| - e8|tap|button|8| - e9|tap|button|9| - e10|tap|button|×| - e11|tap|button|4| - e12|tap|button|5| - e13|tap|button|6| - e14|tap|button|-| - e15|tap|button|1| - e16|tap|button|2| - e17|tap|button|3| - e18|tap|button|+| - e19|tap|button|0| - e20|tap|button|.| - e21|tap|button|=| +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| + e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|±|| + e5|tap|button|%|| + e6|tap|button|÷|| + e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| Tips - - Use target refs with tap/type_text. - - Use scroll refs with swipe.
+ - Use target refs with tap, type_text, long_press, and touch. + - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. - Use wait_for_ui for text/assertions or changing UI. ✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: 1. Refresh after layout changes: snapshot_ui({ simulatorId: "" }) -2. Wait for UI to settle: wait_for_ui({ simulatorId: "SIMULATOR_UUID", predicate: "settled" }) -3. Tap an elementRef: tap({ simulatorId: "", elementRef: "e3" }) -4. Take screenshot for verification: screenshot({ simulatorId: "" }) +2. Wait for UI to settle: wait_for_ui({ simulatorId: "", predicate: "settled" }) +3. Batch same-screen taps: batch({ simulatorId: "", steps: [{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}] }) +4. Tap an elementRef: tap({ simulatorId: "", elementRef: "e7" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt new file mode 100644 index 000000000..6c402717c --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--success.txt @@ -0,0 +1,8 @@ + +👆 Swipe + +✅ Swipe up within elementRef e3 simulated successfully. + +Next steps: +1. Batch same-screen taps: batch({ simulatorId: "", steps: [{"action":"tap","elementRef":"e5"},{"action":"tap","elementRef":"e7"}] }) +2. Tap an elementRef: tap({ simulatorId: "", elementRef: "e6" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt index 3fbb4eeaa..303efa283 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--success.txt @@ -2,3 +2,7 @@ 👆 Tap ✅ Tap on elementRef e3 simulated successfully. + +Next steps: +1. 
Batch same-screen taps: batch({ simulatorId: "", steps: [{"action":"tap","elementRef":"e7"},{"action":"tap","elementRef":"e8"}] }) +2. Tap an elementRef: tap({ simulatorId: "", elementRef: "e7" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt index 9f28f64ad..c7256530e 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--success.txt @@ -2,3 +2,6 @@ 👆 Touch ✅ Touch event (touch down+up) on elementRef e3 executed successfully. + +Next steps: +1. Refresh after UI action: snapshot_ui({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt index 86c03d978..f575f68b7 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/wait-for-ui--success.txt @@ -1,34 +1,33 @@ ⚙️ Wait for UI -Targets (19) — ref|action|role|label|id - e3|tap|button|C| - e4|tap|button|±| - e5|tap|button|%| - e6|tap|button|÷| - e7|tap|button|7| - e8|tap|button|8| - e9|tap|button|9| - e10|tap|button|×| - e11|tap|button|4| - e12|tap|button|5| - e13|tap|button|6| - e14|tap|button|-| - e15|tap|button|1| - e16|tap|button|2| - e17|tap|button|3| - e18|tap|button|+| - e19|tap|button|0| - e20|tap|button|.| - e21|tap|button|=| +Matched exists (1) — ref|action|role|label|value|id + e3|tap|button|C|| + +Targets (19) — ref|action|role|label|value|id + e7|tap|button|7|| + e8|tap|button|8|| + e9|tap|button|9|| + e11|tap|button|4|| + e12|tap|button|5|| + e13|tap|button|6|| + e15|tap|button|1|| + e16|tap|button|2|| + e17|tap|button|3|| + e19|tap|button|0|| + e20|tap|button|.|| + e3|tap|button|C|| + e4|tap|button|±|| + e5|tap|button|%|| + e6|tap|button|÷|| +
e10|tap|button|×|| + e14|tap|button|-|| + e18|tap|button|+|| + e21|tap|button|=|| Tips - - Use target refs with tap/type_text. - - Use scroll refs with swipe. + - Use target refs with tap, type_text, long_press, and touch. + - Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output. - Use wait_for_ui for text/assertions or changing UI. ✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. - -Next steps: -1. Refresh runtime snapshot: snapshot_ui({ simulatorId: "SIMULATOR_UUID" }) -2. Wait again: wait_for_ui({ simulatorId: "SIMULATOR_UUID", predicate: "settled" }) diff --git a/src/snapshot-tests/suites/ui-automation-suite.ts b/src/snapshot-tests/suites/ui-automation-suite.ts index d2acb9064..36064ec35 100644 --- a/src/snapshot-tests/suites/ui-automation-suite.ts +++ b/src/snapshot-tests/suites/ui-automation-suite.ts @@ -33,6 +33,21 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi snapshotCaptured = true; } + async function captureFirstScrollRef(bundleId: string): Promise { + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { text, isError } = await harness.invoke('ui-automation', 'snapshot-ui', { + simulatorId: simulatorUdid, + }); + expect(isError).toBe(false); + + return /\b(e\d+)\|swipe\|/.exec(text)?.[1] ?? 
null; + } + beforeAll(async () => { vi.setConfig({ testTimeout: 120_000 }); simulatorUdid = await ensureSimulatorBooted('iPhone 17 Pro'); @@ -75,6 +90,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('touch', () => { it('success', async () => { + snapshotCaptured = false; await refreshRuntimeSnapshot(); const { text, isError } = await harness.invoke('ui-automation', 'touch', { @@ -101,6 +117,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('long-press', () => { it('success', async () => { + snapshotCaptured = false; await refreshRuntimeSnapshot(); const { text, isError } = await harness.invoke('ui-automation', 'long-press', { @@ -124,6 +141,24 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); describe('swipe', () => { + it('success', async () => { + if (runtime === 'cli/json') { + return; + } + + const scrollRef = await captureFirstScrollRef('com.apple.Preferences'); + expect(scrollRef).not.toBeNull(); + + const { text, isError } = await harness.invoke('ui-automation', 'swipe', { + simulatorId: simulatorUdid, + withinElementRef: scrollRef, + direction: 'up', + }); + expect(isError).toBe(false); + expectFixture(text, 'swipe--success'); + snapshotCaptured = false; + }); + it('error - target not actionable', async () => { await refreshRuntimeSnapshot(); From c7e93f97a7b85647b90671d0c6e2290777312380 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 01:59:20 +0100 Subject: [PATCH 13/35] fix(ui-automation): Allow empty snapshot polling Treat empty AX hierarchies as valid snapshot_ui and wait_for_ui states so transient app launches or screen transitions do not fail parsing. Keep action and post-action snapshot parsing strict by default so stale or missing post-action captures still surface as failures. 
Co-Authored-By: Codex --- .../__tests__/runtime-snapshot.test.ts | 21 ++++++- .../__tests__/snapshot_ui.test.ts | 60 +++++++++++++++---- .../__tests__/wait_for_ui.test.ts | 35 +++++++++-- .../ui-automation/shared/runtime-snapshot.ts | 16 ++--- src/mcp/tools/ui-automation/snapshot_ui.ts | 6 +- src/mcp/tools/ui-automation/wait_for_ui.ts | 7 ++- 6 files changed, 118 insertions(+), 27 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index 775ddd5c3..fb61f7851 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -4,6 +4,7 @@ import { createRuntimeSnapshotRecord, extractAccessibilityHierarchy, getPrimaryRuntimeElement, + parseRuntimeSnapshotResponse, getRuntimeElementActivationPoint, getRuntimeElementSwipePoints, RuntimeSnapshotParseError, @@ -119,17 +120,31 @@ describe('runtime snapshot normalization', () => { expect(hierarchy[0]?.AXLabel).toBe('Continue'); }); - it('throws typed parse errors for invalid or empty describe-ui responses', () => { + it('throws typed parse errors for malformed describe-ui responses', () => { expect(() => extractAccessibilityHierarchy('not json')).toThrow(RuntimeSnapshotParseError); expect(() => extractAccessibilityHierarchy(JSON.stringify({ value: [] }))).toThrow( RuntimeSnapshotParseError, ); - expect(() => extractAccessibilityHierarchy(JSON.stringify([]))).toThrow( + expect(() => extractAccessibilityHierarchy(JSON.stringify({}))).toThrow( RuntimeSnapshotParseError, ); - expect(() => extractAccessibilityHierarchy(JSON.stringify({ elements: [] }))).toThrow( + }); + + it('allows empty describe-ui arrays only when the caller opts in', () => { + expect(extractAccessibilityHierarchy(JSON.stringify([]))).toEqual([]); + expect(extractAccessibilityHierarchy(JSON.stringify({ elements: [] }))).toEqual([]); + expect(() => parseRuntimeSnapshotResponse({ 
simulatorId, responseText: '[]' })).toThrow( RuntimeSnapshotParseError, ); + + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId, + responseText: '{"elements": []}', + allowEmpty: true, + }); + + expect(snapshot.payload.elements).toEqual([]); + expect(snapshot.payload.actions).toEqual([]); }); it('selects the primary element for semantic next steps', () => { diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index 63120a5e3..aa60f8e7d 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -1117,7 +1117,7 @@ describe('Snapshot UI Plugin', () => { ); }); - it('should reject empty AXe payloads without replacing a prior runtime snapshot', async () => { + it('should accept empty AXe payloads and replace a prior runtime snapshot', async () => { __resetRuntimeSnapshotStoreForTests(); const simulatorId = '12345678-1234-4234-8234-123456789012'; const mockAxeHelpers = { @@ -1132,10 +1132,9 @@ describe('Snapshot UI Plugin', () => { process: { pid: 12345 }, }); await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); - const previousSnapshot = getRuntimeSnapshot(simulatorId); - expect(previousSnapshot?.payload.elements).toHaveLength(1); + expect(getRuntimeSnapshot(simulatorId)?.payload.elements).toHaveLength(1); - for (const output of ['[]', '{"elements": []}', '{}']) { + for (const output of ['[]', '{"elements": []}']) { const emptyExecutor = createMockExecutor({ success: true, output, @@ -1145,16 +1144,57 @@ describe('Snapshot UI Plugin', () => { const { ctx, result, run } = createMockToolHandlerContext(); await run(() => snapshot_uiLogic({ simulatorId }, emptyExecutor, mockAxeHelpers)); - expect(result.isError()).toBe(true); - expect( + expect(result.isError()).toBe(false); + const capture = ctx.structuredOutput?.result.kind === 'capture-result' - ? 
ctx.structuredOutput.result.uiError?.code - : undefined, - ).toBe('SNAPSHOT_PARSE_FAILED'); - expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); + ? ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + elements: [], + actions: [], + }), + ); + expect(getRuntimeSnapshot(simulatorId)?.payload).toBe(capture); } }); + it('should preserve runtime snapshot store when AXe returns a non-array payload', async () => { + __resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + const previousSnapshot = getRuntimeSnapshot(simulatorId); + + const invalidExecutor = createMockExecutor({ + success: true, + output: '{}', + error: undefined, + process: { pid: 12345 }, + }); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic({ simulatorId }, invalidExecutor, mockAxeHelpers)); + + expect(result.isError()).toBe(true); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.uiError?.code + : undefined, + ).toBe('SNAPSHOT_PARSE_FAILED'); + expect(getRuntimeSnapshot(simulatorId)).toBe(previousSnapshot); + }); + it('should preserve runtime snapshot store when the debugger guard blocks before AXe runs', async () => { __resetRuntimeSnapshotStoreForTests(); const simulatorId = '12345678-1234-4234-8234-123456789012'; diff --git a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts index cdf474769..83d4fa8db 100644 --- a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts @@ -820,19 +820,44 @@ describe('Wait for UI Plugin', () => { expect(getRuntimeSnapshot(simulatorId, 0)).toBe(previousSnapshot); }); - it('preserves the runtime store when every poll returns an empty UI payload', async () => { + it('records empty UI payloads and times out with empty candidates', async () => { recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); - const previousSnapshot = getRuntimeSnapshot(simulatorId, 0); const { executor } = createSequencedExecutor([{ success: true, output: '[]' }]); const result = await runWaitForUi( - { simulatorId, predicate: 'settled', timeoutMs: 0 }, + { simulatorId, predicate: 'exists', label: 'Ready', timeoutMs: 0 }, executor, ); expect(result.didError).toBe(true); - expect(result.uiError?.code).toBe('SNAPSHOT_PARSE_FAILED'); - expect(getRuntimeSnapshot(simulatorId, 0)).toBe(previousSnapshot); + expect(result.uiError).toMatchObject({ code: 'WAIT_TIMEOUT', candidates: [] }); + expect(result.capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + elements: [], + actions: [], + }), + ); + expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('succeeds for gone when an empty UI payload has no matching elements', async () => { + const { executor } = createSequencedExecutor([{ success: true, output: '{"elements": 
[]}' }]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', label: 'Loading', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + expect(result.capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + elements: [], + actions: [], + }), + ); }); it('preserves the runtime store when the debugger guard blocks before polling', async () => { diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 772831262..4a01dbbd5 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -547,12 +547,6 @@ export function extractAccessibilityHierarchy(responseText: string): Accessibili ); })(); - if (hierarchy.length === 0) { - throw new RuntimeSnapshotParseError( - 'AXe describe-ui returned an empty accessibility element array.', - ); - } - return hierarchy; } @@ -602,10 +596,18 @@ export function parseRuntimeSnapshotResponse(params: { simulatorId: string; responseText: string; nowMs?: number; + allowEmpty?: boolean; }): RuntimeSnapshotRecord { + const uiHierarchy = extractAccessibilityHierarchy(params.responseText); + if (uiHierarchy.length === 0 && params.allowEmpty !== true) { + throw new RuntimeSnapshotParseError( + 'AXe describe-ui returned an empty accessibility element array.', + ); + } + return createRuntimeSnapshotRecord({ simulatorId: params.simulatorId, - uiHierarchy: extractAccessibilityHierarchy(params.responseText), + uiHierarchy, nowMs: params.nowMs, }); } diff --git a/src/mcp/tools/ui-automation/snapshot_ui.ts b/src/mcp/tools/ui-automation/snapshot_ui.ts index 7d8dceeaa..843c1941a 100644 --- a/src/mcp/tools/ui-automation/snapshot_ui.ts +++ b/src/mcp/tools/ui-automation/snapshot_ui.ts @@ -80,7 +80,11 @@ export function createSnapshotUiExecutor( axeHelpers, ); - const snapshot = 
parseRuntimeSnapshotResponse({ simulatorId, responseText }); + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId, + responseText, + allowEmpty: true, + }); recordRuntimeSnapshot(snapshot); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); diff --git a/src/mcp/tools/ui-automation/wait_for_ui.ts b/src/mcp/tools/ui-automation/wait_for_ui.ts index 5b0160f51..e7a8579e3 100644 --- a/src/mcp/tools/ui-automation/wait_for_ui.ts +++ b/src/mcp/tools/ui-automation/wait_for_ui.ts @@ -236,7 +236,12 @@ export function createWaitForUiExecutor( axeHelpers, ); const nowMs = timing.now(); - const snapshot = parseRuntimeSnapshotResponse({ simulatorId, responseText, nowMs }); + const snapshot = parseRuntimeSnapshotResponse({ + simulatorId, + responseText, + nowMs, + allowEmpty: true, + }); latestSnapshot = snapshot; lastParseError = null; lastPollError = null; From b476c947b70dad79acf9f914048091726569fa5e Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 03:04:04 +0100 Subject: [PATCH 14/35] fix(ui-automation): Report semantic touch commands accurately Use the actual AXe command name when executing semantic tap commands so switch-backed touch interactions log and report as touch instead of tap. 
Co-Authored-By: Codex --- .../__tests__/semantic-tap.test.ts | 39 +++++++++++++++++++ .../ui-automation/shared/semantic-tap.ts | 24 +++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts b/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts index 47b57ee39..b7795f53f 100644 --- a/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/semantic-tap.test.ts @@ -91,6 +91,44 @@ describe('semantic tap helpers', () => { ]); }); + it('uses the executed command name for switch touch commands', async () => { + const [element] = createElements([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Alerts', + frame: { x: 10, y: 20, width: 200, height: 40 }, + }), + ]); + const command = createSemanticTapCommand(element!, 'e1'); + const { calls, executor } = createSequencedExecutor([{ success: true, output: 'ok' }]); + + await executeSemanticTapWithAmbiguityFallback({ + command, + simulatorId, + executor, + axeHelpers: createMockAxeHelpers(), + }); + + expect(calls[0]).toEqual( + expect.objectContaining({ + command: [ + '/mocked/axe/path', + 'touch', + '-x', + '158', + '-y', + '40', + '--down', + '--up', + '--udid', + simulatorId, + ], + logPrefix: '[AXe]: touch', + }), + ); + }); + it('retries recoverable selector failures with coordinates', async () => { const [element] = createElements([ createNode({ AXUniqueId: 'continue.button', AXLabel: 'Continue' }), @@ -112,6 +150,7 @@ describe('semantic tap helpers', () => { ['tap', '--id', 'continue.button', '--element-type', 'Button'], ['tap', '-x', '60', '-y', '40'], ]); + expect(calls.map((call) => call.logPrefix)).toEqual(['[AXe]: tap', '[AXe]: tap']); }); it('does not retry unrecoverable selector failures', async () => { diff --git a/src/mcp/tools/ui-automation/shared/semantic-tap.ts b/src/mcp/tools/ui-automation/shared/semantic-tap.ts index 1c7990bfd..4c940c0ce 100644 --- 
a/src/mcp/tools/ui-automation/shared/semantic-tap.ts +++ b/src/mcp/tools/ui-automation/shared/semantic-tap.ts @@ -118,6 +118,14 @@ export function createSemanticTapCommand( }; } +function readAxeCommandName(args: readonly string[]): string { + const commandName = args[0]; + if (!commandName) { + throw new Error('Semantic tap command has no AXe command name.'); + } + return commandName; +} + export function createSemanticTapBatchSteps(command: SemanticTapCommand): string[] { if (command.coordinateArgs[0] !== 'touch') { return [command.coordinateArgs.join(' ')]; @@ -136,12 +144,24 @@ export async function executeSemanticTapWithAmbiguityFallback(params: { const { command, simulatorId, executor, axeHelpers } = params; try { - await executeAxeCommand(command.primaryArgs, simulatorId, 'tap', executor, axeHelpers); + await executeAxeCommand( + command.primaryArgs, + simulatorId, + readAxeCommandName(command.primaryArgs), + executor, + axeHelpers, + ); } catch (error) { if (!command.selectorArgs || !isRecoverableAxeSelectorError(error)) { throw error; } - await executeAxeCommand(command.coordinateArgs, simulatorId, 'tap', executor, axeHelpers); + await executeAxeCommand( + command.coordinateArgs, + simulatorId, + readAxeCommandName(command.coordinateArgs), + executor, + axeHelpers, + ); } } From 473ae0cd787e43a93800d8073a92d1cc8af7fc66 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 03:06:25 +0100 Subject: [PATCH 15/35] fix(ui-automation): Add wait-for-ui next steps Declare wait_for_ui next-step templates so its success params are consumed and rendered for follow-up snapshot and settled-wait actions. 
Co-Authored-By: Codex --- manifests/tools/wait_for_ui.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/manifests/tools/wait_for_ui.yaml b/manifests/tools/wait_for_ui.yaml index 543288129..bac213089 100644 --- a/manifests/tools/wait_for_ui.yaml +++ b/manifests/tools/wait_for_ui.yaml @@ -14,3 +14,12 @@ annotations: readOnlyHint: true destructiveHint: false openWorldHint: false +nextSteps: + - label: Capture a fresh runtime UI snapshot + toolId: snapshot_ui + priority: 1 + when: success + - label: Wait until the UI is settled + toolId: wait_for_ui + priority: 2 + when: success From 43b8d73e523b15f91afbc80171316dad072cd125 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 03:18:24 +0100 Subject: [PATCH 16/35] fix(ui-automation): Tighten action diagnostics Add structured batch failure details, classify context menu roles before text roles, and report post-action snapshot capture failures with a dedicated code. Co-Authored-By: Codex --- .../ui-automation/__tests__/batch.test.ts | 5 +++++ .../__tests__/runtime-snapshot.test.ts | 22 +++++++++++++++++++ .../tools/ui-automation/__tests__/tap.test.ts | 19 ++++++++++++++++ src/mcp/tools/ui-automation/batch.ts | 9 +++++++- .../shared/post-action-snapshot.ts | 2 +- .../ui-automation/shared/runtime-snapshot.ts | 2 +- .../ui-automation/shared/snapshot-ui-state.ts | 22 +++++++++---------- src/types/ui-snapshot.ts | 1 + 8 files changed, 68 insertions(+), 14 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/batch.test.ts b/src/mcp/tools/ui-automation/__tests__/batch.test.ts index 5c0957660..ce40bcf69 100644 --- a/src/mcp/tools/ui-automation/__tests__/batch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/batch.test.ts @@ -256,6 +256,11 @@ describe('Batch UI Automation Tool', () => { ); expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + 
expect(result.diagnostics?.errors?.[0]?.message).toBe('step failed'); expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index fb61f7851..836dbe24c 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -94,6 +94,28 @@ describe('runtime snapshot normalization', () => { ); }); + it('classifies context menu items as menu controls instead of text', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'MenuItem', + role: 'AXMenuItem', + role_description: 'context menu item', + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'menu', + actions: expect.arrayContaining(['longPress', 'touch']), + }), + ); + expect(snapshot.payload.elements[0]?.actions).not.toContain('tap'); + }); + it('derives deterministic screen hashes from normalized UI content', () => { const uiHierarchy = [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Cancel' })]; diff --git a/src/mcp/tools/ui-automation/__tests__/tap.test.ts b/src/mcp/tools/ui-automation/__tests__/tap.test.ts index e118c4a06..7a35a07fe 100644 --- a/src/mcp/tools/ui-automation/__tests__/tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/tap.test.ts @@ -117,6 +117,25 @@ describe('Tap Plugin', () => { expect(actionCommands(calls)).toHaveLength(1); }); + it('reports post-action snapshot capture failures without failing the tap action', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { executor } = createSequencedExecutor([ + { success: true, output: 'tap succeeded' }, + { success: false, error: 'describe-ui failed' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + 
expect(result.didError).toBe(false); + expect(result.uiError).toMatchObject({ + code: 'SNAPSHOT_CAPTURE_FAILED', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.error).toBeNull(); + expect(result.capture).toBeUndefined(); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + it('includes element type when tapping a referenced element with a shared identifier', async () => { recordSnapshot([ createNode({ diff --git a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts index ad28e6ef3..9b02f4bd4 100644 --- a/src/mcp/tools/ui-automation/batch.ts +++ b/src/mcp/tools/ui-automation/batch.ts @@ -21,6 +21,7 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, shouldInvalidateRuntimeSnapshotAfterActionError, @@ -206,7 +207,13 @@ export function createBatchExecutor( axeFailureMessage: () => `Failed to execute AXe batch with ${steps.length} steps.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); - return createUiActionFailureResult(action, simulatorId, failure.message); + return createUiActionFailureResult(action, simulatorId, failure.message, { + details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + }), + }); } const captureResult = await captureRuntimeSnapshotAfterActionSafely({ diff --git a/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts index 8ab5b0dc9..240041377 100644 --- a/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/post-action-snapshot.ts @@ -53,7 +53,7 @@ export async function captureRuntimeSnapshotAfterActionSafely(params: { return { warning: `${message} 
${POST_ACTION_SNAPSHOT_RECOVERY_HINT}`, uiError: { - code: isParseFailure ? 'SNAPSHOT_PARSE_FAILED' : 'ACTION_FAILED', + code: isParseFailure ? 'SNAPSHOT_PARSE_FAILED' : 'SNAPSHOT_CAPTURE_FAILED', message: `${message} ${detail}`, recoveryHint: POST_ACTION_SNAPSHOT_RECOVERY_HINT, }, diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 4a01dbbd5..9c6e54082 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -116,6 +116,7 @@ function deriveRole(node: AccessibilityNode): RuntimeElementRoleV1 | undefined { ) { return 'text-field'; } + if (/menu/.test(roleText)) return 'menu'; if (/statictext|text/.test(roleText)) return 'text'; if (/image/.test(roleText)) return 'image'; if (/switch|checkbox|check box/.test(roleText)) return 'switch'; @@ -124,7 +125,6 @@ function deriveRole(node: AccessibilityNode): RuntimeElementRoleV1 | undefined { if (/scroll/.test(roleText)) return 'scroll-view'; if (/table|list|outline|collection/.test(roleText)) return 'list'; if (/(^|\b|ax)tab(\b|group|$)/.test(roleText)) return 'tab'; - if (/menu/.test(roleText)) return 'menu'; return 'other'; } diff --git a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts index efdb5f15c..dc5801d2d 100644 --- a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts +++ b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts @@ -22,15 +22,12 @@ function snapshotMissingError(): UiAutomationRecoverableError { }; } -function snapshotExpiredError( - snapshot: RuntimeSnapshotRecord, - nowMs: number, -): UiAutomationRecoverableError { +function snapshotExpiredError(snapshotAgeMs: number): UiAutomationRecoverableError { return { code: 'SNAPSHOT_EXPIRED', message: 'The runtime UI snapshot for this simulator has expired.', recoveryHint: 'Run snapshot_ui again and retry with a current elementRef.', - 
snapshotAgeMs: snapshotAgeMs(snapshot, nowMs), + snapshotAgeMs, }; } @@ -83,17 +80,20 @@ export function resolveElementRef( requiredAction: RuntimeActionNameV1, nowMs = Date.now(), ): RuntimeElementResolution { - const snapshot = runtimeSnapshots.get(simulatorId) ?? null; - if (!snapshot) { + const lookup = getRuntimeSnapshotLookup(simulatorId, nowMs); + if (lookup.status === 'missing') { return { ok: false, error: snapshotMissingError() }; } - const ageMs = snapshotAgeMs(snapshot, nowMs); - if (nowMs > snapshot.expiresAtMs) { - runtimeSnapshots.delete(simulatorId); - return { ok: false, error: snapshotExpiredError(snapshot, nowMs) }; + if (lookup.status === 'expired') { + return { ok: false, error: snapshotExpiredError(lookup.snapshotAgeMs ?? 0) }; } + const snapshot = lookup.snapshot; + if (!snapshot) { + throw new Error('Runtime snapshot lookup returned an available status without a snapshot.'); + } + const ageMs = lookup.snapshotAgeMs ?? 0; const element = snapshot.elementsByRef.get(elementRef); if (!element) { return { diff --git a/src/types/ui-snapshot.ts b/src/types/ui-snapshot.ts index 6f4d09cad..aee7cfcab 100644 --- a/src/types/ui-snapshot.ts +++ b/src/types/ui-snapshot.ts @@ -104,6 +104,7 @@ export type UiAutomationRecoverableErrorCode = | 'SNAPSHOT_MISSING' | 'SNAPSHOT_EXPIRED' | 'SNAPSHOT_PARSE_FAILED' + | 'SNAPSHOT_CAPTURE_FAILED' | 'ELEMENT_REF_NOT_FOUND' | 'TARGET_NOT_FOUND' | 'TARGET_AMBIGUOUS' From ca9f245f3d350ee2b5c080ae037c1b235784c9e0 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 03:22:43 +0100 Subject: [PATCH 17/35] test(ui-automation): Avoid button settle delay in progress test Override the production button settle delay in the non-streaming progress test so the unit path stays fast while preserving runtime behavior. 
Co-Authored-By: Codex --- .../ui-automation/__tests__/non_streaming_progress.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts index 65e47dc54..065301319 100644 --- a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts @@ -41,6 +41,8 @@ describe('ui automation non-streaming tools', () => { { simulatorId, buttonType: 'home' }, createMockExecutor({ success: true }), axeHelpers, + undefined, + 0, ), expectedText: "Hardware button 'home' pressed successfully.", }, From 796525c936b6e9fd295ad4ec363eda39b1b38282 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 03:41:14 +0100 Subject: [PATCH 18/35] fix(ui-automation): Tighten post-action snapshot handling Skip post-action snapshot capture for safe same-screen batches, add long-press capture parity, and cover runtime next-step foreground ranking rules directly. 
Co-Authored-By: Codex --- .../ui-automation/__tests__/batch.test.ts | 20 ++- .../__tests__/long_press.test.ts | 2 +- .../__tests__/runtime-next-steps.test.ts | 161 ++++++++++++++++++ src/mcp/tools/ui-automation/batch.ts | 4 + src/mcp/tools/ui-automation/long_press.ts | 17 +- .../shared/runtime-next-steps.ts | 4 +- 6 files changed, 197 insertions(+), 11 deletions(-) create mode 100644 src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts diff --git a/src/mcp/tools/ui-automation/__tests__/batch.test.ts b/src/mcp/tools/ui-automation/__tests__/batch.test.ts index ce40bcf69..4d0b52c36 100644 --- a/src/mcp/tools/ui-automation/__tests__/batch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/batch.test.ts @@ -203,16 +203,22 @@ describe('Batch UI Automation Tool', () => { createNode({ type: 'Switch', role: 'AXSwitch', AXValue: '0' }), createNode({ type: 'Switch', role: 'AXSwitch', AXValue: 'off' }), ]); + const { calls, executor } = createTrackingExecutor(); - const result = await runBatch({ - simulatorId, - steps: [ - { action: 'tap', elementRef: 'e1' }, - { action: 'tap', elementRef: 'e2' }, - ], - }); + const result = await runBatch( + { + simulatorId, + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + executor, + ); expect(result.didError).toBe(false); + expect(result.capture).toBeUndefined(); + expect(calls.some((call) => call.command[1] === 'describe-ui')).toBe(false); expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); diff --git a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts index 35a641078..a24ffd751 100644 --- a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts @@ -59,8 +59,8 @@ describe('Long Press Plugin', () => { expect(result).toMatchObject({ didError: false, action: { type: 'long-press', elementRef: 'e1', durationMs: 1500 }, + capture: { type: 
'runtime-snapshot', simulatorId }, }); - expect(calls).toHaveLength(1); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'touch', diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts new file mode 100644 index 000000000..36d2ebb9a --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts @@ -0,0 +1,161 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import { createRuntimeSnapshotNextSteps } from '../shared/runtime-next-steps.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { createNode, recordSnapshot, simulatorId } from './ui-action-test-helpers.ts'; + +function currentRuntimeSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!.payload; +} + +function createScrollView(overrides: Partial<AccessibilityNode> = {}): AccessibilityNode { + return createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 390, height: 844 }, + AXIdentifier: 'scroll-view', + ...overrides, + }); +} + +function nestNode(node: AccessibilityNode, depth: number): AccessibilityNode { + let current = node; + for (let index = 0; index < depth; index += 1) { + current = createNode({ + type: 'Group', + role: 'AXGroup', + AXIdentifier: `container.${index}`, + frame: current.frame, + children: [current], + }); + } + return current; +} + +describe('runtime snapshot next steps', () => { + beforeEach(() => { + __resetRuntimeSnapshotStoreForTests(); + }); + + it('prefers tap and scroll examples from the active foreground container', () => { + recordSnapshot([ + createScrollView({ + AXIdentifier: 'weather.backgroundList', + children: [ + createNode({ + AXLabel: 'Background, Details', + AXIdentifier: 'weather.backgroundCard', + frame: { 
x: 20, y: 120, width: 350, height: 80 }, + }), + ], + }), + createScrollView({ + AXIdentifier: 'weather.settingsSheet', + frame: { x: 0, y: 420, width: 390, height: 424 }, + children: [ + createNode({ AXLabel: 'Close', frame: { x: 310, y: 430, width: 60, height: 40 } }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 480, width: 350, height: 40 }, + }), + createNode({ + AXLabel: 'London, England', + AXIdentifier: 'weather.locationCard', + frame: { x: 20, y: 540, width: 350, height: 80 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const foregroundScrollRef = snapshot.elements.find( + (element) => element.identifier === 'weather.settingsSheet', + )?.ref; + const foregroundCardRef = snapshot.elements.find( + (element) => element.identifier === 'weather.locationCard', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: foregroundCardRef }, + }); + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: foregroundScrollRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + + it('uses hierarchy depth only as a foreground-root tie breaker', () => { + recordSnapshot([ + nestNode( + createScrollView({ + AXIdentifier: 'deep.stateControls', + frame: { x: 0, y: 0, width: 390, height: 80 }, + children: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Nested switch', + AXValue: '0', + }), + ], + }), + 40, + ), + createScrollView({ + AXIdentifier: 'shallow.searchPanel', + frame: { x: 0, y: 100, width: 390, height: 500 }, + children: [ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 130, width: 350, height: 40 }, + }), + ], + }), + ]); + + const 
snapshot = currentRuntimeSnapshot(); + const shallowSearchRef = snapshot.elements.find( + (element) => element.identifier === 'shallow.searchPanel', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: shallowSearchRef, + direction: 'up', + distance: 0.5, + }, + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts index 9b02f4bd4..30967f693 100644 --- a/src/mcp/tools/ui-automation/batch.ts +++ b/src/mcp/tools/ui-automation/batch.ts @@ -216,6 +216,10 @@ export function createBatchExecutor( }); } + if (resolvedSteps.preserveSnapshot) { + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + } + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ simulatorId, executor, diff --git a/src/mcp/tools/ui-automation/long_press.ts b/src/mcp/tools/ui-automation/long_press.ts index 38953c6e9..062107899 100644 --- a/src/mcp/tools/ui-automation/long_press.ts +++ b/src/mcp/tools/ui-automation/long_press.ts @@ -20,6 +20,7 @@ import { import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -100,7 +101,6 @@ export function createLongPressExecutor( await executeAxeCommand(commandArgs, simulatorId, 'touch', executor, axeHelpers); clearRuntimeSnapshot(simulatorId); 
log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { clearRuntimeSnapshot(simulatorId); @@ -118,6 +118,21 @@ export function createLongPressExecutor( }), }); } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), + }, + ); }; } diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index 47ed634aa..3dd010d50 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -270,8 +270,8 @@ function findActiveForegroundRoot( (hasDismissControl ? 100 : 0) + (hasTextEntry ? 60 : 0) + (hasStateControls ? 30 : 0) + - record.metadata.depth + - (indexByRef.get(record.publicElement.ref) ?? 0) / 1000; + record.metadata.depth / 1000 + + (indexByRef.get(record.publicElement.ref) ?? 0) / 1_000_000; scoreByRef.set(record.publicElement.ref, score); return score; } From 0884f89902e04075d5e5c531dd49961310e34043 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 04:01:47 +0100 Subject: [PATCH 19/35] fix(ui-automation): Restrict batch snapshot preservation to switches Only treat inactive switch elements as safe for same-screen batch snapshot preservation. This avoids preserving stale snapshots when a non-switch control happens to expose inactive selected/value state. Adds regression coverage for an inactive non-switch element triggering a fresh post-action runtime snapshot. 
--- .../tools/ui-automation/__tests__/batch.test.ts | 15 +++++++++++++++ src/mcp/tools/ui-automation/batch.ts | 5 ++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/mcp/tools/ui-automation/__tests__/batch.test.ts b/src/mcp/tools/ui-automation/__tests__/batch.test.ts index 4d0b52c36..d7148b80f 100644 --- a/src/mcp/tools/ui-automation/__tests__/batch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/batch.test.ts @@ -232,6 +232,21 @@ describe('Batch UI Automation Tool', () => { expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); + it('does not preserve snapshots for inactive non-switch elements', async () => { + recordSnapshot([createNode({ AXValue: 'not selected' })]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { simulatorId, steps: [{ action: 'tap', elementRef: 'e1' }] }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toMatchObject({ type: 'runtime-snapshot', simulatorId }); + expect(calls.some((call) => call.command[1] === 'describe-ui')).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + it('pre-resolves all refs and fails before execution if any ref is invalid', async () => { recordSnapshot([createNode()]); const { calls, executor } = createTrackingExecutor(); diff --git a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts index 30967f693..cde2a9009 100644 --- a/src/mcp/tools/ui-automation/batch.ts +++ b/src/mcp/tools/ui-automation/batch.ts @@ -76,8 +76,11 @@ function isSafeSameScreenBatchElement(element: { return false; } + if (element.role !== 'switch') { + return false; + } + return ( - element.role === 'switch' || element.state?.selected === false || value === 'not selected' || value === '0' || From bcc772036269e189bc7317832425d4862a9a09b8 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 04:13:34 +0100 Subject: [PATCH 20/35] fix(ui-automation): Add snapshot capture error to 
schemas Allow SNAPSHOT_CAPTURE_FAILED in the ui-action and capture-result recoverable UI error enums so post-action capture failures validate against the structured output contract. --- .../xcodebuildmcp.output.capture-result/2.schema.json | 1 + .../xcodebuildmcp.output.ui-action-result/2.schema.json | 1 + 2 files changed, 2 insertions(+) diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json index 654589ee6..7528ddd14 100644 --- a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -215,6 +215,7 @@ "SNAPSHOT_MISSING", "SNAPSHOT_EXPIRED", "SNAPSHOT_PARSE_FAILED", + "SNAPSHOT_CAPTURE_FAILED", "ELEMENT_REF_NOT_FOUND", "TARGET_NOT_FOUND", "TARGET_AMBIGUOUS", diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json index 286c15c9e..cb0eff92e 100644 --- a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -116,6 +116,7 @@ "SNAPSHOT_MISSING", "SNAPSHOT_EXPIRED", "SNAPSHOT_PARSE_FAILED", + "SNAPSHOT_CAPTURE_FAILED", "ELEMENT_REF_NOT_FOUND", "TARGET_NOT_FOUND", "TARGET_AMBIGUOUS", From f0851555556da740b0451a6a44f13b99119f32fd Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 07:05:04 +0100 Subject: [PATCH 21/35] fix(ui-automation): Keep inactive tabs in next-step suggestions Allow unselected tab elements to remain eligible as screen-changing tap examples while still filtering selected/current tabs and non-tab selected controls from generic next-step suggestions. Add regression coverage for inactive tabs so the tab-priority path stays reachable. 
--- .../__tests__/runtime-next-steps.test.ts | 34 +++++++++++++++++++ .../shared/runtime-next-steps.ts | 10 +++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts index 36d2ebb9a..917875590 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts @@ -105,6 +105,40 @@ describe('runtime snapshot next steps', () => { }); }); + it('keeps unselected tabs available as screen-changing tap suggestions', () => { + recordSnapshot([ + createNode({ + type: 'Tab', + role: 'AXTab', + AXLabel: 'Current', + AXValue: 'selected', + AXSelected: true, + }), + createNode({ + type: 'Tab', + role: 'AXTab', + AXLabel: 'Search', + AXValue: 'not selected', + AXSelected: false, + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const searchTabRef = snapshot.elements.find((element) => element.label === 'Search')?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: searchTabRef }, + }); + }); + it('uses hierarchy depth only as a foreground-root tie breaker', () => { recordSnapshot([ nestNode( diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index 3dd010d50..9123ecc90 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -84,12 +84,14 @@ function isStateChangingTapNextStepElement(element: { value?: string; }): boolean { const value = compactTapNextStepText(element.value).toLowerCase(); - return ( - element.role === 'switch' || + const hasSelectionState = element.state?.selected === true || - 
element.state?.selected === false || value === 'selected' || - value === 'not selected' || + (element.role !== 'tab' && (element.state?.selected === false || value === 'not selected')); + + return ( + element.role === 'switch' || + hasSelectionState || value === '0' || value === '1' || value === 'off' || From 244f5c8a5362666d9f813d71c519182b464cde5e Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 07:10:22 +0100 Subject: [PATCH 22/35] fix(ui-automation): Preserve inactive tab value hints Keep value-based toggle filtering scoped away from tab elements so inactive tabs with compact values such as 0/off still remain eligible as screen-changing next-step suggestions. --- .../__tests__/runtime-next-steps.test.ts | 2 +- .../tools/ui-automation/shared/runtime-next-steps.ts | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts index 917875590..c7792735c 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts @@ -118,7 +118,7 @@ describe('runtime snapshot next steps', () => { type: 'Tab', role: 'AXTab', AXLabel: 'Search', - AXValue: 'not selected', + AXValue: '0', AXSelected: false, }), ]); diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index 9123ecc90..b9a31568a 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -89,14 +89,10 @@ function isStateChangingTapNextStepElement(element: { value === 'selected' || (element.role !== 'tab' && (element.state?.selected === false || value === 'not selected')); - return ( - element.role === 'switch' || - hasSelectionState || - value === '0' || - value === '1' || - value === 'off' || - value === 'on' - ); + 
const hasToggleValue = + element.role !== 'tab' && (value === '0' || value === '1' || value === 'off' || value === 'on'); + + return element.role === 'switch' || hasSelectionState || hasToggleValue; } /** From a9e55042d15ea865d4b0e5678f133c78cf9074dc Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 08:55:56 +0100 Subject: [PATCH 23/35] fix(example): Default Weather app to mock data Keep the Weather example deterministic for UI automation benchmarks by removing the launch-argument gate around the mock weather service. Co-Authored-By: Codex --- example_projects/Weather/Weather/WeatherApp.swift | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/example_projects/Weather/Weather/WeatherApp.swift b/example_projects/Weather/Weather/WeatherApp.swift index 3ec87c754..d1e5a4d80 100644 --- a/example_projects/Weather/Weather/WeatherApp.swift +++ b/example_projects/Weather/Weather/WeatherApp.swift @@ -10,15 +10,7 @@ import SwiftUI @main struct WeatherApp: App { - private let weatherService: WeatherService - - init() { - if CommandLine.arguments.contains("--mock-weather-api") { - weatherService = .mock - } else { - weatherService = .production - } - } + private let weatherService: WeatherService = .mock var body: some Scene { WindowGroup { From d0ad6f12989a90c8084d230750eef82c30857d5f Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 10:09:58 +0100 Subject: [PATCH 24/35] fix(ui-automation): Render UI recovery messages Include the UI automation error message in recovery details so text output keeps the actionable failure reason alongside the code and recovery hint. 
Co-Authored-By: Codex --- .../cli/text/ui-automation/long-press--error-no-simulator.txt | 1 + .../cli/text/ui-automation/swipe--error-no-simulator.txt | 1 + .../cli/text/ui-automation/swipe--error-not-actionable.txt | 1 + .../cli/text/ui-automation/tap--error-no-simulator.txt | 1 + .../cli/text/ui-automation/touch--error-no-simulator.txt | 1 + .../cli/text/ui-automation/type-text--error-no-simulator.txt | 1 + .../cli/text/ui-automation/type-text--error-not-actionable.txt | 1 + .../mcp/text/ui-automation/long-press--error-no-simulator.txt | 1 + .../mcp/text/ui-automation/swipe--error-no-simulator.txt | 1 + .../mcp/text/ui-automation/swipe--error-not-actionable.txt | 1 + .../mcp/text/ui-automation/tap--error-no-simulator.txt | 1 + .../mcp/text/ui-automation/touch--error-no-simulator.txt | 1 + .../mcp/text/ui-automation/type-text--error-no-simulator.txt | 1 + .../mcp/text/ui-automation/type-text--error-not-actionable.txt | 1 + src/utils/renderers/domain-result-text.ts | 1 + 15 files changed, 15 insertions(+) diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt index adaba8754..8884e85c9 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/long-press--error-no-simulator.txt @@ -5,6 +5,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt index 935b744b8..299c2be21 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-no-simulator.txt @@ -5,6 +5,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt index 3cdc8d292..f227e42b4 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/swipe--error-not-actionable.txt @@ -5,6 +5,7 @@ Recovery Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'swipeWithin'. Element: e3 Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt index 726bd1eb3..78309b818 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/tap--error-no-simulator.txt @@ -5,6 +5,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt index 8a7e5ad50..1d05a8c9f 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/touch--error-no-simulator.txt @@ -5,6 +5,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt index ccdd1e70e..39d9c2f15 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-no-simulator.txt @@ -5,6 +5,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt index 5d72e95f4..27afd2f76 100644 --- a/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt +++ b/src/snapshot-tests/__fixtures__/cli/text/ui-automation/type-text--error-not-actionable.txt @@ -5,6 +5,7 @@ Recovery Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'typeText'. Element: e3 Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt index 3eb76d15a..0b69ada38 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/long-press--error-no-simulator.txt @@ -3,6 +3,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt index a0e22ad66..15636c457 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-no-simulator.txt @@ -3,6 +3,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt index 8c257311f..51d69da9e 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/swipe--error-not-actionable.txt @@ -3,6 +3,7 @@ Recovery Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'swipeWithin'. Element: e3 Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt index 9f2c04d8b..3cc10d903 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/tap--error-no-simulator.txt @@ -3,6 +3,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt index 71f0ecf56..f276ccf07 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/touch--error-no-simulator.txt @@ -3,6 +3,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. ❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt index 99cf12e61..19df3f2f3 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-no-simulator.txt @@ -3,6 +3,7 @@ Recovery Code: SNAPSHOT_MISSING + Message: No runtime UI snapshot is available for this simulator. Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. 
❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt index e1e5c9bf8..93cf24775 100644 --- a/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt +++ b/src/snapshot-tests/__fixtures__/mcp/text/ui-automation/type-text--error-not-actionable.txt @@ -3,6 +3,7 @@ Recovery Code: TARGET_NOT_ACTIONABLE + Message: Element ref 'e3' does not support 'typeText'. Element: e3 Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index 2c7c02b46..d036ac51a 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -1338,6 +1338,7 @@ function createUiErrorItems(uiError?: UiAutomationRecoverableError): TextRendera const lines = [ `Code: ${uiError.code}`, + `Message: ${uiError.message}`, ...(uiError.elementRef ? [`Element: ${uiError.elementRef}`] : []), ...(typeof uiError.timeoutMs === 'number' ? [`Timeout: ${uiError.timeoutMs}ms`] : []), `Hint: ${uiError.recoveryHint}`, From 06643ad8ef154fcd530b93532789f490c1f85edd Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 10:10:15 +0100 Subject: [PATCH 25/35] fix(ui-automation): Preserve resolved action coordinates Keep ref-based runtime UI actions as the public input shape while carrying resolved coordinates in structured action output when they are available. This preserves the v2 action payload contract without reintroducing coordinate-driven tool inputs. 
Co-Authored-By: Codex --- .../2.schema.json | 14 ++++++++--- .../__tests__/long_press.test.ts | 14 +++++++++-- .../ui-automation/__tests__/swipe.test.ts | 10 +++++++- .../tools/ui-automation/__tests__/tap.test.ts | 8 +++++-- .../ui-automation/__tests__/touch.test.ts | 23 ++++++++++++++++--- src/mcp/tools/ui-automation/long_press.ts | 8 ++++--- src/mcp/tools/ui-automation/swipe.ts | 14 ++++++++--- src/mcp/tools/ui-automation/tap.ts | 12 ++++++++-- src/mcp/tools/ui-automation/touch.ts | 10 ++++---- src/types/domain-results.ts | 8 +++++++ 10 files changed, 98 insertions(+), 23 deletions(-) diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json index cb0eff92e..053c8a6d5 100644 --- a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -160,7 +160,9 @@ "additionalProperties": false, "properties": { "type": { "const": "tap" }, - "elementRef": { "type": "string" } + "elementRef": { "type": "string" }, + "x": { "type": "number" }, + "y": { "type": "number" } }, "required": ["type", "elementRef"] }, @@ -181,6 +183,8 @@ "type": { "const": "swipe" }, "withinElementRef": { "type": "string" }, "direction": { "$ref": "#/$defs/direction" }, + "from": { "$ref": "#/$defs/point" }, + "to": { "$ref": "#/$defs/point" }, "durationSeconds": { "type": "number", "minimum": 0 } }, "required": ["type", "withinElementRef", "direction"] @@ -210,7 +214,9 @@ "properties": { "type": { "const": "touch" }, "elementRef": { "type": "string" }, - "event": { "type": "string" } + "event": { "type": "string" }, + "x": { "type": "number" }, + "y": { "type": "number" } }, "required": ["type", "elementRef"] }, @@ -239,7 +245,9 @@ "properties": { "type": { "const": "long-press" }, "elementRef": { "type": "string" }, - "durationMs": { "type": "integer", "minimum": 0 } + "durationMs": 
{ "type": "integer", "minimum": 0 }, + "x": { "type": "number" }, + "y": { "type": "number" } }, "required": ["type", "elementRef", "durationMs"] }, diff --git a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts index a24ffd751..727ef2500 100644 --- a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts @@ -58,7 +58,7 @@ describe('Long Press Plugin', () => { expect(result).toMatchObject({ didError: false, - action: { type: 'long-press', elementRef: 'e1', durationMs: 1500 }, + action: { type: 'long-press', elementRef: 'e1', durationMs: 1500, x: 60, y: 40 }, capture: { type: 'runtime-snapshot', simulatorId }, }); expect(calls[0]?.command).toEqual([ @@ -87,8 +87,18 @@ describe('Long Press Plugin', () => { ]); const { calls, executor } = createTrackingExecutor(); - await runLongPress({ simulatorId, elementRef: 'e1', duration: 1000 }, executor); + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, + ); + expect(result.action).toMatchObject({ + type: 'long-press', + elementRef: 'e1', + durationMs: 1000, + x: 307, + y: 903, + }); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'touch', diff --git a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts index fa5130fcc..88234e1a0 100644 --- a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts @@ -93,7 +93,13 @@ describe('Swipe Tool', () => { expect(result).toMatchObject({ didError: false, - action: { type: 'swipe', withinElementRef: 'e1', direction: 'up' }, + action: { + type: 'swipe', + withinElementRef: 'e1', + direction: 'up', + from: { x: 100, y: 340 }, + to: { x: 100, y: 60 }, + }, }); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', @@ -138,6 +144,8 @@ describe('Swipe Tool', () => { type: 'swipe', withinElementRef: 
'e1', direction: 'right', + from: { x: 30, y: 200 }, + to: { x: 170, y: 200 }, durationSeconds: 2, }); expect(calls[0]?.command).toEqual([ diff --git a/src/mcp/tools/ui-automation/__tests__/tap.test.ts b/src/mcp/tools/ui-automation/__tests__/tap.test.ts index 7a35a07fe..1a23523a6 100644 --- a/src/mcp/tools/ui-automation/__tests__/tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/tap.test.ts @@ -66,7 +66,10 @@ describe('Tap Plugin', () => { const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result).toMatchObject({ didError: false, action: { type: 'tap', elementRef: 'e1' } }); + expect(result).toMatchObject({ + didError: false, + action: { type: 'tap', elementRef: 'e1', x: 60, y: 40 }, + }); expect(actionCommands(calls)).toHaveLength(1); expect(calls[0]).toEqual({ command: [ @@ -380,8 +383,9 @@ describe('Tap Plugin', () => { ]); const { calls, executor } = createTrackingExecutor(); - await runTap({ simulatorId, elementRef: 'e1' }, executor); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + expect(result.action).toMatchObject({ type: 'tap', elementRef: 'e1', x: 307, y: 903 }); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'touch', diff --git a/src/mcp/tools/ui-automation/__tests__/touch.test.ts b/src/mcp/tools/ui-automation/__tests__/touch.test.ts index c114c6bd8..40dbf9f85 100644 --- a/src/mcp/tools/ui-automation/__tests__/touch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/touch.test.ts @@ -61,7 +61,7 @@ describe('Touch Plugin', () => { expect(result).toMatchObject({ didError: false, - action: { type: 'touch', elementRef: 'e1', event: 'touch down' }, + action: { type: 'touch', elementRef: 'e1', event: 'touch down', x: 60, y: 40 }, }); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', @@ -80,8 +80,15 @@ describe('Touch Plugin', () => { recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); const { calls, executor } = createTrackingExecutor(); - await 
runTouch({ simulatorId, elementRef: 'e1', up: true }, executor); + const result = await runTouch({ simulatorId, elementRef: 'e1', up: true }, executor); + expect(result.action).toMatchObject({ + type: 'touch', + elementRef: 'e1', + event: 'touch up', + x: 60, + y: 40, + }); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'touch', @@ -127,8 +134,18 @@ describe('Touch Plugin', () => { ]); const { calls, executor } = createTrackingExecutor(); - await runTouch({ simulatorId, elementRef: 'e1', down: true, up: true }, executor); + const result = await runTouch( + { simulatorId, elementRef: 'e1', down: true, up: true }, + executor, + ); + expect(result.action).toMatchObject({ + type: 'touch', + elementRef: 'e1', + event: 'touch down+up', + x: 307, + y: 903, + }); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'touch', diff --git a/src/mcp/tools/ui-automation/long_press.ts b/src/mcp/tools/ui-automation/long_press.ts index 062107899..9bd5039ec 100644 --- a/src/mcp/tools/ui-automation/long_press.ts +++ b/src/mcp/tools/ui-automation/long_press.ts @@ -60,15 +60,18 @@ export function createLongPressExecutor( return async (params) => { const toolName = 'long_press'; const { simulatorId, elementRef, duration } = params; - const action = { type: 'long-press' as const, elementRef, durationMs: duration }; + const unresolvedAction = { type: 'long-press' as const, elementRef, durationMs: duration }; const resolution = resolveElementRef(simulatorId, elementRef, 'longPress'); if (!resolution.ok) { - return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { uiError: resolution.error, }); } + const center = getRuntimeElementActivationPoint(resolution.element); + const action = { ...unresolvedAction, x: center.x, y: center.y }; + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, @@ -78,7 +81,6 @@ 
export function createLongPressExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const center = getRuntimeElementActivationPoint(resolution.element); const delayInSeconds = duration / 1000; const commandArgs = [ 'touch', diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index b964d9bd8..7d6085f01 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -74,7 +74,7 @@ export function createSwipeExecutor( const toolName = 'swipe'; const { simulatorId, withinElementRef, direction, duration, distance, preDelay, postDelay } = params; - const action = { + const unresolvedAction = { type: 'swipe' as const, withinElementRef, direction, @@ -83,7 +83,7 @@ export function createSwipeExecutor( const resolution = resolveElementRef(simulatorId, withinElementRef, 'swipeWithin'); if (!resolution.ok) { - return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { uiError: resolution.error, }); } @@ -95,9 +95,17 @@ export function createSwipeExecutor( message: points.message, elementRef: withinElementRef, }); - return createUiActionFailureResult(action, simulatorId, points.message, { uiError }); + return createUiActionFailureResult(unresolvedAction, simulatorId, points.message, { + uiError, + }); } + const action = { + ...unresolvedAction, + from: points.from, + to: points.to, + }; + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index f3a2ed119..0b827890b 100644 --- a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -12,6 +12,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { clearRuntimeSnapshot, resolveElementRef } from 
'./shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { defaultAxeHelpers } from './shared/axe-command.ts'; import { createSemanticTapCommand, @@ -69,15 +70,22 @@ export function createTapExecutor( return async (params) => { const toolName = 'tap'; const { simulatorId, elementRef, preDelay, postDelay } = params; - const action = { type: 'tap' as const, elementRef }; + const unresolvedAction = { type: 'tap' as const, elementRef }; const resolution = resolveElementRef(simulatorId, elementRef, 'tap'); if (!resolution.ok) { - return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { uiError: resolution.error, }); } + const activationPoint = getRuntimeElementActivationPoint(resolution.element); + const action = { + ...unresolvedAction, + x: activationPoint.x, + y: activationPoint.y, + }; + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, diff --git a/src/mcp/tools/ui-automation/touch.ts b/src/mcp/tools/ui-automation/touch.ts index 277bbb53b..27e4bc666 100644 --- a/src/mcp/tools/ui-automation/touch.ts +++ b/src/mcp/tools/ui-automation/touch.ts @@ -76,7 +76,7 @@ export function createTouchExecutor( const { simulatorId, elementRef, down, up, delay } = params; const actionText = down && up ? 'touch down+up' : down ? 'touch down' : up ? 'touch up' : undefined; - const action = { + const unresolvedAction = { type: 'touch' as const, elementRef, ...(actionText ? 
{ event: actionText } : {}), @@ -84,7 +84,7 @@ export function createTouchExecutor( if (!down && !up) { return createUiActionFailureResult( - action, + unresolvedAction, simulatorId, 'At least one of "down" or "up" must be true', ); @@ -92,11 +92,14 @@ export function createTouchExecutor( const resolution = resolveElementRef(simulatorId, elementRef, 'touch'); if (!resolution.ok) { - return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { uiError: resolution.error, }); } + const center = getRuntimeElementActivationPoint(resolution.element); + const action = { ...unresolvedAction, x: center.x, y: center.y }; + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, @@ -106,7 +109,6 @@ export function createTouchExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const center = getRuntimeElementActivationPoint(resolution.element); const commandArgs = ['touch', '-x', String(center.x), '-y', String(center.y)]; if (down) { commandArgs.push('--down'); diff --git a/src/types/domain-results.ts b/src/types/domain-results.ts index ea2ea0ff2..dd70b1db4 100644 --- a/src/types/domain-results.ts +++ b/src/types/domain-results.ts @@ -358,22 +358,30 @@ export interface TestSelectionInfo { export interface UiActionTap { type: 'tap'; elementRef: string; + x?: number; + y?: number; } export interface UiActionSwipe { type: 'swipe'; withinElementRef: string; direction: 'up' | 'down' | 'left' | 'right'; + from?: Point; + to?: Point; durationSeconds?: number; } export interface UiActionTouch { type: 'touch'; elementRef: string; event?: string; + x?: number; + y?: number; } export interface UiActionLongPress { type: 'long-press'; elementRef: string; durationMs: number; + x?: number; + y?: number; } export interface UiActionButton { type: 'button'; From 
3cc5b88928b83c32ad2974a06224a7dbfc3d8eea Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 10:10:30 +0100 Subject: [PATCH 26/35] test(snapshot): Restore JSON normalization coverage Restore regression coverage for SwiftPM progress sorting, volatile build settings normalization, and compact frame formatting. These behaviors are still implemented by the production normalizer and need explicit tests. Co-Authored-By: Codex --- .../__tests__/json-normalize.test.ts | 126 +++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/src/snapshot-tests/__tests__/json-normalize.test.ts b/src/snapshot-tests/__tests__/json-normalize.test.ts index 629a5a2b8..899e85e91 100644 --- a/src/snapshot-tests/__tests__/json-normalize.test.ts +++ b/src/snapshot-tests/__tests__/json-normalize.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest'; import type { StructuredOutputEnvelope } from '../../types/structured-output.ts'; -import { normalizeStructuredEnvelope } from '../json-normalize.ts'; +import { formatStructuredEnvelopeFixture, normalizeStructuredEnvelope } from '../json-normalize.ts'; describe('normalizeStructuredEnvelope', () => { it('keeps suite-less simulator test cases while normalizing volatile durations', () => { @@ -131,6 +131,130 @@ describe('normalizeStructuredEnvelope', () => { }); }); + it('normalizes and sorts SwiftPM build progress lines in stderr arrays', () => { + const envelope: StructuredOutputEnvelope = { + schema: 'xcodebuildmcp.output.build-run-result', + schemaVersion: '1', + didError: false, + error: null, + data: { + output: { + stderr: [ + 'Building for debugging...', + '[5/8] Emitting module spm', + '[4/8] Compiling spm main.swift', + "Build of product 'spm' complete! 
(0.42s)", + ], + }, + }, + }; + + expect(normalizeStructuredEnvelope(envelope)).toEqual({ + schema: 'xcodebuildmcp.output.build-run-result', + schemaVersion: '1', + didError: false, + error: null, + data: { + output: { + stderr: [ + 'Building for debugging...', + '[] Compiling spm main.swift', + '[] Emitting module spm', + "Build of product 'spm' complete! ()", + ], + }, + }, + }); + }); + + it('normalizes volatile build settings entry values without dropping entries', () => { + const envelope: StructuredOutputEnvelope = { + schema: 'xcodebuildmcp.output.build-settings', + schemaVersion: '1', + didError: false, + error: null, + data: { + entries: [ + { key: 'ALTERNATE_OWNER', value: 'cameroncooke' }, + { key: 'ALTERNATE_GROUP', value: 'staff' }, + { key: 'CACHE_ROOT', value: '/var/folders/hash/C/com.apple.DeveloperTools/26.4/Xcode' }, + { key: 'GID', value: '20' }, + { key: 'TARGET_DEVICE_MODEL', value: 'iPhone17,2' }, + { key: 'TARGET_DEVICE_OS_VERSION', value: '26.4.2' }, + { + key: 'SDKROOT', + value: + '/Applications/Xcode-26.4.0.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS26.4.sdk', + }, + { + key: 'SDK_DIR_iphoneos26_4', + value: + '/Applications/Xcode-26.4.0.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS26.4.sdk', + }, + { key: 'SDK_NAME', value: 'iphoneos26.4' }, + { key: 'SDK_VERSION_ACTUAL', value: '260400' }, + { key: 'SDK_PRODUCT_BUILD_VERSION', value: '23E237' }, + { key: 'MAC_OS_X_VERSION_ACTUAL', value: '260301' }, + { key: 'MAC_OS_X_PRODUCT_BUILD_VERSION', value: '25D2128' }, + { + key: 'PLATFORM_DEVELOPER_APPLICATIONS_DIR', + value: '/Applications/Xcode-26.4.0.app/Contents/Developer/Applications', + }, + { + key: 'SDK_STAT_CACHE_PATH', + value: + '/Library/Developer/Xcode/DerivedData/SDKStatCaches.noindex/iphoneos26.4-23E237-c1e9.sdkstatcache', + }, + ], + }, + }; + + expect(normalizeStructuredEnvelope(envelope)).toEqual({ + schema: 'xcodebuildmcp.output.build-settings', + schemaVersion: 
'1', + didError: false, + error: null, + data: { + entries: [ + { key: 'ALTERNATE_OWNER', value: '' }, + { key: 'ALTERNATE_GROUP', value: '' }, + { key: 'CACHE_ROOT', value: '' }, + { key: 'GID', value: '' }, + { key: 'TARGET_DEVICE_MODEL', value: '' }, + { key: 'TARGET_DEVICE_OS_VERSION', value: '' }, + { key: 'SDKROOT', value: '' }, + { key: 'SDK_DIR_', value: '' }, + { key: 'SDK_NAME', value: '' }, + { key: 'SDK_VERSION_ACTUAL', value: '' }, + { key: 'SDK_PRODUCT_BUILD_VERSION', value: '' }, + { key: 'MAC_OS_X_VERSION_ACTUAL', value: '' }, + { key: 'MAC_OS_X_PRODUCT_BUILD_VERSION', value: '' }, + { + key: 'PLATFORM_DEVELOPER_APPLICATIONS_DIR', + value: '/Applications/Xcode-.app/Contents/Developer/Applications', + }, + { key: 'SDK_STAT_CACHE_PATH', value: '' }, + ], + }, + }); + }); + + it('compacts frame objects emitted with y before x', () => { + const envelope: StructuredOutputEnvelope = { + schema: 'xcodebuildmcp.output.ui-snapshot', + schemaVersion: '1', + didError: false, + error: null, + data: { + frame: { y: 2, x: 1, width: 3, height: 4 }, + }, + }; + + expect(formatStructuredEnvelopeFixture(envelope)).toContain( + '"frame": { "x": 1, "y": 2, "width": 3, "height": 4 }', + ); + }); + it('normalizes volatile build settings PATH entry values without dropping the entry', () => { const envelope: StructuredOutputEnvelope = { schema: 'xcodebuildmcp.output.build-settings', From 18193ec07d69676794f7dd0b188b3889fd68a6cc Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 10:52:46 +0100 Subject: [PATCH 27/35] fix(ui-automation): Address Warden review findings Remove internal validation detail from the changelog, preserve existing duration validation compatibility for button and key press actions, and simplify UI wait predicate evaluation for readability. 
Co-Authored-By: Codex --- CHANGELOG.md | 2 +- .../ui-automation/__tests__/button.test.ts | 6 +- .../ui-automation/__tests__/key_press.test.ts | 4 +- src/mcp/tools/ui-automation/button.ts | 3 +- src/mcp/tools/ui-automation/key_press.ts | 3 +- src/mcp/tools/ui-automation/wait_for_ui.ts | 70 +++++++++++++------ src/utils/renderers/domain-result-text.ts | 4 +- 7 files changed, 60 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d16353709..d13c8a56e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ - Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged. - Added `batch` for executing multiple AXe UI automation steps in one simulator session. - Added `wait_for_ui` for polling rs/1 runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. -- Added structured rs/1 element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions. Validated the flow with a source-built AXe binary and the full Weather/Safari Claude Code task, with no raw-string batch attempts. +- Added structured rs/1 element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions. 
### Fixed diff --git a/src/mcp/tools/ui-automation/__tests__/button.test.ts b/src/mcp/tools/ui-automation/__tests__/button.test.ts index 0d672889c..c946a148e 100644 --- a/src/mcp/tools/ui-automation/__tests__/button.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/button.test.ts @@ -23,8 +23,8 @@ describe('Button Plugin', () => { expect(schemaObj.safeParse({ buttonType: 'home', duration: 2.5 }).success).toBe(true); expect(schemaObj.safeParse({ buttonType: 'invalid-button' }).success).toBe(false); expect(schemaObj.safeParse({ buttonType: 'home', duration: -1 }).success).toBe(false); - expect(schemaObj.safeParse({ buttonType: 'home', duration: 0 }).success).toBe(false); - expect(schemaObj.safeParse({ buttonType: 'home', duration: 10.1 }).success).toBe(false); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 0 }).success).toBe(true); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 10.1 }).success).toBe(true); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', @@ -285,7 +285,7 @@ describe('Button Plugin', () => { expect(result.isError).toBe(true); expect(allText(result)).toContain('Parameter validation failed'); - expect(allText(result)).toContain('Duration must be greater than 0 seconds'); + expect(allText(result)).toContain('Duration must be non-negative'); }); it('should return success for valid button press', async () => { diff --git a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts index bd7f28160..ebdd2909e 100644 --- a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts @@ -36,8 +36,8 @@ describe('Key Press Tool', () => { expect(schemaObj.safeParse({ keyCode: 'invalid' }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: -1 }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: 256 }).success).toBe(false); - expect(schemaObj.safeParse({ keyCode: 
40, duration: 0 }).success).toBe(false); - expect(schemaObj.safeParse({ keyCode: 40, duration: 10.1 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCode: 40, duration: 0 }).success).toBe(true); + expect(schemaObj.safeParse({ keyCode: 40, duration: 10.1 }).success).toBe(true); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/button.ts b/src/mcp/tools/ui-automation/button.ts index 280ca0b07..e3d52e3c9 100644 --- a/src/mcp/tools/ui-automation/button.ts +++ b/src/mcp/tools/ui-automation/button.ts @@ -31,8 +31,7 @@ const buttonSchema = z.object({ .describe('apple-pay|home|lock|side-button|siri'), duration: z .number() - .positive({ message: 'Duration must be greater than 0 seconds' }) - .max(10, { message: 'Duration must be at most 10 seconds' }) + .min(0, { message: 'Duration must be non-negative' }) .optional() .describe('seconds'), }); diff --git a/src/mcp/tools/ui-automation/key_press.ts b/src/mcp/tools/ui-automation/key_press.ts index b851d198a..93fd89890 100644 --- a/src/mcp/tools/ui-automation/key_press.ts +++ b/src/mcp/tools/ui-automation/key_press.ts @@ -34,8 +34,7 @@ const keyPressSchema = z.object({ .describe('HID keycode. 
Common values: 40 Return/Enter, 42 Backspace, 43 Tab, 44 Space.'), duration: z .number() - .positive({ message: 'Duration must be greater than 0 seconds' }) - .max(10, { message: 'Duration must be at most 10 seconds' }) + .min(0, { message: 'Duration must be non-negative' }) .optional() .describe('seconds'), }); diff --git a/src/mcp/tools/ui-automation/wait_for_ui.ts b/src/mcp/tools/ui-automation/wait_for_ui.ts index e7a8579e3..3dc77b0bc 100644 --- a/src/mcp/tools/ui-automation/wait_for_ui.ts +++ b/src/mcp/tools/ui-automation/wait_for_ui.ts @@ -151,6 +151,11 @@ function defaultSleep(durationMs: number): Promise<void> { }); } +type WaitPredicateEvaluation = + | ReturnType<typeof evaluateSettledPredicate> + | ReturnType<typeof evaluateTextContainsPredicate> + | ReturnType<typeof evaluateElementPredicate>; + function createWaitMatch( predicate: WaitForUiParams['predicate'], matches: RuntimeElementV1[] | undefined, @@ -161,6 +166,42 @@ function createWaitMatch( return { predicate, matches }; } +function evaluateWaitPredicate(args: { + predicate: WaitForUiParams['predicate']; + selector: ResolvedWaitSelector | null; + snapshot: RuntimeSnapshotRecord; + text?: string; + nowMs: number; + settledDurationMs: number; + settledTracker: SettledTracker; +}): WaitPredicateEvaluation { + const { predicate, selector, snapshot, text, nowMs, settledDurationMs, settledTracker } = args; + + if (predicate === 'settled') { + return evaluateSettledPredicate({ + snapshot, + nowMs, + settledDurationMs, + tracker: settledTracker, + }); + } + + if (predicate === 'textContains' && !selector) { + return evaluateTextContainsPredicate({ snapshot, text: text! }); + } + + if (predicate === 'gone' && !selector && text) { + const textMatch = evaluateTextContainsPredicate({ snapshot, text }); + return { + matched: (textMatch.candidates ?? []).length === 0, + candidates: textMatch.candidates ?? 
[], + uiError: undefined, + }; + } + + return evaluateElementPredicate({ predicate, selector: selector!, snapshot, text }); +} + export function createWaitForUiExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, @@ -247,26 +288,15 @@ export function createWaitForUiExecutor( lastPollError = null; recordRuntimeSnapshot(snapshot); - const matched = - predicate === 'settled' - ? evaluateSettledPredicate({ - snapshot, - nowMs, - settledDurationMs, - tracker: settledTracker, - }) - : predicate === 'textContains' && !selector - ? evaluateTextContainsPredicate({ snapshot, text: text! }) - : predicate === 'gone' && !selector && text - ? (() => { - const textMatch = evaluateTextContainsPredicate({ snapshot, text }); - return { - matched: (textMatch.candidates ?? []).length === 0, - candidates: textMatch.candidates ?? [], - uiError: undefined, - }; - })() - : evaluateElementPredicate({ predicate, selector: selector!, snapshot, text }); + const matched = evaluateWaitPredicate({ + predicate, + selector, + snapshot, + text, + nowMs, + settledDurationMs, + settledTracker, + }); if (typeof matched === 'boolean') { if (matched) { diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index d036ac51a..3506d8c28 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -1487,12 +1487,12 @@ function createCaptureResultItems( } if (isRuntimeSnapshotUnchanged) { - const capture = result.capture as RuntimeSnapshotUnchangedV1; + const unchangedCapture = result.capture as RuntimeSnapshotUnchangedV1; items.push( ...createStandardDiagnosticSections(result.diagnostics), createStatus( 'success', - `Runtime UI snapshot unchanged (screenHash: ${capture.screenHash}, seq: ${capture.seq}).`, + `Runtime UI snapshot unchanged (screenHash: ${unchangedCapture.screenHash}, seq: ${unchangedCapture.seq}).`, ), ); return items; From 6cc93b2129eb52a34f796b20f543cd5c43640bf9 Mon Sep 17 
00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 11:24:53 +0100 Subject: [PATCH 28/35] fix(ui-automation): Address Warden follow-up findings Move the type_text changelog entry to the added section and keep user-facing notes free of internal runtime schema names. Preserve AXe fallback diagnostics when command output is empty and simplify small helper paths flagged by Warden. Co-Authored-By: Codex --- CHANGELOG.md | 6 +-- .../ui-automation/shared/domain-result.ts | 2 +- .../ui-automation/shared/semantic-tap.ts | 51 +++++++++++-------- src/snapshot-tests/json-normalize.ts | 3 +- src/utils/responses/next-step-formatting.ts | 5 +- src/utils/structured-output-envelope.ts | 16 +++--- 6 files changed, 47 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d13c8a56e..0b6160bbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,9 @@ - Added `nextSteps` hint lines to MCP `structuredContent` and CLI `--output json` envelopes so agents can consume follow-up actions without scraping text. CLI JSON renders shell command lines; MCP structured content renders MCP tool-call hints. Structured result schemas that include `nextSteps` now use schema version 2; existing version 1 schema files remain available for current validators. - Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged. - Added `batch` for executing multiple AXe UI automation steps in one simulator session. -- Added `wait_for_ui` for polling rs/1 runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. -- Added structured rs/1 element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions. 
+- Added `wait_for_ui` for polling runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. +- Added structured element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions. +- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it. ### Fixed @@ -29,7 +30,6 @@ - Fixed `snapshot_ui` and `wait_for_ui` next steps so they use the resolved simulator ID instead of leaking `SIMULATOR_UUID` placeholders. - Fixed the Weather example app so saved-location rows are not reused as search-result rows after editing locations. - Fixed the Weather example app's current-location button so it selects the current saved location instead of appearing as a no-op UI automation target. -- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it. - Fixed `type_text` so AXe-unsupported international/accented characters fail before focusing the field, with a clear recoverable error instead of a generic typing failure. - Fixed `snapshot_ui` next-step guidance so the suggested tap ref prefers useful tappable controls over text fields, sheet grabbers, close buttons, and clear-search buttons. - Fixed compact runtime snapshot JSON so target ordering matches compact text output and prioritizes useful content targets before low-value sheet chrome. 
diff --git a/src/mcp/tools/ui-automation/shared/domain-result.ts b/src/mcp/tools/ui-automation/shared/domain-result.ts index 560709d2c..b1b9aa197 100644 --- a/src/mcp/tools/ui-automation/shared/domain-result.ts +++ b/src/mcp/tools/ui-automation/shared/domain-result.ts @@ -183,7 +183,7 @@ export function mapAxeCommandError( if (error instanceof AxeError) { return { message: messages.axeFailureMessage(error), - diagnostics: createDiagnostics([], compact([error.axeOutput ?? error.message])), + diagnostics: createDiagnostics([], compact([error.axeOutput || error.message])), }; } diff --git a/src/mcp/tools/ui-automation/shared/semantic-tap.ts b/src/mcp/tools/ui-automation/shared/semantic-tap.ts index 4c940c0ce..f3a92d9a2 100644 --- a/src/mcp/tools/ui-automation/shared/semantic-tap.ts +++ b/src/mcp/tools/ui-automation/shared/semantic-tap.ts @@ -64,13 +64,37 @@ function hasDuplicateSelectorMatch(params: { return matches.length > 1; } +function pickSemanticTapSelectorArgs(params: { + element: RuntimeSnapshotElementRecord; + elements: readonly RuntimeSnapshotElementRecord[]; + elementTypeArgs: readonly string[]; + extraArgs: readonly string[]; +}): string[] | null { + const { element, elements, elementTypeArgs, extraArgs } = params; + const { identifier, label, value } = element.publicElement; + + if (element.publicElement.role === 'switch') return null; + if ( + identifier && + !hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier }) + ) { + return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs]; + } + if (label && !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label })) { + return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs]; + } + if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) { + return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs]; + } + return null; +} + export function createSemanticTapCommand( element: 
RuntimeSnapshotElementRecord, elementRef: string, extraArgs: readonly string[] = [], elements: readonly RuntimeSnapshotElementRecord[] = [element], ): SemanticTapCommand { - const { identifier, label, value } = element.publicElement; const activationPoint = getRuntimeElementActivationPoint(element); const elementType = axeElementTypeFor(element); const elementTypeArgs = elementType ? ['--element-type', elementType] : []; @@ -87,25 +111,12 @@ export function createSemanticTapCommand( ] : ['tap', '-x', String(activationPoint.x), '-y', String(activationPoint.y), ...extraArgs]; - const selectorArgs = (() => { - if (element.publicElement.role === 'switch') return null; - if ( - identifier && - !hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier }) - ) { - return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs]; - } - if ( - label && - !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label }) - ) { - return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs]; - } - if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) { - return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs]; - } - return null; - })(); + const selectorArgs = pickSemanticTapSelectorArgs({ + element, + elements, + elementTypeArgs, + extraArgs, + }); return { selectorArgs, diff --git a/src/snapshot-tests/json-normalize.ts b/src/snapshot-tests/json-normalize.ts index f208ec805..f058aa8e6 100644 --- a/src/snapshot-tests/json-normalize.ts +++ b/src/snapshot-tests/json-normalize.ts @@ -249,7 +249,7 @@ function normalizeXcodeBridgeCallEnvelope( return envelope; } - const normalizedEnvelope: StructuredOutputEnvelope = { + return { ...envelope, data: { ...data, @@ -257,7 +257,6 @@ function normalizeXcodeBridgeCallEnvelope( ...(Object.hasOwn(data, 'structuredContent') ? 
{ structuredContent: {} } : {}), }, }; - return normalizedEnvelope; } export function normalizeStructuredEnvelope( diff --git a/src/utils/responses/next-step-formatting.ts b/src/utils/responses/next-step-formatting.ts index 5e2084bc1..1031a2a6b 100644 --- a/src/utils/responses/next-step-formatting.ts +++ b/src/utils/responses/next-step-formatting.ts @@ -67,10 +67,7 @@ function formatNextStepForCli(step: NextStep): string { } function formatMcpValue(value: NextStepParamValue): string { - if (typeof value === 'string') { - return JSON.stringify(value); - } - if (typeof value === 'object' && value !== null) { + if (typeof value === 'string' || (typeof value === 'object' && value !== null)) { return JSON.stringify(value); } return String(value); diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts index 3e4b45a97..d1b681244 100644 --- a/src/utils/structured-output-envelope.ts +++ b/src/utils/structured-output-envelope.ts @@ -190,12 +190,16 @@ function compactRuntimeElementRow(element: RuntimeElementV1, action: string): st } function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNameV1 | 'none' { - return ( - (element.actions.includes('typeText') && 'typeText') || - (element.actions.includes('tap') && 'tap') || - (element.actions.includes('swipeWithin') && 'swipeWithin') || - 'none' - ); + if (element.actions.includes('typeText')) { + return 'typeText'; + } + if (element.actions.includes('tap')) { + return 'tap'; + } + if (element.actions.includes('swipeWithin')) { + return 'swipeWithin'; + } + return 'none'; } function isRuntimeTextSummaryElement(element: RuntimeElementV1): boolean { From f32bd2ea7d67e2e7a450850ede6869bc6528efec Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 12:07:28 +0100 Subject: [PATCH 29/35] fix(ui-automation): Address Warden contract feedback Preserve zero-value compatibility for gesture duration and delta validation while keeping negative values rejected. 
Remove internal runtime snapshot wording from the public swipe manifest. Keep touch success text stable when the optional event is omitted and simplify runtime snapshot control flow. Co-Authored-By: Codex --- manifests/tools/swipe.yaml | 2 +- .../ui-automation/__tests__/gesture.test.ts | 3 +- src/mcp/tools/ui-automation/gesture.ts | 4 +- .../ui-automation/shared/runtime-snapshot.ts | 49 +++++++++---------- src/utils/renderers/domain-result-text.ts | 2 +- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/manifests/tools/swipe.yaml b/manifests/tools/swipe.yaml index 6d0c00b03..2e6eb5d18 100644 --- a/manifests/tools/swipe.yaml +++ b/manifests/tools/swipe.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/swipe names: mcp: swipe cli: swipe -description: Swipe within a UI element by withinElementRef and direction from a current rs/1 runtime snapshot. +description: Swipe within a scrollable UI element using an element reference from the latest runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" diff --git a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts index 172cdf512..2eedc5d7a 100644 --- a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts @@ -40,7 +40,8 @@ describe('Gesture Plugin', () => { expect(schemaObj.safeParse({ preset: 'scroll-up', screenWidth: 2001 }).success).toBe(false); expect(schemaObj.safeParse({ preset: 'scroll-up', screenHeight: 3001 }).success).toBe(false); expect(schemaObj.safeParse({ preset: 'scroll-up', duration: -1 }).success).toBe(false); - expect(schemaObj.safeParse({ preset: 'scroll-up', duration: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', duration: 0 }).success).toBe(true); + expect(schemaObj.safeParse({ preset: 'scroll-up', delta: 0 }).success).toBe(true); expect(schemaObj.safeParse({ preset: 'scroll-up', delta: 201 
}).success).toBe(false); const withSimId = schemaObj.safeParse({ diff --git a/src/mcp/tools/ui-automation/gesture.ts b/src/mcp/tools/ui-automation/gesture.ts index 01a6848d0..46a521197 100644 --- a/src/mcp/tools/ui-automation/gesture.ts +++ b/src/mcp/tools/ui-automation/gesture.ts @@ -67,13 +67,13 @@ const gestureSchema = z.object({ ), duration: z .number() - .positive({ message: 'Duration must be greater than 0 seconds' }) + .min(0, { message: 'Duration must be non-negative' }) .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('Duration of the gesture in seconds.'), delta: z .number() - .positive({ message: 'Delta must be greater than 0' }) + .min(0, { message: 'Delta must be non-negative' }) .max(200, { message: 'Delta must be at most 200' }) .optional() .describe('Distance to move in pixels.'), diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 9c6e54082..482b3cc92 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -533,21 +533,17 @@ export function extractAccessibilityHierarchy(responseText: string): Accessibili throw new RuntimeSnapshotParseError(`AXe describe-ui returned invalid JSON: ${message}`); } - const hierarchy = (() => { - if (Array.isArray(parsed)) { - return parsed as AccessibilityNode[]; - } - - if (isRecord(parsed) && Array.isArray(parsed.elements)) { - return parsed.elements as AccessibilityNode[]; - } + if (Array.isArray(parsed)) { + return parsed as AccessibilityNode[]; + } - throw new RuntimeSnapshotParseError( - 'AXe describe-ui did not return an accessibility element array.', - ); - })(); + if (isRecord(parsed) && Array.isArray(parsed.elements)) { + return parsed.elements as AccessibilityNode[]; + } - return hierarchy; + throw new RuntimeSnapshotParseError( + 'AXe describe-ui did not return an accessibility element array.', + ); } export function 
createRuntimeSnapshotRecord(params: { @@ -728,18 +724,21 @@ export function getRuntimeElementSwipePoints( const top = Math.round(frame.y + verticalInset); const bottom = Math.round(frame.y + frame.height - verticalInset); - const points = ((): { from: Point; to: Point } => { - switch (direction) { - case 'up': - return { from: { x: center.x, y: bottom }, to: { x: center.x, y: top } }; - case 'down': - return { from: { x: center.x, y: top }, to: { x: center.x, y: bottom } }; - case 'left': - return { from: { x: right, y: center.y }, to: { x: left, y: center.y } }; - case 'right': - return { from: { x: left, y: center.y }, to: { x: right, y: center.y } }; - } - })(); + let points: { from: Point; to: Point }; + switch (direction) { + case 'up': + points = { from: { x: center.x, y: bottom }, to: { x: center.x, y: top } }; + break; + case 'down': + points = { from: { x: center.x, y: top }, to: { x: center.x, y: bottom } }; + break; + case 'left': + points = { from: { x: right, y: center.y }, to: { x: left, y: center.y } }; + break; + case 'right': + points = { from: { x: left, y: center.y }, to: { x: right, y: center.y } }; + break; + } if (isDegenerateSwipe(points.from, points.to)) { return { diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index 3506d8c28..35f7ae8ef 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -2376,7 +2376,7 @@ function createSpecialCaseItems( break; } case 'touch': - successMessage = `Touch event (${result.action.event}) on elementRef ${result.action.elementRef} executed successfully.`; + successMessage = `Touch event (${result.action.event ?? 
'touch'}) on elementRef ${result.action.elementRef} executed successfully.`; break; case 'long-press': successMessage = `Long press on elementRef ${result.action.elementRef} for ${result.action.durationMs}ms simulated successfully.`; From 2e45d06cd4828d077d20a7ee7654fee9830982a2 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 12:36:52 +0100 Subject: [PATCH 30/35] fix(ui-automation): Remove remaining public wording leaks Keep the swipe manifest description app-agnostic and free of runtime snapshot terminology. Use a neutral reserved email address in runtime snapshot fixtures to avoid personal-looking test data. Co-Authored-By: Codex --- manifests/tools/swipe.yaml | 2 +- .../tools/ui-automation/__tests__/runtime-snapshot.test.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/manifests/tools/swipe.yaml b/manifests/tools/swipe.yaml index 2e6eb5d18..962a4deeb 100644 --- a/manifests/tools/swipe.yaml +++ b/manifests/tools/swipe.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/swipe names: mcp: swipe cli: swipe -description: Swipe within a scrollable UI element using an element reference from the latest runtime snapshot. +description: Swipe within a scrollable UI element using a visible element reference from the current UI. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index 836dbe24c..5ff764fa2 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -30,7 +30,7 @@ describe('runtime snapshot normalization', () => { type: 'TextField', role: 'AXTextField', AXLabel: 'Email', - AXValue: 'cam@example.com', + AXValue: 'user@example.com', AXUniqueId: 'email-field', AXSelected: true, frame: { x: 20, y: 80, width: 220, height: 44 }, @@ -63,7 +63,7 @@ describe('runtime snapshot normalization', () => { ref: 'e2', role: 'text-field', label: 'Email', - value: 'cam@example.com', + value: 'user@example.com', identifier: 'email-field', frame: { x: 20, y: 80, width: 220, height: 44 }, state: { enabled: true, selected: true, visible: true }, From b4d7cf0280b96b3f6a34c80141c6cfa080caed53 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 13:19:00 +0100 Subject: [PATCH 31/35] fix(weather): Remove obsolete mock launch flag usage Remove the leftover mock weather launch argument from the Weather example README and UI tests. The app now uses deterministic bundled data directly, so tests and docs should not reintroduce the launch flag path. Co-Authored-By: Codex --- example_projects/Weather/README.md | 12 +++--------- .../Weather/WeatherUITests/WeatherUITests.swift | 1 - .../WeatherUITests/WeatherUITestsLaunchTests.swift | 1 - 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/example_projects/Weather/README.md b/example_projects/Weather/README.md index 6879347f4..6e7aa9325 100644 --- a/example_projects/Weather/README.md +++ b/example_projects/Weather/README.md @@ -2,20 +2,14 @@ Atmos Weather is a native SwiftUI weather app prototype for iOS. 
-## Launch with mock weather data +## Launch -Build and run the app with XcodeBuildMCP first: +Build and run the app with XcodeBuildMCP: ```bash ../../build/cli.js simulator build-and-run ``` -Then relaunch the installed app with the mock API argument: - -```bash -../../build/cli.js simulator launch-app --json '{"bundleId":"com.sentry.weather.Weather","launchArgs":["--mock-weather-api"]}' -``` - ## JSON fixtures Fixture JSON files live in: @@ -96,4 +90,4 @@ Run the app test suite through XcodeBuildMCP: ../../build/cli.js simulator test ``` -UI tests inject `--mock-weather-api` themselves so they do not depend on the production API endpoint. \ No newline at end of file +The app uses bundled deterministic weather data so UI tests do not depend on the production API endpoint. \ No newline at end of file diff --git a/example_projects/Weather/WeatherUITests/WeatherUITests.swift b/example_projects/Weather/WeatherUITests/WeatherUITests.swift index 75c98bb47..dcffc8058 100644 --- a/example_projects/Weather/WeatherUITests/WeatherUITests.swift +++ b/example_projects/Weather/WeatherUITests/WeatherUITests.swift @@ -69,7 +69,6 @@ final class WeatherUITests: XCTestCase { @MainActor private func launchApp() -> XCUIApplication { let app = XCUIApplication() - app.launchArguments.append("--mock-weather-api") app.launch() return app } diff --git a/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift b/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift index 9b7f410fc..75615e972 100644 --- a/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift +++ b/example_projects/Weather/WeatherUITests/WeatherUITestsLaunchTests.swift @@ -20,7 +20,6 @@ final class WeatherUITestsLaunchTests: XCTestCase { @MainActor func testLaunch() throws { let app = XCUIApplication() - app.launchArguments.append("--mock-weather-api") app.launch() // Insert steps here to perform after app launch but before taking a screenshot, From 
34ff914a5b8cc34d0d0824e3acb03adae4358a6c Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 18:51:13 +0100 Subject: [PATCH 32/35] fix(ui-automation): Promote batch guidance for visible switches Surface batch as the preferred next step when runtime snapshots contain multiple visible switch controls. This keeps switches out of generic tap guidance while making same-screen settings changes discoverable at the point agents plan their actions. Also clarify MCP server and tool descriptions so agents know to prefer batch for multiple same-screen elementRef taps without intermediate assertions. Co-Authored-By: Codex --- manifests/tools/batch.yaml | 2 +- manifests/tools/tap.yaml | 2 +- .../__tests__/runtime-next-steps.test.ts | 52 +++++++++++++++++++ .../__tests__/snapshot_ui.test.ts | 28 ++++++++-- .../shared/runtime-next-steps.ts | 13 ++++- src/server/server.ts | 2 +- 6 files changed, 90 insertions(+), 9 deletions(-) diff --git a/manifests/tools/batch.yaml b/manifests/tools/batch.yaml index 9967e4669..4818d0e8b 100644 --- a/manifests/tools/batch.yaml +++ b/manifests/tools/batch.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/batch names: mcp: batch cli: batch -description: Execute structured rs/1 tap steps in one simulator session. Use refs from the latest snapshot_ui or wait_for_ui output, for example {"steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}. Do not pass raw AXe strings such as "tap e7". Omit preDelay/postDelay for switch elementRefs; switches execute as touch down/up steps and reject delays. +description: UI automation batch for multiple same-screen elementRef taps, especially visible settings switches that can be toggled without intermediate assertions. Use refs from the latest snapshot_ui or wait_for_ui output, for example {"steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}. Do not pass raw AXe strings such as "tap e7". 
Omit preDelay/postDelay for switch elementRefs; switches execute as touch down/up steps and reject delays. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: '2' diff --git a/manifests/tools/tap.yaml b/manifests/tools/tap.yaml index 33740afd7..9587e704c 100644 --- a/manifests/tools/tap.yaml +++ b/manifests/tools/tap.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/tap names: mcp: tap cli: tap -description: Tap one elementRef from the latest snapshot_ui or wait_for_ui output. Other same-screen refs may remain usable after success; refresh after navigation, scrolling, sheet changes, or obvious layout changes. +description: Tap one elementRef from the latest snapshot_ui or wait_for_ui output. For multiple same-screen taps or visible switch toggles with no intermediate assertion, prefer batch. Other same-screen refs may remain usable after success; refresh after navigation, scrolling, sheet changes, or obvious layout changes. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: '2' diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts index c7792735c..55875c4ea 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts @@ -139,6 +139,58 @@ describe('runtime snapshot next steps', () => { }); }); + it('promotes visible switches as a batch next step', () => { + recordSnapshot([ + createScrollView({ + AXIdentifier: 'settings.sheet', + children: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Atmospheric animations', + AXValue: '1', + }), + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Severe weather alerts', + AXValue: '1', + }), + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const switchRefs = 
snapshot.elements + .filter((element) => element.role === 'switch') + .map((element) => element.ref); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Batch visible switch toggles', + tool: 'batch', + params: { + simulatorId, + steps: switchRefs.slice(0, 2).map((elementRef) => ({ + action: 'tap', + elementRef, + })), + }, + }); + expect(steps.find((step) => step.tool === 'tap')).toBeUndefined(); + }); + it('uses hierarchy depth only as a foreground-root tie breaker', () => { recordSnapshot([ nestNode( diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index aa60f8e7d..e6a3f7240 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -550,7 +550,7 @@ describe('Snapshot UI Plugin', () => { ]); }); - it('should keep state-changing controls in targets without promoting them as generic next steps', async () => { + it('should keep state-changing controls out of generic tap guidance while promoting switch batches', async () => { const uiHierarchy = JSON.stringify({ elements: [ { @@ -590,7 +590,17 @@ describe('Snapshot UI Plugin', () => { ), ); - expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toEqual({ + label: 'Batch visible switch toggles', + tool: 'batch', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + steps: [ + { action: 'tap', elementRef: 'e1' }, + { action: 'tap', elementRef: 'e2' }, + ], + }, + }); expect(ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); const capture = @@ -603,7 +613,7 @@ describe('Snapshot UI Plugin', () => { expect(targets).toContainEqual(expect.objectContaining({ action: 'tap', elementRef: 'e2' })); }); - it('should not 
promote state-changing controls into batch or tap next-step guidance', async () => { + it('should promote visible switches into batch while keeping generic tap on content', async () => { const uiHierarchy = JSON.stringify({ elements: [ { @@ -673,7 +683,17 @@ describe('Snapshot UI Plugin', () => { ), ); - expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toBeUndefined(); + expect(ctx.nextSteps?.find((step) => step.tool === 'batch')).toEqual({ + label: 'Batch visible switch toggles', + tool: 'batch', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + steps: [ + { action: 'tap', elementRef: 'e3' }, + { action: 'tap', elementRef: 'e5' }, + ], + }, + }); expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ simulatorId: '12345678-1234-4234-8234-123456789012', elementRef: 'e4', diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index b9a31568a..e442f4591 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -319,6 +319,8 @@ function filterToForegroundElements( * is no useful tap, batch, or scroll action to try. * - Tap examples skip text fields, hidden controls, and state-changing controls to avoid destructive * generic suggestions. + * - Batch examples include multiple visible switches because settings screens often require several + * same-screen toggles and batch is the efficient, app-agnostic primitive for that workflow. * - Scroll examples currently use the first scrollable element left after foreground filtering. * - Refresh/wait examples are included for fresh snapshot captures, but not after every action. */ @@ -348,12 +350,19 @@ export function createRuntimeSnapshotNextSteps(params: { }) .map(({ element }) => element); const tapElement = tapElements[0] ?? 
null; - const batchElements = tapElements.filter( + const sameScreenBatchElements = tapElements.filter( (element) => !isContentRichTapNextStepElement(element) && !isScreenChangingTapNextStepElement(element) && !isLowPriorityTapNextStepElement(element.label), ); + const switchBatchElements = nextStepElements.filter( + (element) => element.role === 'switch' && element.actions.includes('tap'), + ); + const batchElements = + switchBatchElements.length >= 2 ? switchBatchElements : sameScreenBatchElements; + const batchLabel = + switchBatchElements.length >= 2 ? 'Batch visible switch toggles' : 'Batch same-screen taps'; const scrollElement = nextStepElements.find(isScrollableNextStepElement) ?? null; const scrollNextStep: NextStep | null = scrollElement ? { @@ -398,7 +407,7 @@ export function createRuntimeSnapshotNextSteps(params: { ...(batchElements.length >= 2 ? [ { - label: 'Batch same-screen taps', + label: batchLabel, tool: 'batch', params: { simulatorId: params.simulatorId, diff --git a/src/server/server.ts b/src/server/server.ts index 1e35e9cac..1de931b7d 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -42,7 +42,7 @@ Capabilities: - macOS workflows: Build, run, and test macOS applications - Log capture: Stream and capture logs from simulators and devices - LLDB debugging: Attach debugger, set breakpoints, inspect stack traces and variables, execute LLDB commands -- UI automation: Capture screenshots, inspect view hierarchy with coordinates, perform taps/swipes/gestures, type text, press hardware buttons +- UI automation: Capture screenshots, inspect runtime UI snapshots, perform taps/swipes/gestures, type text, press hardware buttons, and batch multiple same-screen elementRef taps - SwiftPM: Build, run, test, and manage Swift Package Manager projects - Project scaffolding: Generate new iOS/macOS project templates From 1ac5b32cafa4fb27658600abf1a3d371f2df4165 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 19:41:05 +0100 Subject: 
[PATCH 33/35] fix(ui-automation): Restore Weather benchmark scroll guidance Restore compact runtime snapshot text summaries and add guarded root viewport scroll inference so agents receive valid app-agnostic scroll refs when semantic content overflows but no better descendant scroll target is exposed. The external Claude Weather benchmark completed successfully with 17 total tool calls after this change, preserving the optimized loop without Weather-specific logic or coordinate fallback behavior. Co-Authored-By: Codex CLI --- .../__tests__/runtime-snapshot.test.ts | 135 ++++++++++++++-- .../__tests__/snapshot_ui.test.ts | 145 ++++++++++++++++++ .../shared/runtime-next-steps.ts | 5 +- .../ui-automation/shared/runtime-snapshot.ts | 87 ++++++++++- .../structured-output-envelope.test.ts | 24 +-- 5 files changed, 367 insertions(+), 29 deletions(-) diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index 5ff764fa2..61165e2e6 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -182,18 +182,24 @@ describe('runtime snapshot normalization', () => { ); }); - it('does not infer swipeWithin on top-level applications with overflowing descendants', () => { + it('infers swipeWithin on top-level application roots with semantic vertical overflow', () => { const root = createNode({ type: 'Application', role: 'AXApplication', - AXLabel: 'Weather', + AXLabel: 'Example', frame: { x: 0, y: 0, width: 390, height: 844 }, children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }), createNode({ type: 'StaticText', role: 'AXStaticText', - AXLabel: 'Updated just now', - frame: { x: 140, y: 1200, width: 120, height: 20 }, + AXLabel: 'Details available below', + frame: { x: 40, y: 920, width: 220, height: 24 }, }), ], }); @@ 
-208,29 +214,34 @@ describe('runtime snapshot normalization', () => { expect.objectContaining({ ref: 'e1', role: 'application', - label: 'Weather', - actions: [], + label: 'Example', + actions: ['swipeWithin'], }), ); - expect(snapshot.payload.actions).not.toContainEqual({ + expect(snapshot.payload.actions).toContainEqual({ action: 'swipeWithin', elementRef: 'e1', - label: 'Weather', + label: 'Example', + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toEqual({ + ok: true, + from: { x: 195, y: 717 }, + to: { x: 195, y: 127 }, }); }); - it('does not infer swipeWithin on top-level windows with overflowing descendants', () => { + it('infers swipeWithin on top-level windows with semantic vertical overflow', () => { const root = createNode({ type: 'Window', role: 'AXWindow', - AXLabel: 'Weather', + AXLabel: 'Example', frame: { x: 0, y: 0, width: 390, height: 844 }, children: [ createNode({ type: 'StaticText', role: 'AXStaticText', - AXLabel: 'Updated just now', - frame: { x: 140, y: 1200, width: 120, height: 20 }, + AXLabel: 'More content below', + frame: { x: 140, y: 920, width: 160, height: 24 }, }), ], }); @@ -245,8 +256,8 @@ describe('runtime snapshot normalization', () => { expect.objectContaining({ ref: 'e1', role: 'window', - label: 'Weather', - actions: [], + label: 'Example', + actions: ['swipeWithin'], }), ); }); @@ -275,6 +286,70 @@ describe('runtime snapshot normalization', () => { expect(snapshot.payload.elements[0]?.actions).toEqual([]); }); + it('does not infer root viewport swipeWithin from anonymous geometry-only overflow', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXIdentifier: undefined, + frame: { x: 20, y: 920, width: 240, height: 80 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + 
uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).toEqual([]); + }); + + it('does not infer root viewport swipeWithin when a better descendant scroll target exists', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'app.contentPanel', + frame: { x: 0, y: 100, width: 390, height: 600 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Additional details below', + frame: { x: 40, y: 920, width: 220, height: 24 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).not.toContain('swipeWithin'); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'scroll-view', + identifier: 'app.contentPanel', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + it('keeps sheet hosts swipeable when the current visible sheet content fits', () => { const root = createNode({ type: 'Application', @@ -620,6 +695,38 @@ describe('runtime snapshot normalization', () => { ); }); + it('classifies generic containers with scroll-view identifiers as scroll views', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXIdentifier: 'app.mainScrollView', + AXLabel: undefined, + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Visible child', + frame: { x: 20, y: 120, width: 120, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'scroll-view', + identifier: 'app.mainScrollView', + actions: 
expect.arrayContaining(['swipeWithin']), + }), + ); + }); + it('keeps an unlabeled other swipe target as fallback when no better scroll ref exists', () => { const snapshot = createRuntimeSnapshotRecord({ simulatorId, diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index e6a3f7240..f5c4eb91f 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -361,6 +361,151 @@ describe('Snapshot UI Plugin', () => { ]); }); + it('should include scroll guidance for generic containers with scroll-view identifiers', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Other', + role: 'AXGroup', + AXIdentifier: 'app.mainScrollView', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + { + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Visible content', + frame: { x: 20, y: 160, width: 140, height: 24 }, + }, + ], + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + AXIdentifier: 'app.settingsButton', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + const scrollElement = + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 
capture.elements[0] + : undefined; + expect(scrollElement).toEqual( + expect.objectContaining({ + role: 'scroll-view', + identifier: 'app.mainScrollView', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + withinElementRef: 'e1', + direction: 'up', + distance: 0.5, + }); + }); + + it('should include root viewport scroll guidance for semantic vertical overflow', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + { + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Additional details below', + frame: { x: 40, y: 920, width: 220, height: 24 }, + }, + ], + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + const rootElement = + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 
capture.elements[0] + : undefined; + expect(rootElement).toEqual( + expect.objectContaining({ + role: 'application', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + expect(ctx.nextSteps?.find((step) => step.tool === 'swipe')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + withinElementRef: 'e1', + direction: 'up', + distance: 0.5, + }); + expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ + 'snapshot_ui', + 'wait_for_ui', + 'swipe', + 'tap', + ]); + expect(ctx.nextSteps?.some((step) => step.tool === 'screenshot')).toBe(false); + }); + it('should include scroll guidance before screenshots when scrollable content is present', async () => { const uiHierarchy = JSON.stringify({ elements: [ diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index e442f4591..e1dc8ec49 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -132,7 +132,10 @@ function isScrollableNextStepElement(element: { actions: readonly string[]; role?: string; }): boolean { - return element.actions.includes('swipeWithin') && element.role === 'scroll-view'; + return ( + element.actions.includes('swipeWithin') && + (element.role === 'scroll-view' || element.role === 'application' || element.role === 'window') + ); } /** diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 482b3cc92..5c0c4d042 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -100,7 +100,14 @@ function readFrame(node: AccessibilityNode): Frame { ); } -function deriveRole(node: AccessibilityNode): RuntimeElementRoleV1 | undefined { +function hasScrollSemanticIdentifier(identifier: string | undefined): boolean { + return 
/(?:^|[._-])scroll(?:view|[-_.]view)?(?:$|[._-])|scrollView/i.test(identifier ?? ''); +} + +function deriveRole( + node: AccessibilityNode, + identifier: string | undefined, +): RuntimeElementRoleV1 | undefined { const roleText = [node.role, node.type, node.subrole, node.role_description] .map((value) => normalizeText(value)?.toLowerCase()) .filter((value): value is string => value !== undefined) @@ -124,6 +131,9 @@ function deriveRole(node: AccessibilityNode): RuntimeElementRoleV1 | undefined { if (/cell|row/.test(roleText)) return 'cell'; if (/scroll/.test(roleText)) return 'scroll-view'; if (/table|list|outline|collection/.test(roleText)) return 'list'; + if (hasScrollSemanticIdentifier(identifier) && /group|other|view|container/.test(roleText)) { + return 'scroll-view'; + } if (/(^|\b|ax)tab(\b|group|$)/.test(roleText)) return 'tab'; return 'other'; } @@ -253,10 +263,10 @@ function normalizeNode(input: NormalizedNodeInput, index: number): RuntimeSnapsh const { node, path, depth } = input; const ref = `e${index + 1}`; const frame = readFrame(node); - const role = deriveRole(node); const label = readText(node, ['AXLabel', 'title', 'help', 'label']); const value = readText(node, ['AXValue', 'value']); const identifier = readText(node, ['AXUniqueId', 'AXIdentifier', 'identifier', 'id']); + const role = deriveRole(node, identifier); const enabled = node.enabled !== false; const customActions = normalizeCustomActions(node.custom_actions); const actions = deriveActions({ @@ -326,6 +336,62 @@ function frameOverflowsContainer(frame: Frame, containerFrame: Frame): boolean { ); } +function frameVerticallyOverflowsContainer(frame: Frame, containerFrame: Frame): boolean { + const tolerance = 8; + return ( + frame.y < containerFrame.y - tolerance || + frame.y + frame.height > containerFrame.y + containerFrame.height + tolerance + ); +} + +function hasPublicSemanticIdentity(element: RuntimeElementV1): boolean { + return ( + element.label !== undefined || + element.value !== 
undefined || + (element.identifier !== undefined && !isGenericInternalIdentifier(element.identifier)) + ); +} + +function isTopLevelViewportElement(element: RuntimeSnapshotElementRecord): boolean { + const { role } = element.publicElement; + return (role === 'application' || role === 'window') && !element.metadata.path.includes('.'); +} + +function hasSemanticVerticalOverflowingDescendant( + element: RuntimeSnapshotElementRecord, + elements: RuntimeSnapshotElementRecord[], +): boolean { + return elements.some((candidate) => { + if ( + candidate === element || + !isDescendantPath(element.metadata.path, candidate.metadata.path) + ) { + return false; + } + return ( + hasPublicSemanticIdentity(candidate.publicElement) && + isVisible(candidate.publicElement.frame) && + frameVerticallyOverflowsContainer(candidate.publicElement.frame, element.publicElement.frame) + ); + }); +} + +function hasPreferredDescendantSwipeTarget( + element: RuntimeSnapshotElementRecord, + elements: RuntimeSnapshotElementRecord[], +): boolean { + return elements.some( + (candidate) => + candidate !== element && + isDescendantPath(element.metadata.path, candidate.metadata.path) && + isPreferredSwipeTarget(candidate), + ); +} + +function createViewportSwipeFrame(viewportFrame: Frame): Frame { + return normalizeFrame(viewportFrame); +} + function isSheetGrabberElement(element: RuntimeSnapshotElementRecord): boolean { return element.publicElement.label?.toLowerCase() === 'sheet grabber'; } @@ -452,6 +518,23 @@ function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): vo } } + for (const element of elements) { + const { publicElement, metadata } = element; + if ( + !isTopLevelViewportElement(element) || + publicElement.state?.visible === false || + !isVisible(publicElement.frame) || + publicElement.actions.includes('swipeWithin') || + hasPreferredDescendantSwipeTarget(element, elements) || + !hasSemanticVerticalOverflowingDescendant(element, elements) + ) { + continue; + } + + 
publicElement.actions.push('swipeWithin'); + metadata.swipeFrame = createViewportSwipeFrame(publicElement.frame); + } + pruneGenericFallbackSwipeTargets(elements); } diff --git a/src/utils/__tests__/structured-output-envelope.test.ts b/src/utils/__tests__/structured-output-envelope.test.ts index 1e7ff263e..e8f1a4688 100644 --- a/src/utils/__tests__/structured-output-envelope.test.ts +++ b/src/utils/__tests__/structured-output-envelope.test.ts @@ -64,8 +64,8 @@ describe('toStructuredEnvelope', () => { { ref: 'e2', role: 'button', - label: 'San Francisco', - identifier: 'weather.locationButton', + label: 'Overview', + identifier: 'app.primaryButton', frame: { x: 12, y: 81, width: 178, height: 33 }, actions: ['tap', 'longPress', 'touch'], }, @@ -83,30 +83,30 @@ describe('toStructuredEnvelope', () => { { ref: 'e1', role: 'application', - label: 'Weather', + label: 'Example', frame: { x: 0, y: 0, width: 390, height: 844 }, actions: ['swipeWithin'], }, { ref: 'e2', role: 'button', - label: 'San Francisco', - identifier: 'weather.locationButton', + label: 'Overview', + identifier: 'app.primaryButton', frame: { x: 12, y: 81, width: 178, height: 33 }, actions: ['tap', 'longPress', 'touch'], }, { ref: 'e3', role: 'text', - label: '10.7 mm', + label: 'Current reading', frame: { x: 24, y: 140, width: 80, height: 24 }, state: { visible: true }, actions: ['longPress', 'touch'], }, ], actions: [ - { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, - { action: 'tap', elementRef: 'e2', label: 'San Francisco' }, + { action: 'swipeWithin', elementRef: 'e1', label: 'Example' }, + { action: 'tap', elementRef: 'e2', label: 'Overview' }, ], }, }; @@ -125,14 +125,14 @@ describe('toStructuredEnvelope', () => { screenHash: 'screen-one', seq: 1, count: 3, - targets: ['e2|tap|button|San Francisco||weather.locationButton'], - scroll: ['e1|swipe|application|Weather||'], - text: ['e3|text|text|10.7 mm||'], + targets: ['e2|tap|button|Overview||app.primaryButton'], + scroll: 
['e1|swipe|application|Example||'], + text: ['e3|text|text|Current reading||'], udid: 'SIMULATOR-1', }, waitMatch: { predicate: 'exists', - matches: ['e2|tap|button|San Francisco||weather.locationButton'], + matches: ['e2|tap|button|Overview||app.primaryButton'], }, }, }); From f42dc2b7d48cc0fe0de457640cf23e0644fcb387 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Tue, 19 May 2026 23:53:31 +0100 Subject: [PATCH 34/35] fix(ui-automation): Restore sheet scroll guidance Expose actionable sheet scroll targets and make swipe distance control the computed stroke length. This keeps Claude from falling back to generic app root scrolling when a sheet/list is the real scroll target. Ignore local validation artifacts so benchmark transcripts stay out of the working tree while remaining preserved on disk. Co-Authored-By: Codex CLI --- .gitignore | 1 + CHANGELOG.md | 1 + manifests/tools/swipe.yaml | 2 +- .../__tests__/runtime-next-steps.test.ts | 173 ++++++++++++++++++ .../__tests__/runtime-snapshot.test.ts | 39 +++- .../ui-automation/__tests__/swipe.test.ts | 77 +++++++- .../shared/runtime-next-steps.ts | 98 +++++++++- .../ui-automation/shared/runtime-snapshot.ts | 73 ++++++-- src/mcp/tools/ui-automation/swipe.ts | 12 +- 9 files changed, 439 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index 53ff28412..fce89463a 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,4 @@ DerivedData /.pr-learning /repros /.xcodebuildmcp +/out.nosync diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b6160bbb..75d95757b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ ### Fixed +- Fixed `swipe` distance handling so distance is a normalized stroke fraction used for endpoint calculation, and improved sheet/list scroll guidance so real descendant scroll containers are preferred over application/window root fallbacks. 
- Fixed compact runtime snapshots so top-level app and window refs are not advertised as swipe targets just because a generic descendant overflows their frame. - Fixed `wait_for_ui` focus waits so elements that do not expose focus state return a typed recoverable error instead of timing out. - Fixed invalid `touch` calls so structured output no longer reports a fake touch event when neither `down` nor `up` was requested. diff --git a/manifests/tools/swipe.yaml b/manifests/tools/swipe.yaml index 962a4deeb..b916d65c4 100644 --- a/manifests/tools/swipe.yaml +++ b/manifests/tools/swipe.yaml @@ -3,7 +3,7 @@ module: mcp/tools/ui-automation/swipe names: mcp: swipe cli: swipe -description: Swipe within a scrollable UI element using a visible element reference from the current UI. +description: Swipe within a scrollable UI element using a visible element reference from the current UI. Optional distance is a normalized stroke fraction greater than 0 and up to 1. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts index 55875c4ea..0ffb54fcc 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts @@ -105,6 +105,179 @@ describe('runtime snapshot next steps', () => { }); }); + it('prefers an identified sheet list over background scroll views in flattened sheets', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 110, width: 390, height: 210 }, + children: [ + createNode({ AXLabel: 'Now', frame: { x: 20, y: 130, width: 80, height: 40 } }), + ], + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 
'example.locationsSheet', + frame: { x: 0, y: 360, width: 390, height: 484 }, + children: [ + createNode({ AXLabel: 'Close', frame: { x: 320, y: 370, width: 44, height: 44 } }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'London', + frame: { x: 20, y: 430, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom', + AXValue: 'saved', + frame: { x: 20, y: 500, width: 350, height: 88 }, + }), + ], + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const sheetListRef = snapshot.elements.find( + (element) => element.identifier === 'example.locationsSheet', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: sheetListRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + + it('prefers a foreground sheet list over application root sheet scrolling', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 157, y: 300, width: 76, height: 8 }, + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 'example.sheetList', + frame: { x: 0, y: 320, width: 390, height: 524 }, + children: [ + createNode({ AXLabel: 'Close', frame: { x: 320, y: 340, width: 44, height: 44 } }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 390, width: 300, height: 44 }, + }), + ], + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const rootRef = snapshot.elements.find((element) => element.role === 'application')?.ref; + const listRef = snapshot.elements.find( + (element) => element.identifier 
=== 'example.sheetList', + )?.ref; + + expect(rootRef).toBeDefined(); + expect(listRef).toBeDefined(); + expect(snapshot.elements.find((element) => element.ref === rootRef)?.actions).not.toContain( + 'swipeWithin', + ); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: listRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + + it('prefers a vertical list over a small horizontal scroll view for upward scroll guidance', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.horizontalScroller', + frame: { x: 20, y: 100, width: 350, height: 120 }, + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 'example.verticalList', + frame: { x: 0, y: 240, width: 390, height: 520 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const verticalListRef = snapshot.elements.find( + (element) => element.identifier === 'example.verticalList', + )?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps).toContainEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: verticalListRef, + direction: 'up', + distance: 0.5, + }, + }); + }); + it('keeps unselected tabs available as screen-changing tap suggestions', () => { recordSnapshot([ createNode({ diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index 61165e2e6..bcb9d9ef0 100644 --- 
a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -354,7 +354,7 @@ describe('runtime snapshot normalization', () => { const root = createNode({ type: 'Application', role: 'AXApplication', - AXLabel: 'Weather', + AXLabel: 'Example', frame: { x: 0, y: 0, width: 402, height: 874 }, children: [ createNode({ @@ -384,14 +384,14 @@ describe('runtime snapshot normalization', () => { expect.objectContaining({ ref: 'e1', role: 'application', - label: 'Weather', + label: 'Example', actions: ['swipeWithin'], }), ); expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'down')).toEqual({ ok: true, - from: { x: 201, y: 372 }, - to: { x: 201, y: 677 }, + from: { x: 201, y: 273 }, + to: { x: 201, y: 732 }, }); }); @@ -399,7 +399,7 @@ describe('runtime snapshot normalization', () => { const root = createNode({ type: 'Application', role: 'AXApplication', - AXLabel: 'Weather', + AXLabel: 'Example', frame: { x: 0, y: 0, width: 390, height: 844 }, children: [ createNode({ @@ -420,8 +420,8 @@ describe('runtime snapshot normalization', () => { expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toEqual({ ok: true, - from: { x: 195, y: 693 }, - to: { x: 195, y: 621 }, + from: { x: 195, y: 778 }, + to: { x: 195, y: 666 }, }); }); @@ -827,6 +827,31 @@ describe('runtime snapshot normalization', () => { expect(getRuntimeElementActivationPoint(snapshot.elements[0]!)).toEqual({ x: 307, y: 903 }); }); + it('uses normalized distance to shorten swipe strokes within safe endpoints', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up', 0.5)).toEqual({ + ok: true, + from: { x: 100, y: 270 }, + to: { x: 100, y: 130 }, + }); + 
expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up', 0.8)).toEqual({ + ok: true, + from: { x: 100, y: 312 }, + to: { x: 100, y: 88 }, + }); + }); + it('keeps full-screen swipe points away from unsafe viewport edges', () => { const snapshot = createRuntimeSnapshotRecord({ simulatorId, diff --git a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts index 88234e1a0..37cbfc143 100644 --- a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts @@ -57,7 +57,7 @@ describe('Swipe Tool', () => { withinElementRef: 'e1', direction: 'down', duration: 1.5, - distance: 10, + distance: 0.5, preDelay: 0.5, postDelay: 0.25, }).success, @@ -68,6 +68,10 @@ describe('Swipe Tool', () => { expect( schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', distance: 0 }).success, ).toBe(false); + expect( + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', distance: 1.1 }) + .success, + ).toBe(false); expect( schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', preDelay: 10.1 }) .success, @@ -117,7 +121,7 @@ describe('Swipe Tool', () => { ]); }); - it('preserves optional AXe swipe flags', async () => { + it('preserves optional AXe swipe flags without forwarding distance as AXe delta', async () => { recordSnapshot([ createNode({ type: 'ScrollView', @@ -133,7 +137,7 @@ describe('Swipe Tool', () => { withinElementRef: 'e1', direction: 'right', duration: 2, - distance: 10, + distance: 0.5, preDelay: 0.5, postDelay: 0.25, }, @@ -144,25 +148,23 @@ describe('Swipe Tool', () => { type: 'swipe', withinElementRef: 'e1', direction: 'right', - from: { x: 30, y: 200 }, - to: { x: 170, y: 200 }, + from: { x: 65, y: 200 }, + to: { x: 135, y: 200 }, durationSeconds: 2, }); expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', - '30', + '65', '--start-y', '200', '--end-x', - '170', + '135', '--end-y', '200', 
'--duration', '2', - '--delta', - '10', '--pre-delay', '0.5', '--post-delay', @@ -171,6 +173,63 @@ describe('Swipe Tool', () => { simulatorId, ]); }); + + it('uses distance as a normalized stroke fraction for endpoint calculation', async () => { + const { calls, executor } = createTrackingExecutor(); + + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up', distance: 0.5 }, + executor, + ); + + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up', distance: 0.8 }, + executor, + ); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'swipe', + '--start-x', + '100', + '--start-y', + '270', + '--end-x', + '100', + '--end-y', + '130', + '--udid', + simulatorId, + ]); + expect(calls[2]?.command).toEqual([ + '/mocked/axe/path', + 'swipe', + '--start-x', + '100', + '--start-y', + '312', + '--end-x', + '100', + '--end-y', + '88', + '--udid', + simulatorId, + ]); + }); }); describe('Resolution failures', () => { diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index e1dc8ec49..0b8509f19 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -128,16 +128,95 @@ function getTapNextStepElementPriority(element: { return 20; } +function hasScrollSemanticIdentity(element: { + label?: string; + value?: string; + identifier?: string; +}): boolean { + return ( + element.label !== undefined || element.value !== undefined || element.identifier !== undefined + ); +} + function isScrollableNextStepElement(element: { actions: readonly string[]; role?: string; + label?: string; + value?: string; + 
identifier?: string; }): boolean { return ( element.actions.includes('swipeWithin') && - (element.role === 'scroll-view' || element.role === 'application' || element.role === 'window') + (element.role === 'scroll-view' || + element.role === 'list' || + element.role === 'application' || + element.role === 'window' || + (element.role === 'other' && hasScrollSemanticIdentity(element))) ); } +function getScrollRolePriority(element: RuntimeElementV1): number { + switch (element.role) { + case 'scroll-view': + case 'list': + return 0; + case 'other': + return 1; + case 'application': + case 'window': + return 2; + default: + return 3; + } +} + +function getScrollIdentityPriority(element: { + label?: string; + value?: string; + identifier?: string; +}): number { + const identifier = compactTapNextStepText(element.identifier).toLowerCase(); + if (/(?:^|[._-])(sheet|list|table|panel|drawer|overlay|dialog)(?:$|[._-])/i.test(identifier)) { + return 0; + } + return hasScrollSemanticIdentity(element) ? 1 : 2; +} + +function compareScrollableNextStepCandidates( + left: { element: RuntimeElementV1; index: number }, + right: { element: RuntimeElementV1; index: number }, + recordsByRef: Map, +): number { + const roleDelta = getScrollRolePriority(left.element) - getScrollRolePriority(right.element); + if (roleDelta !== 0) { + return roleDelta; + } + + const identityDelta = + getScrollIdentityPriority(left.element) - getScrollIdentityPriority(right.element); + if (identityDelta !== 0) { + return identityDelta; + } + + const leftDepth = recordsByRef.get(left.element.ref)?.metadata.depth ?? 0; + const rightDepth = recordsByRef.get(right.element.ref)?.metadata.depth ?? 0; + if (leftDepth !== rightDepth) { + return rightDepth - leftDepth; + } + + const leftIsVertical = left.element.frame.height >= left.element.frame.width; + const rightIsVertical = right.element.frame.height >= right.element.frame.width; + if (leftIsVertical !== rightIsVertical) { + return leftIsVertical ? 
-1 : 1; + } + + if (left.element.frame.height !== right.element.frame.height) { + return right.element.frame.height - left.element.frame.height; + } + + return left.index - right.index; +} + /** * Checks AX hierarchy ancestry using the snapshot metadata path. * @@ -267,10 +346,17 @@ function findActiveForegroundRoot( return 0; } + const element = record.publicElement; + const rolePriority = Math.max(0, 3 - getScrollRolePriority(element)); + const identityPriority = Math.max(0, 2 - getScrollIdentityPriority(element)); + const verticalPriority = element.frame.height >= element.frame.width ? 1 : 0; const score = (hasDismissControl ? 100 : 0) + (hasTextEntry ? 60 : 0) + (hasStateControls ? 30 : 0) + + rolePriority + + identityPriority + + verticalPriority + record.metadata.depth / 1000 + (indexByRef.get(record.publicElement.ref) ?? 0) / 1_000_000; scoreByRef.set(record.publicElement.ref, score); @@ -324,7 +410,8 @@ function filterToForegroundElements( * generic suggestions. * - Batch examples include multiple visible switches because settings screens often require several * same-screen toggles and batch is the efficient, app-agnostic primitive for that workflow. - * - Scroll examples currently use the first scrollable element left after foreground filtering. + * - Scroll examples prefer real list/scroll-view targets, then semantic inferred containers, with + * application/window root scrolling used last as a fallback. * - Refresh/wait examples are included for fresh snapshot captures, but not after every action. */ export function createRuntimeSnapshotNextSteps(params: { @@ -366,7 +453,12 @@ export function createRuntimeSnapshotNextSteps(params: { switchBatchElements.length >= 2 ? switchBatchElements : sameScreenBatchElements; const batchLabel = switchBatchElements.length >= 2 ? 'Batch visible switch toggles' : 'Batch same-screen taps'; - const scrollElement = nextStepElements.find(isScrollableNextStepElement) ?? 
null; + const scrollElement = + nextStepElements + .map((element, index) => ({ element, index })) + .filter(({ element }) => isScrollableNextStepElement(element)) + .sort((left, right) => compareScrollableNextStepCandidates(left, right, recordsByRef))[0] + ?.element ?? null; const scrollNextStep: NextStep | null = scrollElement ? { label: 'Scroll visible content', diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 5c0c4d042..6fa216bb1 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -410,16 +410,34 @@ function findSheetGrabberDescendant( ); } -function createSheetSwipeFrame(containerFrame: Frame, grabberFrame: Frame): Frame { +function clamp(value: number, minimum: number, maximum: number): number { + return Math.min(Math.max(value, minimum), maximum); +} + +function createSheetSwipeFrame( + containerFrame: Frame, + grabberFrame: Frame, + descendantFrames: readonly Frame[], +): Frame { const minimumHeight = Math.min(120, Math.max(2, containerFrame.height * 0.3)); - const bottom = Math.round(containerFrame.y + containerFrame.height * 0.85); - const preferredTop = Math.round( - Math.max( - grabberFrame.y + grabberFrame.height + 120, - containerFrame.y + containerFrame.height * 0.35, - ), + const containerBottom = containerFrame.y + containerFrame.height; + const topMargin = Math.min(24, Math.max(8, containerFrame.height * 0.02)); + const topFloor = grabberFrame.y + grabberFrame.height + topMargin; + const defaultTop = Math.max(topFloor, containerFrame.y + containerFrame.height * 0.2); + const defaultBottom = containerFrame.y + containerFrame.height * 0.95; + const contentFrames = descendantFrames.filter( + (frame) => + isVisible(frame) && + framesIntersect(frame, containerFrame) && + frame.y + frame.height / 2 >= topFloor, ); - const top = Math.round(Math.min(preferredTop, bottom - minimumHeight)); + const 
contentTop = Math.min(...contentFrames.map((frame) => frame.y)); + const contentBottom = Math.max(...contentFrames.map((frame) => frame.y + frame.height)); + const preferredTop = contentFrames.length > 0 ? Math.min(defaultTop, contentTop) : defaultTop; + const preferredBottom = + contentFrames.length > 0 ? Math.max(defaultBottom, contentBottom) : defaultBottom; + const bottom = Math.round(clamp(preferredBottom, topFloor + minimumHeight, containerBottom)); + const top = Math.round(clamp(preferredTop, topFloor, bottom - minimumHeight)); return normalizeFrame({ x: containerFrame.x, @@ -501,10 +519,22 @@ function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): vo : null; if (sheetGrabber) { + if (hasPreferredDescendantSwipeTarget(element, elements)) { + continue; + } + publicElement.actions.push('swipeWithin'); metadata.swipeFrame = createSheetSwipeFrame( publicElement.frame, sheetGrabber.publicElement.frame, + elements + .filter( + (candidate) => + candidate !== element && + candidate !== sheetGrabber && + isDescendantPath(metadata.path, candidate.metadata.path), + ) + .map((candidate) => candidate.publicElement.frame), ); continue; } @@ -790,6 +820,7 @@ function getRuntimeSwipeCenter( export function getRuntimeElementSwipePoints( element: RuntimeSnapshotElementRecord, direction: RuntimeSwipeDirection, + distance = 1, ): RuntimeSwipePointResolution { const frame = element.metadata.swipeFrame ?? 
element.publicElement.frame; if (frame.width < 2 || frame.height < 2) { @@ -807,19 +838,37 @@ export function getRuntimeElementSwipePoints( const top = Math.round(frame.y + verticalInset); const bottom = Math.round(frame.y + frame.height - verticalInset); + const strokeFraction = clamp(distance, 0, 1); + const horizontalCenter = (left + right) / 2; + const verticalCenter = (top + bottom) / 2; + const horizontalHalfStroke = ((right - left) * strokeFraction) / 2; + const verticalHalfStroke = ((bottom - top) * strokeFraction) / 2; + let points: { from: Point; to: Point }; switch (direction) { case 'up': - points = { from: { x: center.x, y: bottom }, to: { x: center.x, y: top } }; + points = { + from: { x: center.x, y: Math.round(verticalCenter + verticalHalfStroke) }, + to: { x: center.x, y: Math.round(verticalCenter - verticalHalfStroke) }, + }; break; case 'down': - points = { from: { x: center.x, y: top }, to: { x: center.x, y: bottom } }; + points = { + from: { x: center.x, y: Math.round(verticalCenter - verticalHalfStroke) }, + to: { x: center.x, y: Math.round(verticalCenter + verticalHalfStroke) }, + }; break; case 'left': - points = { from: { x: right, y: center.y }, to: { x: left, y: center.y } }; + points = { + from: { x: Math.round(horizontalCenter + horizontalHalfStroke), y: center.y }, + to: { x: Math.round(horizontalCenter - horizontalHalfStroke), y: center.y }, + }; break; case 'right': - points = { from: { x: left, y: center.y }, to: { x: right, y: center.y } }; + points = { + from: { x: Math.round(horizontalCenter - horizontalHalfStroke), y: center.y }, + to: { x: Math.round(horizontalCenter + horizontalHalfStroke), y: center.y }, + }; break; } diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index 7d6085f01..ebf14e85c 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -43,7 +43,12 @@ const swipeSchema = z.object({ .positive({ message: 'Duration must be greater than 0 
seconds' }) .optional() .describe('seconds'), - distance: z.number().positive({ message: 'Distance must be greater than 0' }).optional(), + distance: z + .number() + .positive({ message: 'Distance must be greater than 0' }) + .max(1, { message: 'Distance must be at most 1' }) + .optional() + .describe('Normalized stroke fraction greater than 0 and up to 1'), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) @@ -88,7 +93,7 @@ export function createSwipeExecutor( }); } - const points = getRuntimeElementSwipePoints(resolution.element, direction); + const points = getRuntimeElementSwipePoints(resolution.element, direction, distance); if (!points.ok) { const uiError = createUiAutomationRecoverableError({ code: 'TARGET_NOT_ACTIONABLE', @@ -129,9 +134,6 @@ export function createSwipeExecutor( if (duration !== undefined) { commandArgs.push('--duration', String(duration)); } - if (distance !== undefined) { - commandArgs.push('--delta', String(distance)); - } if (preDelay !== undefined) { commandArgs.push('--pre-delay', String(preDelay)); } From 9a52e697be0693c5872c22a747c18c0b2617a683 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Wed, 20 May 2026 16:33:06 +0100 Subject: [PATCH 35/35] fix(ui-automation): Restore efficient sheet navigation Add an app-agnostic drag tool so agents can expand foreground sheets and scroll exposed sheet/list content without relying on synthetic swipe refs. Preserve useful semantic evidence outside callable target rows, and tighten next-step guidance so saved-but-not-selected rows do not invite no-op taps. Clarify batch and type_text tool descriptions to steer agents toward valid strict schemas and avoid wasted retry calls. 
Co-Authored-By: Codex --- CHANGELOG.md | 5 + manifests/tools/batch.yaml | 3 +- manifests/tools/drag.yaml | 17 + manifests/tools/type_text.yaml | 3 +- manifests/workflows/ui-automation.yaml | 1 + .../2.schema.json | 5 + .../2.schema.json | 19 + src/cli/register-tool-commands.ts | 4 + .../ui-automation/__tests__/drag.test.ts | 238 +++++++++ .../__tests__/runtime-next-steps.test.ts | 312 +++++++++++ .../__tests__/runtime-snapshot.test.ts | 232 ++++++++- .../__tests__/snapshot_ui.test.ts | 2 +- ...n-incomplete-completion-next-steps.test.ts | 266 ++++++++++ .../ui-action-no-op-next-steps.test.ts | 485 ++++++++++++++++++ .../ui-action-no-op-swipe-next-steps.test.ts | 134 +++++ .../__tests__/wait_for_ui.test.ts | 17 +- src/mcp/tools/ui-automation/batch.ts | 10 +- src/mcp/tools/ui-automation/drag.ts | 244 +++++++++ .../ui-automation/shared/domain-result.ts | 132 ++++- .../shared/runtime-next-steps.ts | 283 +++++++++- .../ui-automation/shared/runtime-snapshot.ts | 122 ++--- src/mcp/tools/ui-automation/swipe.ts | 1 + src/mcp/tools/ui-automation/tap.ts | 1 + src/mcp/tools/ui-automation/type_text.ts | 9 +- src/rendering/types.ts | 3 + src/types/domain-results.ts | 10 + .../structured-output-envelope.test.ts | 63 +++ .../__tests__/cli-text-renderer.test.ts | 54 ++ src/utils/renderers/domain-result-text.ts | 91 +++- src/utils/structured-output-envelope.ts | 72 ++- src/utils/tool-registry.ts | 5 + 31 files changed, 2716 insertions(+), 127 deletions(-) create mode 100644 manifests/tools/drag.yaml create mode 100644 src/mcp/tools/ui-automation/__tests__/drag.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/ui-action-incomplete-completion-next-steps.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/ui-action-no-op-next-steps.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/ui-action-no-op-swipe-next-steps.test.ts create mode 100644 src/mcp/tools/ui-automation/drag.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 75d95757b..d1c58fb5a 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ - Added `wait_for_ui` for polling runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. - Added structured element-ref `batch` tap steps, preserved same-screen refs after successful `tap` and `batch` actions, and improved UI automation guidance and next steps for one-observation interactions. - Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it. +- Added `drag` for element-ref based drag gestures, enabling agents to expand foreground sheets and drag real scroll/list regions without raw coordinate guesses. + +### Changed + +- Runtime snapshot guidance no longer advertises synthetic sheet swipe targets for foreground sheets. Agents should use real sheet grabber expansion and real descendant scroll/list targets with `drag` instead of inferred app/window-root sheet swipes. ### Fixed diff --git a/manifests/tools/batch.yaml b/manifests/tools/batch.yaml index 4818d0e8b..699eab740 100644 --- a/manifests/tools/batch.yaml +++ b/manifests/tools/batch.yaml @@ -3,7 +3,8 @@ module: mcp/tools/ui-automation/batch names: mcp: batch cli: batch -description: UI automation batch for multiple same-screen elementRef taps, especially visible settings switches that can be toggled without intermediate assertions. Use refs from the latest snapshot_ui or wait_for_ui output, for example {"steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}. Do not pass raw AXe strings such as "tap e7". Omit preDelay/postDelay for switch elementRefs; switches execute as touch down/up steps and reject delays. +description: >- + UI automation batch for multiple same-screen elementRef taps, especially visible settings switches that can be toggled without intermediate assertions. 
The input key is steps, never commands, and each step is an object such as {"action":"tap","elementRef":"e1"}; do not pass raw command strings. Use refs from the latest snapshot_ui or wait_for_ui output, for example {"steps":[{"action":"tap","elementRef":"e1"},{"action":"tap","elementRef":"e2"}]}. Omit preDelay/postDelay for switch elementRefs; switches execute as touch down/up steps and reject delays. outputSchema: schema: xcodebuildmcp.output.ui-action-result version: '2' diff --git a/manifests/tools/drag.yaml b/manifests/tools/drag.yaml new file mode 100644 index 000000000..3ed9078dc --- /dev/null +++ b/manifests/tools/drag.yaml @@ -0,0 +1,17 @@ +id: drag +module: mcp/tools/ui-automation/drag +names: + mcp: drag + cli: drag +description: >- + Drag from a visible runtime elementRef in a direction, then return a refreshed runtime UI snapshot. Use this for exposed sheet grabbers or real scroll/list content refs when nextSteps suggests dragging; do not use raw screen coordinates. +outputSchema: + schema: xcodebuildmcp.output.ui-action-result + version: "2" +routing: + stateful: true +annotations: + title: Drag + readOnlyHint: true + destructiveHint: false + openWorldHint: false diff --git a/manifests/tools/type_text.yaml b/manifests/tools/type_text.yaml index 86c825668..c2cd64ec7 100644 --- a/manifests/tools/type_text.yaml +++ b/manifests/tools/type_text.yaml @@ -3,7 +3,8 @@ module: mcp/tools/ui-automation/type_text names: mcp: type_text cli: type-text -description: Type text into a UI element by elementRef from a current rs/1 runtime snapshot, optionally replacing existing field contents. +description: >- + Type text into a UI element by elementRef from a current rs/1 runtime snapshot, optionally replacing existing field contents. elementRef is required; do not call with only text. Example input: {"elementRef":"e8","text":"London","replaceExisting":true}. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result version: "2" diff --git a/manifests/workflows/ui-automation.yaml b/manifests/workflows/ui-automation.yaml index 6d8dd1f26..0dcde9f50 100644 --- a/manifests/workflows/ui-automation.yaml +++ b/manifests/workflows/ui-automation.yaml @@ -10,6 +10,7 @@ tools: - touch - long_press - swipe + - drag - gesture - button - key_press diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json index 7528ddd14..8d25e8028 100644 --- a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -148,6 +148,11 @@ "type": "array", "items": { "type": "string" } }, + "evidence": { + "description": "Non-actionable semantic evidence rows in role|label|value|identifier format. These rows intentionally omit element refs.", + "type": "array", + "items": { "type": "string" } + }, "udid": { "type": "string" } }, "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json index 053c8a6d5..1a6ad06f5 100644 --- a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -91,6 +91,11 @@ "type": "array", "items": { "type": "string" } }, + "evidence": { + "description": "Non-actionable semantic evidence rows in role|label|value|identifier format. 
These rows intentionally omit element refs.", + "type": "array", + "items": { "type": "string" } + }, "udid": { "type": "string" } }, "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] @@ -208,6 +213,20 @@ }, "required": ["type"] }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "drag" }, + "elementRef": { "type": "string" }, + "direction": { "$ref": "#/$defs/direction" }, + "from": { "$ref": "#/$defs/point" }, + "to": { "$ref": "#/$defs/point" }, + "durationSeconds": { "type": "number", "minimum": 0 }, + "steps": { "type": "integer", "minimum": 1 } + }, + "required": ["type", "elementRef", "direction"] + }, { "type": "object", "additionalProperties": false, diff --git a/src/cli/register-tool-commands.ts b/src/cli/register-tool-commands.ts index fa5fde6c4..62aeaf273 100644 --- a/src/cli/register-tool-commands.ts +++ b/src/cli/register-tool-commands.ts @@ -102,6 +102,7 @@ function writeJsonOutput( options: { outputStyle: OutputStyle; verbose?: boolean }, ): boolean { const { structuredOutput } = handlerContext; + const suppressedTargetRefs = structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs; const envelope = structuredOutput ? toStructuredEnvelope( structuredOutput.result, @@ -112,6 +113,9 @@ function writeJsonOutput( nextStepRuntime: session.getNextStepsRuntime?.(), outputStyle: options.outputStyle, runtimeSnapshot: options.verbose ? 'full' : 'compact', + ...(suppressedTargetRefs + ? 
{ runtimeSnapshotSuppressedTargetRefs: suppressedTargetRefs } + : {}), }, ) : toStructuredEnvelope( diff --git a/src/mcp/tools/ui-automation/__tests__/drag.test.ts b/src/mcp/tools/ui-automation/__tests__/drag.test.ts new file mode 100644 index 000000000..876cd81bd --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/drag.test.ts @@ -0,0 +1,238 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import * as z from 'zod'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { callHandler, createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { dragLogic, handler, schema } from '../drag.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runDrag( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => dragLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} + +describe('Drag Tool', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes elementRef and direction without raw coordinate fields', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('direction'); + expect(schema).not.toHaveProperty('startX'); + expect(schema).not.toHaveProperty('startY'); + expect(schema).not.toHaveProperty('endX'); + expect(schema).not.toHaveProperty('endY'); + + const schemaObject 
= z.object(schema); + expect(schemaObject.safeParse({ elementRef: 'e1', direction: 'up' }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', direction: 'diagonal' }).success).toBe( + false, + ); + expect(schemaObject.safeParse({ direction: 'up' }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(false); + expect( + schemaObject.safeParse({ + elementRef: 'e1', + direction: 'down', + duration: 1.5, + distance: 0.5, + steps: 80, + preDelay: 0.5, + postDelay: 0.25, + }).success, + ).toBe(true); + expect( + schemaObject.safeParse({ elementRef: 'e1', direction: 'down', duration: 0 }).success, + ).toBe(false); + expect( + schemaObject.safeParse({ elementRef: 'e1', direction: 'down', distance: 0 }).success, + ).toBe(false); + expect( + schemaObject.safeParse({ elementRef: 'e1', direction: 'down', steps: 0 }).success, + ).toBe(false); + }); + }); + + describe('Command Generation', () => { + it('derives a viewport-relative upward drag from a sheet grabber', async () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Half screen', + frame: { x: 182, y: 446, width: 76, height: 24 }, + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag( + { simulatorId, elementRef: 'e2', direction: 'up', distance: 0.35 }, + executor, + ); + + expect(result).toMatchObject({ + didError: false, + action: { + type: 'drag', + elementRef: 'e2', + direction: 'up', + from: { x: 220, y: 458 }, + to: { x: 220, y: 123 }, + }, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'drag', + '--start-x', + '220', + '--start-y', + '458', + '--end-x', + '220', + '--end-y', + '123', + '--udid', + simulatorId, + ]); + }); + + it('uses within-element scroll points for scrollable drag 
targets', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 20, y: 255, width: 400, height: 637 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag( + { + simulatorId, + elementRef: 'e1', + direction: 'up', + distance: 0.7, + duration: 0.8, + steps: 80, + postDelay: 0.5, + }, + executor, + ); + + expect(result.action).toMatchObject({ + type: 'drag', + elementRef: 'e1', + direction: 'up', + from: { x: 220, y: 729 }, + to: { x: 220, y: 418 }, + durationSeconds: 0.8, + steps: 80, + }); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'drag', + '--start-x', + '220', + '--start-y', + '729', + '--end-x', + '220', + '--end-y', + '418', + '--duration', + '0.8', + '--steps', + '80', + '--post-delay', + '0.5', + '--udid', + simulatorId, + ]); + }); + }); + + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag({ simulatorId, elementRef: 'e1', direction: 'up' }, executor); + + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); + }); + + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runDrag({ simulatorId, elementRef: 'e404', direction: 'up' }, executor); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); + }); + }); + + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await callHandler(handler, { elementRef: 'e1', direction: 'up' }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required 
session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [createNode()], + }), + ]); + + const result = await runDrag( + { simulatorId, elementRef: 'e2', direction: 'up' }, + createFailingExecutor('drag failed'), + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e2', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts index 0ffb54fcc..6980f93a1 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-next-steps.test.ts @@ -1,5 +1,6 @@ import { beforeEach, describe, expect, it } from 'vitest'; import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import type { RuntimeSnapshotV1 } from '../../../../types/ui-snapshot.ts'; import { createRuntimeSnapshotNextSteps } from '../shared/runtime-next-steps.ts'; import { __resetRuntimeSnapshotStoreForTests, @@ -105,6 +106,68 @@ describe('runtime snapshot next steps', () => { }); }); + it('prioritizes real scrolling over low-information chrome taps', () => { + const snapshot: RuntimeSnapshotV1 = { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId, + screenHash: 'scrollable-main', + seq: 1, + capturedAtMs: 0, + expiresAtMs: 1, + actions: [], + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Example', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'Location', + identifier: 
'example.locationButton', + frame: { x: 20, y: 70, width: 120, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 'Settings', + identifier: 'example.settingsButton', + frame: { x: 320, y: 70, width: 44, height: 44 }, + actions: ['tap'], + }, + ], + }; + const scrollRef = 'e1'; + const locationRef = 'e2'; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId, + withinElementRef: scrollRef, + direction: 'up', + distance: 0.5, + }, + }); + expect(steps).toContainEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: locationRef }, + }); + }); + it('prefers an identified sheet list over background scroll views in flattened sheets', () => { recordSnapshot([ createNode({ @@ -231,6 +294,255 @@ describe('runtime snapshot next steps', () => { }); }); + it('does not suggest synthetic sheet scrolling when no real sheet scroll target exists', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.backgroundScroll', + frame: { x: 0, y: 80, width: 402, height: 260 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Locations', + AXIdentifier: 'example.locationsSheet', + frame: { x: 148, y: 104, width: 106, height: 32 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Edit', + AXIdentifier: 'example.locationsSheet', + frame: { x: 24, y: 96, width: 60, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 
'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: undefined, + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 362, height: 44 }, + }), + createNode({ + AXLabel: 'Use current location', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 218, width: 362, height: 54 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'MY LOCATIONS ¡ 7', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 292, width: 160, height: 20 }, + }), + createNode({ + AXLabel: 'San Francisco, 1:24 PM ¡ Cloudy', + frame: { x: 20, y: 326, width: 362, height: 72 }, + }), + createNode({ + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 415, width: 362, height: 72 }, + }), + createNode({ + AXLabel: 'Aspen, 2:24 PM ¡ Light Snow', + frame: { x: 20, y: 504, width: 362, height: 72 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const rootRef = snapshot.elements.find((element) => element.role === 'application')?.ref; + + expect(rootRef).toBeDefined(); + expect( + snapshot.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + expect(snapshot.elements.find((element) => element.ref === rootRef)?.actions).not.toContain( + 'swipeWithin', + ); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps.some((step) => step.tool === 'swipe')).toBe(false); + }); + + it('suggests expanding a collapsed foreground sheet via its real grabber', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', 
+ AXLabel: 'Sheet Grabber', + AXValue: 'Half screen', + frame: { x: 182, y: 446, width: 76, height: 24 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.sheet', + frame: { x: 374, y: 478, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search', + AXIdentifier: 'example.sheet', + frame: { x: 20, y: 518, width: 400, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Use current location', + AXIdentifier: 'example.sheet', + frame: { x: 20, y: 580, width: 400, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'First visible row', + frame: { x: 20, y: 650, width: 400, height: 72 }, + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const grabberRef = snapshot.elements.find((element) => element.label === 'Sheet Grabber')?.ref; + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Expand foreground sheet', + tool: 'drag', + params: { + simulatorId, + elementRef: grabberRef, + direction: 'up', + distance: 0.35, + duration: 0.8, + steps: 80, + postDelay: 0.8, + }, + }); + expect(steps.some((step) => step.tool === 'swipe')).toBe(false); + expect(steps.some((step) => step.tool === 'batch')).toBe(false); + }); + + it('prefers composite dragging real foreground sheet scroll content after expansion', () => { + recordSnapshot([ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Expanded', + frame: { x: 182, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 255, width: 
400, height: 637 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Edit', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 374, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Use current location', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 240, width: 400, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'San Francisco, 1:24 PM ¡ Mostly Sunny', + frame: { x: 20, y: 326, width: 400, height: 72 }, + }), + ], + }), + ], + }), + ]); + + const snapshot = currentRuntimeSnapshot(); + const sheetScrollRef = snapshot.elements.find( + (element) => element.identifier === 'example.locationsSheet', + )?.ref; + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Drag visible sheet content', + tool: 'drag', + params: { + simulatorId, + elementRef: sheetScrollRef, + direction: 'up', + distance: 0.7, + duration: 0.8, + steps: 80, + postDelay: 0.5, + }, + }); + }); + it('prefers a vertical list over a small horizontal scroll view for upward scroll guidance', () => { recordSnapshot([ createNode({ diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts index bcb9d9ef0..cb0561d48 100644 --- a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -6,6 +6,7 @@ import { getPrimaryRuntimeElement, parseRuntimeSnapshotResponse, getRuntimeElementActivationPoint, + getRuntimeElementDirectionalDragPoints, getRuntimeElementSwipePoints, RuntimeSnapshotParseError, } from '../shared/runtime-snapshot.ts'; 
@@ -350,7 +351,7 @@ describe('runtime snapshot normalization', () => { ); }); - it('keeps sheet hosts swipeable when the current visible sheet content fits', () => { + it('does not synthesize a foreground sheet scroll region without a real scroll descendant', () => { const root = createNode({ type: 'Application', role: 'AXApplication', @@ -385,17 +386,188 @@ describe('runtime snapshot normalization', () => { ref: 'e1', role: 'application', label: 'Example', - actions: ['swipeWithin'], + actions: [], }), ); - expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'down')).toEqual({ - ok: true, - from: { x: 201, y: 273 }, - to: { x: 201, y: 732 }, + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + expect(snapshot.payload.actions.some((action) => action.action === 'swipeWithin')).toBe(false); + }); + + it('does not synthesize a locations sheet scroll region over tappable rows', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + AXIdentifier: 'example.backgroundScroll', + frame: { x: 0, y: 80, width: 402, height: 260 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Locations', + AXIdentifier: 'example.locationsSheet', + frame: { x: 148, y: 104, width: 106, height: 32 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, 
width: 362, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Use current location', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 218, width: 362, height: 54 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'MY LOCATIONS ¡ 7', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 292, width: 160, height: 20 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'San Francisco, 1:24 PM ¡ Cloudy', + frame: { x: 20, y: 326, width: 362, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 415, width: 362, height: 72 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Body note', + frame: { x: 20, y: 600, width: 362, height: 44 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + expect(snapshot.payload.elements[0]?.actions).not.toContain('swipeWithin'); + expect(snapshot.payload.elements.find((element) => element.role === 'scroll-view')).toEqual( + expect.objectContaining({ + identifier: 'example.backgroundScroll', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('does not advertise synthetic scrolling for live-shaped locations sheets', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 182, y: 360, width: 76, height: 25 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Locations', + AXIdentifier: 'example.locationsSheet', + frame: { x: 168, y: 408, width: 
106, height: 32 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 374, y: 400, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 450, width: 400, height: 44 }, + }), + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'MY LOCATIONS ¡ 8', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 566, width: 160, height: 20 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'MY LOCATION, San Francisco, 1:24 PM ¡ Mostly Sunny', + frame: { x: 20, y: 596, width: 400, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM ¡ Light Rain', + frame: { x: 20, y: 686, width: 400, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Aspen, 2:24 PM ¡ Light Snow', + frame: { x: 20, y: 776, width: 400, height: 72 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, }); + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); + expect(snapshot.payload.elements[0]?.actions).not.toContain('swipeWithin'); + expect(snapshot.payload.actions.some((action) => action.action === 'swipeWithin')).toBe(false); }); - it('keeps sheet host swipe frames non-degenerate when the grabber is near the bottom', () => { + it('does not synthesize sheet host swipe frames when the grabber is near the bottom', () => { const root = createNode({ type: 'Application', role: 'AXApplication', @@ -418,11 +590,12 @@ describe('runtime snapshot normalization', () => { nowMs: 1_000, }); - expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toEqual({ - ok: true, - from: { x: 195, y: 
778 }, - to: { x: 195, y: 666 }, - }); + expect(snapshot.payload.elements[0]?.actions).toEqual([]); + expect( + snapshot.payload.elements.find( + (element) => element.identifier === 'xcodebuildmcp.inferred.sheet-content', + ), + ).toBeUndefined(); }); it('removes actions from elements outside the viewport', () => { @@ -852,6 +1025,41 @@ describe('runtime snapshot normalization', () => { }); }); + it('uses viewport-relative directional drag points for small chrome targets', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 440, height: 956 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 182, y: 446, width: 76, height: 24 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect( + getRuntimeElementDirectionalDragPoints( + snapshot.elements[1]!, + 'up', + 0.35, + snapshot.elements[0]!.publicElement.frame, + ), + ).toEqual({ + ok: true, + from: { x: 220, y: 458 }, + to: { x: 220, y: 123 }, + }); + }); + it('keeps full-screen swipe points away from unsafe viewport edges', () => { const snapshot = createRuntimeSnapshotRecord({ simulatorId, diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index f5c4eb91f..ff17cf752 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -553,8 +553,8 @@ describe('Snapshot UI Plugin', () => { expect(ctx.nextSteps?.map((step) => step.tool)).toEqual([ 'snapshot_ui', 'wait_for_ui', - 'tap', 'swipe', + 'tap', ]); }); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-incomplete-completion-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-incomplete-completion-next-steps.test.ts new file mode 100644 index 000000000..52d207c13 --- /dev/null +++ 
b/src/mcp/tools/ui-automation/__tests__/ui-action-incomplete-completion-next-steps.test.ts @@ -0,0 +1,266 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { + AccessibilityNode, + UiActionResultDomainResult, +} from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { toStructuredEnvelope } from '../../../../utils/structured-output-envelope.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { createCaptureSuccessResult } from '../shared/domain-result.ts'; +import { + createRuntimeSnapshotNextSteps, + getForegroundCompletionSuppressedRuntimeTargetRefs, +} from '../shared/runtime-next-steps.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { tapLogic } from '../tap.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, +} from './ui-action-test-helpers.ts'; + +const simulatorId = '57F882E8-F858-4F57-98D4-8164D5915C43'; + +function createSearchResultBeforeCompletionNodes(): AccessibilityNode[] { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.searchSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Result query', + AXIdentifier: 'example.searchSheet', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'Example result, detail text', + AXValue: 'not saved', + frame: { x: 20, y: 218, width: 280, height: 72 }, + }), + 
createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Add', + AXIdentifier: 'example.searchSheet', + frame: { x: 322, y: 232, width: 60, height: 44 }, + }), + ], + }), + ]; +} + +function createMixedCompletionSheetNodes(): AccessibilityNode[] { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.searchSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + AXLabel: 'Existing result, detail text', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 280, height: 72 }, + }), + createNode({ + AXLabel: 'New result, detail text', + AXValue: 'not saved', + frame: { x: 20, y: 306, width: 280, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Add', + AXIdentifier: 'example.searchSheet', + frame: { x: 322, y: 320, width: 60, height: 44 }, + }), + ], + }), + ]; +} + +function recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + +function currentSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!; +} + +function sameSearchResultExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { + success: true, + output: JSON.stringify({ elements: createSearchResultBeforeCompletionNodes() }), + }, + ]).executor; +} + +function compactCaptureList( + envelope: ReturnType, + key: 'targets' | 'text' | 'evidence', +): string[] { + const data = envelope.data; + if (!data || typeof data !== 'object' || !('capture' in data)) { + throw new 
Error('Expected structured output capture.'); + } + + const capture = (data as { capture?: unknown }).capture; + if (!capture || typeof capture !== 'object' || !(key in capture)) { + return []; + } + + const entries = (capture as Record)[key]; + if (!Array.isArray(entries)) { + throw new Error(`Expected compact runtime snapshot ${key} array.`); + } + + return entries.filter((entry): entry is string => typeof entry === 'string'); +} + +function compactTargets(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'targets'); +} + +function compactText(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'text'); +} + +function compactEvidence(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'evidence'); +} + +describe('UI action incomplete completion next steps', () => { + beforeEach(() => { + sessionStore.clear(); + }); + + it('prefers Add when foreground completion rows contain mixed complete and incomplete states', () => { + recordSnapshot(createMixedCompletionSheetNodes()); + const snapshot = currentSnapshot().payload; + const addRef = snapshot.elements.find((element) => element.label === 'Add')?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + const savedRef = snapshot.elements.find((element) => element.value === 'saved')?.ref; + const notSavedRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + expect(addRef).toBeDefined(); + expect(closeRef).toBeDefined(); + expect(savedRef).toBeDefined(); + expect(notSavedRef).toBeDefined(); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + const suppressedRefs = getForegroundCompletionSuppressedRuntimeTargetRefs({ + simulatorId, + runtimeSnapshot: snapshot, + }); + + expect(steps[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: addRef }, + }); + 
expect(steps[0]?.params?.elementRef).not.toBe(closeRef); + expect(suppressedRefs).toEqual([notSavedRef]); + expect(suppressedRefs).not.toContain(savedRef); + }); + + it('keeps ordinary unsuppressed rows actionable in compact targets', () => { + recordSnapshot(createSearchResultBeforeCompletionNodes()); + const snapshot = currentSnapshot().payload; + const rowRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + expect(rowRef).toBeDefined(); + + const result = createCaptureSuccessResult(simulatorId, { capture: snapshot }); + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2'); + + expect(compactTargets(envelope).some((target) => target.startsWith(`${rowRef}|tap|`))).toBe( + true, + ); + }); + + it('does not repeat a no-op incomplete foreground row tap and prefers Add', async () => { + recordSnapshot(createSearchResultBeforeCompletionNodes()); + const snapshot = currentSnapshot().payload; + const rowRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + const addRef = snapshot.elements.find((element) => element.label === 'Add')?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + expect(rowRef).toBeDefined(); + expect(addRef).toBeDefined(); + expect(closeRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + tapLogic( + { simulatorId, elementRef: rowRef! 
}, + sameSearchResultExecutor(), + createMockAxeHelpers(), + ), + ); + + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.ui-action-result', '2', { + nextSteps: ctx.nextSteps, + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + + expect(ctx.nextSteps?.[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: addRef }, + }); + expect(ctx.nextSteps?.[0]?.params?.elementRef).not.toBe(closeRef); + expect(compactTargets(envelope).some((target) => target.startsWith(`${rowRef}|tap|`))).toBe( + false, + ); + expect(compactTargets(envelope).some((target) => target.startsWith(`${addRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('not saved'))).toBe(false); + const notSavedEvidenceLine = compactEvidence(envelope).find((line) => + line.includes('not saved'), + ); + expect(notSavedEvidenceLine).toBeDefined(); + expect(notSavedEvidenceLine?.startsWith(`${rowRef}|`)).toBe(false); + expect(notSavedEvidenceLine?.split('|')).toHaveLength(4); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-next-steps.test.ts new file mode 100644 index 000000000..f3a0b2055 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-next-steps.test.ts @@ -0,0 +1,485 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { + AccessibilityNode, + UiActionResultDomainResult, +} from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { toStructuredEnvelope } from '../../../../utils/structured-output-envelope.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + setUiActionStructuredOutput, + 
setCaptureStructuredOutput, + createUiActionSuccessResult, + createCaptureSuccessResult, +} from '../shared/domain-result.ts'; +import { createRuntimeSnapshotNextSteps } from '../shared/runtime-next-steps.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { tapLogic } from '../tap.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, +} from './ui-action-test-helpers.ts'; + +const simulatorId = '9A9F6BF3-A1F8-4AC7-8B32-37EDC7F4F511'; + +function createLocationsSheetNodes() { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + AXLabel: 'Background, Details', + AXIdentifier: 'example.backgroundCard', + frame: { x: 20, y: 120, width: 362, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Edit', + AXIdentifier: 'example.locationsSheet', + frame: { x: 24, y: 96, width: 60, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: undefined, + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 362, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 362, height: 72 }, + }), + createNode({ + AXLabel: 'Portland, 1:24 PM · Light Rain', + frame: { x: 20, y: 326, width: 362, height: 72 }, + }), + createNode({ + AXLabel: 'Aspen, 2:24 PM · Light Snow', + frame: { x: 20, y: 
415, width: 362, height: 72 }, + }), + ], + }), + ]; +} + +function createSearchResultBeforeAddNodes() { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Clear search', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 150, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'London', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom, 9:24 PM · Light Rain', + AXValue: 'not saved', + frame: { x: 20, y: 218, width: 280, height: 72 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Add', + AXIdentifier: 'example.locationsSheet', + frame: { x: 322, y: 232, width: 60, height: 44 }, + }), + ], + }), + ]; +} + +function createSavedSearchResultSheetNodes() { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Clear search', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 150, width: 44, height: 44 }, + }), + 
createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'London', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 300, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom, 9:24 PM · Light Rain', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 362, height: 72 }, + }), + ], + }), + ]; +} + +function currentSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!; +} + +function compactCaptureList( + envelope: ReturnType, + key: 'targets' | 'text' | 'evidence', +): string[] { + const data = envelope.data; + if (!data || typeof data !== 'object' || !('capture' in data)) { + throw new Error('Expected structured output capture.'); + } + + const capture = (data as { capture?: unknown }).capture; + if (!capture || typeof capture !== 'object' || !(key in capture)) { + return []; + } + + const entries = (capture as Record)[key]; + if (!Array.isArray(entries)) { + throw new Error(`Expected compact runtime snapshot ${key} array.`); + } + + return entries.filter((entry): entry is string => typeof entry === 'string'); +} + +function compactTargets(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'targets'); +} + +function compactText(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'text'); +} + +function compactEvidence(envelope: ReturnType): string[] { + return compactCaptureList(envelope, 'evidence'); +} + +function sameSheetExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { success: true, output: JSON.stringify({ elements: createLocationsSheetNodes() }) }, + ]).executor; +} + +function addSearchResultExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { + success: true, + output: JSON.stringify({ elements: createSavedSearchResultSheetNodes() }), + }, + ]).executor; +} + +function recordSnapshot(nodes: AccessibilityNode[], 
capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + +describe('UI action no-op next steps', () => { + beforeEach(() => { + sessionStore.clear(); + }); + + it('filters background taps when a foreground sheet is active', () => { + recordSnapshot(createLocationsSheetNodes()); + const snapshot = currentSnapshot().payload; + const backgroundRef = snapshot.elements.find( + (element) => element.identifier === 'example.backgroundCard', + )?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(backgroundRef).toBeDefined(); + expect(closeRef).toBeDefined(); + expect(steps[0]?.tool).toBe('tap'); + expect(steps[0]?.params?.elementRef).not.toBe(backgroundRef); + expect( + steps.some((step) => step.tool === 'tap' && step.params?.elementRef === backgroundRef), + ).toBe(false); + }); + + it('prefers Add over a not-saved foreground-sheet result row', () => { + recordSnapshot(createSearchResultBeforeAddNodes()); + const snapshot = currentSnapshot().payload; + const addRef = snapshot.elements.find((element) => element.label === 'Add')?.ref; + const rowRef = snapshot.elements.find((element) => element.value === 'not saved')?.ref; + expect(addRef).toBeDefined(); + expect(rowRef).toBeDefined(); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + + expect(steps[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: addRef }, + }); + expect(steps.some((step) => step.tool === 'tap' && step.params?.elementRef === rowRef)).toBe( + false, + ); + + const { ctx } = createMockToolHandlerContext(); + const result = createCaptureSuccessResult(simulatorId, { capture: snapshot }); + 
setCaptureStructuredOutput(ctx, result); + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + + expect(snapshot.elements.find((element) => element.ref === rowRef)?.actions).toContain('tap'); + expect(compactTargets(envelope).some((target) => target.startsWith(`${rowRef}|tap|`))).toBe( + false, + ); + expect(compactTargets(envelope).some((target) => target.startsWith(`${addRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('not saved'))).toBe(false); + const notSavedEvidenceLine = compactEvidence(envelope).find((line) => + line.includes('not saved'), + ); + expect(notSavedEvidenceLine).toBeDefined(); + expect(notSavedEvidenceLine?.startsWith(`${rowRef}|`)).toBe(false); + expect(notSavedEvidenceLine?.split('|')).toHaveLength(4); + }); + + it('keeps completed foreground-sheet rows actionable in regular snapshot affordances', () => { + recordSnapshot(createSavedSearchResultSheetNodes()); + const snapshot = currentSnapshot().payload; + const savedRowRef = snapshot.elements.find((element) => element.value === 'saved')?.ref; + const closeRef = snapshot.elements.find((element) => element.label === 'Close')?.ref; + const clearSearchRef = snapshot.elements.find( + (element) => element.label === 'Clear search', + )?.ref; + expect(savedRowRef).toBeDefined(); + expect(closeRef).toBeDefined(); + expect(clearSearchRef).toBeDefined(); + + const steps = createRuntimeSnapshotNextSteps({ + simulatorId, + runtimeSnapshot: snapshot, + includeRefreshAndWait: false, + }); + expect(steps[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: savedRowRef }, + }); + + const { ctx } = createMockToolHandlerContext(); + const result = createCaptureSuccessResult(simulatorId, { capture: snapshot }); + setCaptureStructuredOutput(ctx, result); + const 
envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + + expect(snapshot.elements.find((element) => element.ref === savedRowRef)?.actions).toContain( + 'tap', + ); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${savedRowRef}|tap|`)), + ).toBe(true); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${clearSearchRef}|tap|`)), + ).toBe(true); + expect(compactTargets(envelope).some((target) => target.startsWith(`${closeRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('saved'))).toBe(false); + }); + + it('does not demote a saved foreground-sheet result row after adding it', async () => { + recordSnapshot(createSearchResultBeforeAddNodes()); + const addRef = currentSnapshot().payload.elements.find( + (element) => element.label === 'Add', + )?.ref; + expect(addRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + tapLogic( + { simulatorId, elementRef: addRef! 
}, + addSearchResultExecutor(), + createMockAxeHelpers(), + ), + ); + + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + const capture = result.capture; + if (!capture || !('elements' in capture)) { + throw new Error('Expected runtime snapshot capture.'); + } + const closeRef = capture.elements.find((element) => element.label === 'Close')?.ref; + const clearSearchRef = capture.elements.find( + (element) => element.label === 'Clear search', + )?.ref; + const savedRow = capture.elements.find((element) => element.value === 'saved'); + expect(closeRef).toBeDefined(); + expect(clearSearchRef).toBeDefined(); + expect(savedRow).toBeDefined(); + expect(savedRow?.actions).toContain('tap'); + expect( + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + ).toBeUndefined(); + expect(ctx.nextSteps).toEqual([ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: savedRow?.ref }, + }, + ]); + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.ui-action-result', '2', { + nextSteps: ctx.nextSteps, + runtimeSnapshotSuppressedTargetRefs: + ctx.structuredOutput?.renderHints?.runtimeSnapshot?.suppressedTargetRefs, + }); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${savedRow?.ref}|tap|`)), + ).toBe(true); + expect( + compactTargets(envelope).some((target) => target.startsWith(`${clearSearchRef}|tap|`)), + ).toBe(true); + expect(compactTargets(envelope).some((target) => target.startsWith(`${closeRef}|tap|`))).toBe( + true, + ); + expect(compactText(envelope).some((line) => line.includes('saved'))).toBe(false); + }); + + it('does not repeat a no-op foreground row tap or promote dismiss over remaining content', async () => { + recordSnapshot(createLocationsSheetNodes()); + const rowRef = currentSnapshot().payload.elements.find((element) => + element.label?.startsWith('London'), + )?.ref; + const remainingContentRef = 
currentSnapshot().payload.elements.find((element) => + element.label?.startsWith('Portland'), + )?.ref; + const closeRef = currentSnapshot().payload.elements.find( + (element) => element.label === 'Close', + )?.ref; + expect(rowRef).toBeDefined(); + expect(remainingContentRef).toBeDefined(); + expect(closeRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + tapLogic({ simulatorId, elementRef: rowRef! }, sameSheetExecutor(), createMockAxeHelpers()), + ); + + expect(ctx.nextSteps?.[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: remainingContentRef }, + }); + expect(ctx.nextSteps?.[0]?.params?.elementRef).not.toBe(closeRef); + expect(ctx.nextSteps?.some((step) => step.tool === 'batch')).toBe(false); + expect( + ctx.nextSteps?.some((step) => step.tool === 'tap' && step.params?.elementRef === rowRef), + ).toBe(false); + expect(ctx.nextSteps?.some((step) => step.tool === 'swipe')).toBe(false); + }); + + it('keeps ordinary post-action next steps when the screen hash changes', () => { + recordSnapshot(createLocationsSheetNodes()); + const previousSnapshot = currentSnapshot().payload; + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Continue', + frame: { x: 20, y: 120, width: 200, height: 44 }, + }), + ]); + const changedSnapshot = currentSnapshot().payload; + const result = createUiActionSuccessResult({ type: 'tap', elementRef: 'e5' }, simulatorId, [], { + capture: changedSnapshot, + previousRuntimeSnapshot: previousSnapshot, + }); + const { ctx } = createMockToolHandlerContext(); + + setUiActionStructuredOutput(ctx, result); + + expect(ctx.nextSteps).toEqual([ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: 'e1' }, + }, + ]); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-swipe-next-steps.test.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-swipe-next-steps.test.ts 
new file mode 100644 index 000000000..ea41fb7bc --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-no-op-swipe-next-steps.test.ts @@ -0,0 +1,134 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { + AccessibilityNode, + UiActionResultDomainResult, +} from '../../../../types/domain-results.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { getRuntimeSnapshot, recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { swipeLogic } from '../swipe.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, +} from './ui-action-test-helpers.ts'; + +const simulatorId = '044E0C26-0917-4812-B6D8-F5E22BA2E387'; + +function createForegroundSheetWithRealListNodes(): AccessibilityNode[] { + return [ + createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Example', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Table', + role: 'AXTable', + AXIdentifier: 'example.locationsSheet', + frame: { x: 0, y: 96, width: 402, height: 720 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + AXIdentifier: 'example.locationsSheet', + frame: { x: 330, y: 96, width: 44, height: 44 }, + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXValue: 'Search for a city, airport, or country', + AXIdentifier: 'example.locationsSheet', + frame: { x: 20, y: 150, width: 362, height: 44 }, + }), + createNode({ + AXLabel: 'London, England, United Kingdom', + AXValue: 'saved', + frame: { x: 20, y: 218, width: 362, height: 72 }, + }), + ], + }), + ], + }), + ]; +} + +function 
recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + +function currentSnapshot() { + const snapshot = getRuntimeSnapshot(simulatorId); + expect(snapshot).not.toBeNull(); + return snapshot!; +} + +function sameSheetExecutor() { + return createSequencedExecutor([ + { success: true, output: 'ok' }, + { + success: true, + output: JSON.stringify({ elements: createForegroundSheetWithRealListNodes() }), + }, + ]).executor; +} + +describe('UI action no-op swipe next steps', () => { + beforeEach(() => { + sessionStore.clear(); + }); + + it('does not repeat a no-op foreground sheet swipe or promote dismiss over visible content', async () => { + recordSnapshot(createForegroundSheetWithRealListNodes()); + const listRef = currentSnapshot().payload.elements.find( + (element) => element.identifier === 'example.locationsSheet', + )?.ref; + const contentRef = currentSnapshot().payload.elements.find((element) => + element.label?.startsWith('London'), + )?.ref; + const closeRef = currentSnapshot().payload.elements.find( + (element) => element.label === 'Close', + )?.ref; + expect(listRef).toBeDefined(); + expect(contentRef).toBeDefined(); + expect(closeRef).toBeDefined(); + const { ctx, run } = createMockToolHandlerContext(); + + await run(() => + swipeLogic( + { simulatorId, withinElementRef: listRef!, direction: 'up', distance: 0.7 }, + sameSheetExecutor(), + createMockAxeHelpers(), + ), + ); + + const result = ctx.structuredOutput?.result as UiActionResultDomainResult; + expect(result.action).toMatchObject({ + type: 'swipe', + withinElementRef: listRef, + direction: 'up', + }); + expect(ctx.nextSteps?.[0]).toEqual({ + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId, elementRef: contentRef }, + }); + expect(ctx.nextSteps?.[0]?.params?.elementRef).not.toBe(closeRef); + expect( + ctx.nextSteps?.some( + (step) => 
step.tool === 'swipe' && step.params?.withinElementRef === listRef, + ), + ).toBe(false); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts index 83d4fa8db..6a75e3d8c 100644 --- a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts @@ -1,6 +1,9 @@ import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import type { CaptureResultDomainResult } from '../../../../types/domain-results.ts'; +import type { + AccessibilityNode, + CaptureResultDomainResult, +} from '../../../../types/domain-results.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import type { DebuggerBackend } from '../../../../utils/debugger/backends/DebuggerBackend.ts'; import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; @@ -9,20 +12,28 @@ import { callHandler, createMockToolHandlerContext } from '../../../../test-util import { __resetRuntimeSnapshotStoreForTests, getRuntimeSnapshot, + recordRuntimeSnapshot, } from '../shared/snapshot-ui-state.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; import { handler, schema, wait_for_uiLogic } from '../wait_for_ui.ts'; import { createMockAxeHelpers, createNode, createSequencedExecutor, - recordSnapshot, - simulatorId, } from './ui-action-test-helpers.ts'; +const simulatorId = '12E2CB7E-780E-467B-BE90-2917AB236F77'; + function hierarchyJson(nodes: Array>): string { return JSON.stringify({ elements: nodes }); } +function recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} + function createTiming(startMs = 0): { timing: { now: () => number; sleep: (durationMs: number) => Promise }; getNow: () => number; diff --git 
a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts index cde2a9009..76687d4d9 100644 --- a/src/mcp/tools/ui-automation/batch.ts +++ b/src/mcp/tools/ui-automation/batch.ts @@ -29,7 +29,10 @@ import { const batchStepSchema = z.strictObject({ action: z.literal('tap'), - elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), + elementRef: z + .string() + .min(1, { message: 'elementRef must be non-empty' }) + .describe('Runtime elementRef from the latest snapshot_ui or wait_for_ui output'), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) @@ -49,7 +52,10 @@ const batchSchema = z.strictObject({ steps: z .array(batchStepSchema) .min(1, { message: 'At least one batch step is required' }) - .max(100, { message: 'At most 100 batch steps are supported' }), + .max(100, { message: 'At most 100 batch steps are supported' }) + .describe( + 'Required array of step objects, for example [{"action":"tap","elementRef":"e1"}]. Do not use commands or raw command strings.', + ), axCache: z.enum(['perBatch', 'perStep', 'none']).optional(), waitTimeout: z.number().min(0, { message: 'waitTimeout must be non-negative' }).optional(), pollInterval: z.number().positive({ message: 'pollInterval must be greater than 0' }).optional(), diff --git a/src/mcp/tools/ui-automation/drag.ts b/src/mcp/tools/ui-automation/drag.ts new file mode 100644 index 000000000..80a20404e --- /dev/null +++ b/src/mcp/tools/ui-automation/drag.ts @@ -0,0 +1,244 @@ +/** + * UI Testing Plugin: Drag + * + * Drags from a semantic UI element from the runtime snapshot store. 
+ */ + +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from '../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { + getRuntimeElementDirectionalDragPoints, + getRuntimeElementCenter, + getRuntimeElementSwipePoints, +} from './shared/runtime-snapshot.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { captureRuntimeSnapshotAfterActionSafely } from './shared/post-action-snapshot.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +export type { AxeHelpers } from './shared/axe-command.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; +import { + createUiActionFailureResult, + createUiActionSuccessResult, + createUiAutomationRecoverableError, + mapAxeCommandError, + setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, +} from './shared/domain-result.ts'; + +const dragSchema = z.object({ + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + elementRef: z + .string() + .min(1, { message: 'elementRef must be non-empty' }) + .describe('Runtime elementRef from the latest snapshot_ui or wait_for_ui output'), + direction: z + .enum(['up', 'down', 'left', 'right']) + .describe('Drag direction: up, down, left, or 
right'), + duration: z + .number() + .positive({ message: 'Duration must be greater than 0 seconds' }) + .optional() + .describe('seconds'), + distance: z + .number() + .positive({ message: 'Distance must be greater than 0' }) + .max(1, { message: 'Distance must be at most 1' }) + .optional() + .describe( + 'Normalized drag distance greater than 0 and up to 1 within the resolved element or viewport', + ), + steps: z + .number() + .int({ message: 'Steps must be an integer' }) + .min(1, { message: 'Steps must be at least 1' }) + .max(1000, { message: 'Steps must be at most 1000' }) + .optional(), + preDelay: z + .number() + .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) + .optional() + .describe('seconds'), + postDelay: z + .number() + .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) + .optional() + .describe('seconds'), +}); + +export type DragParams = z.infer; +type DragResult = UiActionResultDomainResult; + +const publicSchemaObject = z.strictObject(dragSchema.omit({ simulatorId: true } as const).shape); + +const LOG_PREFIX = '[AXe]'; + +export function createDragExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): NonStreamingExecutor { + return async (params) => { + const toolName = 'drag'; + const { simulatorId, elementRef, direction, duration, distance, steps, preDelay, postDelay } = + params; + const unresolvedAction = { + type: 'drag' as const, + elementRef, + direction, + ...(duration !== undefined ? { durationSeconds: duration } : {}), + ...(steps !== undefined ? 
{ steps } : {}), + }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'touch'); + if (!resolution.ok) { + return createUiActionFailureResult(unresolvedAction, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const viewportFrame = resolution.snapshot.elements[0]?.publicElement.frame; + const points = resolution.element.publicElement.actions.includes('swipeWithin') + ? getRuntimeElementSwipePoints(resolution.element, direction, distance) + : getRuntimeElementDirectionalDragPoints( + resolution.element, + direction, + distance, + viewportFrame, + ); + if (!points.ok) { + const uiError = createUiAutomationRecoverableError({ + code: 'TARGET_NOT_ACTIONABLE', + message: points.message, + elementRef, + }); + return createUiActionFailureResult(unresolvedAction, simulatorId, points.message, { + uiError, + }); + } + + const action = { + ...unresolvedAction, + from: points.from, + to: points.to, + }; + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); + } + + const commandArgs = [ + 'drag', + '--start-x', + String(points.from.x), + '--start-y', + String(points.from.y), + '--end-x', + String(points.to.x), + '--end-y', + String(points.to.y), + ]; + if (duration !== undefined) { + commandArgs.push('--duration', String(duration)); + } + if (steps !== undefined) { + commandArgs.push('--steps', String(steps)); + } + if (preDelay !== undefined) { + commandArgs.push('--pre-delay', String(preDelay)); + } + if (postDelay !== undefined) { + commandArgs.push('--post-delay', String(postDelay)); + } + + const target = getRuntimeElementCenter(resolution.element); + const optionsText = duration !== undefined ? 
` duration=${duration}s` : ''; + log( + 'info', + `${LOG_PREFIX}/${toolName}: Starting ${direction} drag from ${elementRef} at (${target.x}, ${target.y})${optionsText} on ${simulatorId}`, + ); + + try { + await executeAxeCommand(commandArgs, simulatorId, 'drag', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); + log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to simulate ${direction} drag from ${elementRef}.`, + }); + log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message, { + details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), + }); + } + + const captureResult = await captureRuntimeSnapshotAfterActionSafely({ + simulatorId, + executor, + axeHelpers, + }); + return createUiActionSuccessResult( + action, + simulatorId, + [guard.warningText, captureResult.warning], + { + ...(captureResult.capture ? { capture: captureResult.capture } : {}), + previousRuntimeSnapshot: resolution.snapshot.payload, + ...(captureResult.uiError ? 
{ uiError: captureResult.uiError } : {}), + }, + ); + }; +} + +export async function dragLogic( + params: DragParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): Promise { + const ctx = getHandlerContext(); + const executeDrag = createDragExecutor(executor, axeHelpers, debuggerManager); + const result = await executeDrag(params); + + setUiActionStructuredOutput(ctx, result); +} + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: dragSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: toInternalSchema(dragSchema), + logicFunction: (params: DragParams, executor: CommandExecutor) => + dragLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/mcp/tools/ui-automation/shared/domain-result.ts b/src/mcp/tools/ui-automation/shared/domain-result.ts index b1b9aa197..8d1eb2d20 100644 --- a/src/mcp/tools/ui-automation/shared/domain-result.ts +++ b/src/mcp/tools/ui-automation/shared/domain-result.ts @@ -8,6 +8,8 @@ import type { UiActionResultDomainResult, } from '../../../../types/domain-results.ts'; import type { + RuntimeElementV1, + RuntimeSnapshotV1, UiAutomationRecoverableError, UiAutomationRecoverableErrorCode, UiWaitMatch, @@ -15,13 +17,25 @@ import type { import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { createBasicDiagnostics } from '../../../../utils/diagnostics.ts'; import { AxeError, DependencyError, SystemError } from '../../../../utils/errors.ts'; -import { createRuntimeSnapshotNextSteps } from './runtime-next-steps.ts'; +import { + createRuntimeSnapshotNextSteps, + getForegroundCompletionSuppressedRuntimeTargetRefs, +} from './runtime-next-steps.ts'; +import type { + RuntimeSnapshotNextStepActionContext, + 
RuntimeSnapshotNextStepActionTarget, +} from './runtime-next-steps.ts'; const UI_ACTION_SCHEMA = 'xcodebuildmcp.output.ui-action-result'; const CAPTURE_SCHEMA = 'xcodebuildmcp.output.capture-result'; const REFRESH_SNAPSHOT_RECOVERY_HINT = 'Run snapshot_ui again and retry with a current element reference from the refreshed snapshot.'; +const uiActionNextStepContexts = new WeakMap< + UiActionResultDomainResult, + RuntimeSnapshotNextStepActionContext +>(); + function createDiagnostics( warnings: readonly string[] = [], errors: readonly string[] = [], @@ -51,6 +65,46 @@ function createUiActionSuccessNextSteps(result: UiActionResultDomainResult): Nex ]; } +function getUiActionTargetRef(action: UiAction): string | null { + switch (action.type) { + case 'tap': + case 'touch': + case 'long-press': + case 'type-text': + return action.elementRef; + case 'swipe': + return action.withinElementRef; + case 'drag': + return action.elementRef; + default: + return null; + } +} + +function createNextStepActionTarget( + element: RuntimeElementV1, +): RuntimeSnapshotNextStepActionTarget { + return { + ...(element.label !== undefined ? { label: element.label } : {}), + ...(element.value !== undefined ? { value: element.value } : {}), + ...(element.identifier !== undefined ? { identifier: element.identifier } : {}), + ...(element.role !== undefined ? { role: element.role } : {}), + ...(element.state !== undefined ? { state: element.state } : {}), + }; +} + +function findUiActionTargetElement( + action: UiAction, + runtimeSnapshot: RuntimeSnapshotV1, +): RuntimeElementV1 | null { + const targetRef = getUiActionTargetRef(action); + if (!targetRef) { + return null; + } + + return runtimeSnapshot.elements.find((element) => element.ref === targetRef) ?? 
null; +} + export function createUiAutomationRecoverableError(params: { code: UiAutomationRecoverableErrorCode; message: string; @@ -69,9 +123,13 @@ export function createUiActionSuccessResult( action: UiAction, simulatorId: string, warnings: Array = [], - options: { capture?: CapturePayload; uiError?: UiAutomationRecoverableError } = {}, + options: { + capture?: CapturePayload; + uiError?: UiAutomationRecoverableError; + previousRuntimeSnapshot?: RuntimeSnapshotV1; + } = {}, ): UiActionResultDomainResult { - return { + const result: UiActionResultDomainResult = { kind: 'ui-action-result', didError: false, error: null, @@ -82,6 +140,19 @@ export function createUiActionSuccessResult( diagnostics: createDiagnostics(compact(warnings), []), ...(options.uiError ? { uiError: options.uiError } : {}), }; + + if (options.previousRuntimeSnapshot) { + const actionTargetElement = findUiActionTargetElement(action, options.previousRuntimeSnapshot); + uiActionNextStepContexts.set(result, { + action, + previousScreenHash: options.previousRuntimeSnapshot.screenHash, + ...(actionTargetElement + ? 
{ actionTarget: createNextStepActionTarget(actionTargetElement) } + : {}), + }); + } + + return result; } export function createUiActionFailureResult( @@ -201,24 +272,59 @@ export function mapAxeCommandError( }; } +function mergeRuntimeSnapshotRenderHints( + renderHints: RenderHints | undefined, + suppressedTargetRefs: readonly string[], +): RenderHints | undefined { + if (suppressedTargetRefs.length === 0) { + return renderHints; + } + + return { + ...renderHints, + runtimeSnapshot: { + ...renderHints?.runtimeSnapshot, + suppressedTargetRefs, + }, + }; +} + export function setUiActionStructuredOutput( ctx: ToolHandlerContext, result: UiActionResultDomainResult, ): void { - ctx.structuredOutput = { - result, - schema: UI_ACTION_SCHEMA, - schemaVersion: '2', - }; if (result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot') { + const actionContext = uiActionNextStepContexts.get(result); + const suppressedTargetRefs = getForegroundCompletionSuppressedRuntimeTargetRefs({ + simulatorId: result.artifacts.simulatorId, + runtimeSnapshot: result.capture, + }); + ctx.structuredOutput = { + result, + schema: UI_ACTION_SCHEMA, + schemaVersion: '2', + ...(suppressedTargetRefs.length > 0 + ? { + renderHints: { + runtimeSnapshot: { suppressedTargetRefs }, + }, + } + : {}), + }; ctx.nextSteps = createRuntimeSnapshotNextSteps({ simulatorId: result.artifacts.simulatorId, runtimeSnapshot: result.capture, includeRefreshAndWait: false, + ...(actionContext ? { actionContext } : {}), }); return; } + ctx.structuredOutput = { + result, + schema: UI_ACTION_SCHEMA, + schemaVersion: '2', + }; ctx.nextSteps = createUiActionSuccessNextSteps(result); } @@ -227,10 +333,18 @@ export function setCaptureStructuredOutput( result: CaptureResultDomainResult, renderHints?: RenderHints, ): void { + const suppressedTargetRefs = + result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' + ? 
getForegroundCompletionSuppressedRuntimeTargetRefs({ + simulatorId: result.artifacts.simulatorId, + runtimeSnapshot: result.capture, + }) + : []; + const mergedRenderHints = mergeRuntimeSnapshotRenderHints(renderHints, suppressedTargetRefs); ctx.structuredOutput = { result, schema: CAPTURE_SCHEMA, schemaVersion: '2', - ...(renderHints ? { renderHints } : {}), + ...(mergedRenderHints ? { renderHints: mergedRenderHints } : {}), }; } diff --git a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts index 0b8509f19..57a3873f1 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-next-steps.ts @@ -1,4 +1,5 @@ import type { NextStep } from '../../../../types/common.ts'; +import type { UiAction } from '../../../../types/domain-results.ts'; import type { RuntimeElementV1, RuntimeSnapshotElementRecord, @@ -37,6 +38,30 @@ const SCREEN_CHANGING_TAP_NEXT_STEP_LABELS = new Set([ ]); const FOREGROUND_DISMISS_TAP_NEXT_STEP_LABELS = new Set(['back', 'cancel', 'close', 'done']); +const COMPLETION_ACTION_TAP_NEXT_STEP_LABELS = new Set(['add', 'save']); +const SHEET_EXPANDED_VALUE_PATTERN = /\b(?:expanded|full(?:\s+screen)?)\b/i; +const INCOMPLETE_STATE_NEXT_STEP_TEXT = new Set([ + 'not added', + 'not saved', + 'not selected', + 'unadded', + 'unsaved', + 'unselected', +]); + +export interface RuntimeSnapshotNextStepActionTarget { + label?: string; + value?: string; + identifier?: string; + role?: string; + state?: { selected?: boolean }; +} + +export interface RuntimeSnapshotNextStepActionContext { + action: UiAction; + previousScreenHash: string; + actionTarget?: RuntimeSnapshotNextStepActionTarget; +} function compactTapNextStepText(value: string | undefined): string { return (value ?? 
'').replace(/\s+/g, ' ').trim(); @@ -278,7 +303,10 @@ function findStoredSnapshotRecords(params: { simulatorId: string; runtimeSnapshot: RuntimeSnapshotV1; }): Map { - const storedSnapshot = getRuntimeSnapshot(params.simulatorId); + const storedSnapshot = getRuntimeSnapshot( + params.simulatorId, + params.runtimeSnapshot.capturedAtMs, + ); if ( storedSnapshot?.payload.screenHash !== params.runtimeSnapshot.screenHash || storedSnapshot.payload.seq !== params.runtimeSnapshot.seq @@ -293,8 +321,9 @@ function findStoredSnapshotRecords(params: { * Finds the most likely active foreground scroll container. * * Business rules: - * - Only scrollable elements can become foreground roots because next-step filtering is currently - * used to choose better tap/scroll guidance around scrollable panels, sheets, and detail views. + * - Scrollable elements can become foreground roots. A top-level root with a sheet grabber + * descendant can also become the root so flattened sheet controls are not assigned to background + * scroll views by geometry overlap. * - A foreground root must contain at least one generic foreground cue: * - dismiss/navigation-out control: back, cancel, close, done * - text-entry control @@ -309,6 +338,35 @@ function findStoredSnapshotRecords(params: { * - This does not yet rank competing foreground scroll views by identifier specificity or visible * area. After filtering, scroll selection still chooses the first remaining scrollable element. */ +function findSheetGrabberDescendant( + root: RuntimeSnapshotElementRecord, + records: readonly RuntimeSnapshotElementRecord[], +): RuntimeSnapshotElementRecord | null { + return ( + records.find( + (candidate) => + candidate !== root && + compactTapNextStepText(candidate.publicElement.label).toLowerCase() === 'sheet grabber' && + isSameOrDescendantPath(root.metadata.path, candidate.metadata.path), + ) ?? 
null + ); +} + +function isExpandableSheetGrabber(element: RuntimeElementV1): boolean { + if (compactTapNextStepText(element.label).toLowerCase() !== 'sheet grabber') { + return false; + } + const value = compactTapNextStepText(element.value); + return value.length > 0 && !SHEET_EXPANDED_VALUE_PATTERN.test(value); +} + +function isExpandedSheetGrabber(element: RuntimeElementV1): boolean { + return ( + compactTapNextStepText(element.label).toLowerCase() === 'sheet grabber' && + SHEET_EXPANDED_VALUE_PATTERN.test(compactTapNextStepText(element.value)) + ); +} + function findActiveForegroundRoot( recordsByRef: Map, ): RuntimeSnapshotElementRecord | null { @@ -321,7 +379,8 @@ function findActiveForegroundRoot( if (cachedScore !== undefined) { return cachedScore; } - if (!isScrollableNextStepElement(record.publicElement)) { + const hasSheetGrabberDescendant = findSheetGrabberDescendant(record, records) !== null; + if (!isScrollableNextStepElement(record.publicElement) && !hasSheetGrabberDescendant) { scoreByRef.set(record.publicElement.ref, 0); return 0; } @@ -351,6 +410,7 @@ function findActiveForegroundRoot( const identityPriority = Math.max(0, 2 - getScrollIdentityPriority(element)); const verticalPriority = element.frame.height >= element.frame.width ? 1 : 0; const score = + (hasSheetGrabberDescendant ? 200 : 0) + (hasDismissControl ? 100 : 0) + (hasTextEntry ? 60 : 0) + (hasStateControls ? 30 : 0) + @@ -384,21 +444,90 @@ function findActiveForegroundRoot( * - If no foreground root is detected, keep all elements rather than guessing; conservative output * is better than hiding valid controls. */ +function findSheetForegroundStartIndex( + foregroundRoot: RuntimeSnapshotElementRecord, + records: readonly RuntimeSnapshotElementRecord[], + indexByRef: Map, +): number | null { + const grabber = findSheetGrabberDescendant(foregroundRoot, records); + return grabber ? (indexByRef.get(grabber.publicElement.ref) ?? 
null) : null; +} + function filterToForegroundElements( elements: RuntimeElementV1[], recordsByRef: Map, + foregroundRoot: RuntimeSnapshotElementRecord | null, ): RuntimeElementV1[] { - const foregroundRoot = findActiveForegroundRoot(recordsByRef); if (!foregroundRoot) { return elements; } + const records = [...recordsByRef.values()]; + const indexByRef = new Map(records.map((record, index) => [record.publicElement.ref, index])); + const sheetForegroundStartIndex = findSheetForegroundStartIndex( + foregroundRoot, + records, + indexByRef, + ); + return elements.filter((element) => { const record = recordsByRef.get(element.ref); - return record && isForegroundCandidateForRoot(foregroundRoot, record); + if (!record || !isForegroundCandidateForRoot(foregroundRoot, record)) { + return false; + } + + const recordIndex = indexByRef.get(record.publicElement.ref) ?? -1; + return sheetForegroundStartIndex === null || recordIndex >= sheetForegroundStartIndex; }); } +function getRepeatedNoOpActionRef(params: { + runtimeSnapshot: RuntimeSnapshotV1; + actionContext?: RuntimeSnapshotNextStepActionContext; +}): { tool: 'tap' | 'swipe' | 'drag'; ref: string } | null { + if (params.actionContext?.previousScreenHash !== params.runtimeSnapshot.screenHash) { + return null; + } + + switch (params.actionContext.action.type) { + case 'tap': + return { tool: 'tap', ref: params.actionContext.action.elementRef }; + case 'swipe': + return { tool: 'swipe', ref: params.actionContext.action.withinElementRef }; + case 'drag': + return { tool: 'drag', ref: params.actionContext.action.elementRef }; + default: + return null; + } +} + +function hasIncompleteStateSignal(element: { label?: string; value?: string }): boolean { + const label = compactTapNextStepText(element.label).toLowerCase(); + const value = compactTapNextStepText(element.value).toLowerCase(); + return INCOMPLETE_STATE_NEXT_STEP_TEXT.has(label) || INCOMPLETE_STATE_NEXT_STEP_TEXT.has(value); +} + +function 
findForegroundIncompleteCompletionTapElement( + elements: readonly RuntimeElementV1[], + repeatedNoOpAction: { tool: 'tap' | 'swipe' | 'drag'; ref: string } | null, +): RuntimeElementV1 | null { + if (!elements.some(hasIncompleteStateSignal)) { + return null; + } + + return ( + elements.find( + (element) => + element.actions.includes('tap') && + !element.actions.includes('typeText') && + !(repeatedNoOpAction?.tool === 'tap' && repeatedNoOpAction.ref === element.ref) && + COMPLETION_ACTION_TAP_NEXT_STEP_LABELS.has( + compactTapNextStepText(element.label).toLowerCase(), + ), + ) ?? null + ); +} + /** * Creates human/model-facing next-step examples from a runtime snapshot. * @@ -410,26 +539,69 @@ function filterToForegroundElements( * generic suggestions. * - Batch examples include multiple visible switches because settings screens often require several * same-screen toggles and batch is the efficient, app-agnostic primitive for that workflow. - * - Scroll examples prefer real list/scroll-view targets, then semantic inferred containers, with + * - Scroll examples prefer real list/scroll-view targets, then semantic containers, with * application/window root scrolling used last as a fallback. * - Refresh/wait examples are included for fresh snapshot captures, but not after every action. 
*/ +export function getForegroundCompletionSuppressedRuntimeTargetRefs(params: { + simulatorId: string; + runtimeSnapshot: RuntimeSnapshotV1; +}): string[] { + const recordsByRef = findStoredSnapshotRecords(params); + const foregroundRoot = findActiveForegroundRoot(recordsByRef); + if (!foregroundRoot) { + return []; + } + + const foregroundElements = filterToForegroundElements( + params.runtimeSnapshot.elements, + recordsByRef, + foregroundRoot, + ); + const completionActionElement = findForegroundIncompleteCompletionTapElement( + foregroundElements, + null, + ); + if (completionActionElement) { + return foregroundElements + .filter( + (element) => + element.ref !== completionActionElement.ref && hasIncompleteStateSignal(element), + ) + .map((element) => element.ref); + } + + return []; +} + export function createRuntimeSnapshotNextSteps(params: { simulatorId: string; runtimeSnapshot: RuntimeSnapshotV1; includeRefreshAndWait: boolean; + actionContext?: RuntimeSnapshotNextStepActionContext; }): NextStep[] { const recordsByRef = findStoredSnapshotRecords(params); + const foregroundRoot = findActiveForegroundRoot(recordsByRef); + const records = [...recordsByRef.values()]; + const foregroundSheetGrabber = + foregroundRoot !== null ? findSheetGrabberDescendant(foregroundRoot, records) : null; const nextStepElements = filterToForegroundElements( params.runtimeSnapshot.elements, recordsByRef, + foregroundRoot, ); + const repeatedNoOpAction = getRepeatedNoOpActionRef(params); + const foregroundIncompleteCompletionTapElement = + foregroundRoot !== null + ? 
findForegroundIncompleteCompletionTapElement(nextStepElements, repeatedNoOpAction) + : null; const tapElements = nextStepElements .map((element, index) => ({ element, index })) .filter( ({ element }) => element.actions.includes('tap') && !element.actions.includes('typeText') && + !(repeatedNoOpAction?.tool === 'tap' && repeatedNoOpAction.ref === element.ref) && !isHiddenTapNextStepElement(element.label) && !isStateChangingTapNextStepElement(element), ) @@ -439,7 +611,7 @@ export function createRuntimeSnapshotNextSteps(params: { return priorityDelta === 0 ? left.index - right.index : priorityDelta; }) .map(({ element }) => element); - const tapElement = tapElements[0] ?? null; + const tapElement = foregroundIncompleteCompletionTapElement ?? tapElements[0] ?? null; const sameScreenBatchElements = tapElements.filter( (element) => !isContentRichTapNextStepElement(element) && @@ -449,35 +621,95 @@ export function createRuntimeSnapshotNextSteps(params: { const switchBatchElements = nextStepElements.filter( (element) => element.role === 'switch' && element.actions.includes('tap'), ); - const batchElements = - switchBatchElements.length >= 2 ? switchBatchElements : sameScreenBatchElements; + let batchElements = sameScreenBatchElements; + if (switchBatchElements.length >= 2) { + batchElements = switchBatchElements; + } const batchLabel = switchBatchElements.length >= 2 ? 'Batch visible switch toggles' : 'Batch same-screen taps'; const scrollElement = nextStepElements .map((element, index) => ({ element, index })) - .filter(({ element }) => isScrollableNextStepElement(element)) + .filter( + ({ element }) => + isScrollableNextStepElement(element) && + !( + (repeatedNoOpAction?.tool === 'swipe' || repeatedNoOpAction?.tool === 'drag') && + repeatedNoOpAction.ref === element.ref + ), + ) .sort((left, right) => compareScrollableNextStepCandidates(left, right, recordsByRef))[0] ?.element ?? 
null; + const expandSheetNextStep: NextStep | null = + foregroundSheetGrabber && + isExpandableSheetGrabber(foregroundSheetGrabber.publicElement) && + !( + repeatedNoOpAction?.tool === 'drag' && + repeatedNoOpAction.ref === foregroundSheetGrabber.publicElement.ref + ) + ? { + label: 'Expand foreground sheet', + tool: 'drag', + params: { + simulatorId: params.simulatorId, + elementRef: foregroundSheetGrabber.publicElement.ref, + direction: 'up', + distance: 0.35, + duration: 0.8, + steps: 80, + postDelay: 0.8, + }, + } + : null; + const shouldDragSheetScroll = + expandSheetNextStep === null && + foregroundSheetGrabber !== null && + isExpandedSheetGrabber(foregroundSheetGrabber.publicElement) && + scrollElement !== null && + scrollElement.role !== 'application' && + scrollElement.role !== 'window'; const scrollNextStep: NextStep | null = scrollElement - ? { - label: 'Scroll visible content', - tool: 'swipe', - params: { - simulatorId: params.simulatorId, - withinElementRef: scrollElement.ref, - direction: 'up', - distance: 0.5, - }, - } + ? shouldDragSheetScroll + ? 
{ + label: 'Drag visible sheet content', + tool: 'drag', + params: { + simulatorId: params.simulatorId, + elementRef: scrollElement.ref, + direction: 'up', + distance: 0.7, + duration: 0.8, + steps: 80, + postDelay: 0.5, + }, + } + : { + label: 'Scroll visible content', + tool: 'swipe', + params: { + simulatorId: params.simulatorId, + withinElementRef: scrollElement.ref, + direction: 'up', + distance: 0.5, + }, + } : null; const shouldPrioritizeScroll = scrollNextStep !== null && tapElement !== null && - !batchElements.length && - isScreenChangingTapNextStepElement(tapElement); + expandSheetNextStep === null && + (shouldDragSheetScroll || + (batchElements.length < 2 && + (isScreenChangingTapNextStepElement(tapElement) || + (!isContentRichTapNextStepElement(tapElement) && + !isLowPriorityTapNextStepElement(tapElement.label))))); + const shouldShowBatch = + batchElements.length >= 2 && expandSheetNextStep === null && !shouldDragSheetScroll; const hasUsefulRuntimeGuidance = - batchElements.length >= 2 || scrollNextStep !== null || tapElement !== null; + shouldShowBatch || + expandSheetNextStep !== null || + scrollNextStep !== null || + tapElement !== null; const screenshotNextStep: NextStep = { label: 'Take screenshot for verification', tool: 'screenshot', @@ -499,7 +731,7 @@ export function createRuntimeSnapshotNextSteps(params: { }, ] : []), - ...(batchElements.length >= 2 + ...(shouldShowBatch ? [ { label: batchLabel, @@ -514,6 +746,7 @@ export function createRuntimeSnapshotNextSteps(params: { }, ] : []), + ...(expandSheetNextStep ? [expandSheetNextStep] : []), ...(scrollNextStep && shouldPrioritizeScroll ? [scrollNextStep] : []), ...(tapElement ? 
[ diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts index 6fa216bb1..1265e18f1 100644 --- a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -414,39 +414,6 @@ function clamp(value: number, minimum: number, maximum: number): number { return Math.min(Math.max(value, minimum), maximum); } -function createSheetSwipeFrame( - containerFrame: Frame, - grabberFrame: Frame, - descendantFrames: readonly Frame[], -): Frame { - const minimumHeight = Math.min(120, Math.max(2, containerFrame.height * 0.3)); - const containerBottom = containerFrame.y + containerFrame.height; - const topMargin = Math.min(24, Math.max(8, containerFrame.height * 0.02)); - const topFloor = grabberFrame.y + grabberFrame.height + topMargin; - const defaultTop = Math.max(topFloor, containerFrame.y + containerFrame.height * 0.2); - const defaultBottom = containerFrame.y + containerFrame.height * 0.95; - const contentFrames = descendantFrames.filter( - (frame) => - isVisible(frame) && - framesIntersect(frame, containerFrame) && - frame.y + frame.height / 2 >= topFloor, - ); - const contentTop = Math.min(...contentFrames.map((frame) => frame.y)); - const contentBottom = Math.max(...contentFrames.map((frame) => frame.y + frame.height)); - const preferredTop = contentFrames.length > 0 ? Math.min(defaultTop, contentTop) : defaultTop; - const preferredBottom = - contentFrames.length > 0 ? 
Math.max(defaultBottom, contentBottom) : defaultBottom; - const bottom = Math.round(clamp(preferredBottom, topFloor + minimumHeight, containerBottom)); - const top = Math.round(clamp(preferredTop, topFloor, bottom - minimumHeight)); - - return normalizeFrame({ - x: containerFrame.x, - y: top, - width: containerFrame.width, - height: bottom - top, - }); -} - function findViewportFrame(elements: RuntimeSnapshotElementRecord[]): Frame | null { return ( elements.find( @@ -503,6 +470,14 @@ function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): vo continue; } + const sheetGrabber = + publicElement.role === 'application' || publicElement.role === 'window' + ? findSheetGrabberDescendant(element, elements) + : null; + if (sheetGrabber) { + continue; + } + const hasOverflowingDescendant = elements.some((candidate) => { if (candidate === element) { return false; @@ -513,32 +488,6 @@ function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): vo ); }); - const sheetGrabber = - publicElement.role === 'application' || publicElement.role === 'window' - ? 
findSheetGrabberDescendant(element, elements) - : null; - - if (sheetGrabber) { - if (hasPreferredDescendantSwipeTarget(element, elements)) { - continue; - } - - publicElement.actions.push('swipeWithin'); - metadata.swipeFrame = createSheetSwipeFrame( - publicElement.frame, - sheetGrabber.publicElement.frame, - elements - .filter( - (candidate) => - candidate !== element && - candidate !== sheetGrabber && - isDescendantPath(metadata.path, candidate.metadata.path), - ) - .map((candidate) => candidate.publicElement.frame), - ); - continue; - } - if ( publicElement.role !== 'application' && publicElement.role !== 'window' && @@ -555,6 +504,7 @@ function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): vo publicElement.state?.visible === false || !isVisible(publicElement.frame) || publicElement.actions.includes('swipeWithin') || + findSheetGrabberDescendant(element, elements) !== null || hasPreferredDescendantSwipeTarget(element, elements) || !hasSemanticVerticalOverflowingDescendant(element, elements) ) { @@ -881,3 +831,57 @@ export function getRuntimeElementSwipePoints( return { ok: true, ...points }; } + +export function getRuntimeElementDirectionalDragPoints( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, + distance = 0.35, + viewportFrame?: Frame, +): RuntimeSwipePointResolution { + const { frame } = element.publicElement; + if (frame.width < 2 || frame.height < 2) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' is too small for a reliable drag.`, + }; + } + + const from = getRuntimeElementActivationPoint(element); + const boundingFrame = viewportFrame ?? 
frame; + const edgeInset = 24; + const horizontalDistance = Math.max(1, Math.round(boundingFrame.width * clamp(distance, 0, 1))); + const verticalDistance = Math.max(1, Math.round(boundingFrame.height * clamp(distance, 0, 1))); + const minX = Math.round(boundingFrame.x + Math.min(edgeInset, boundingFrame.width / 2)); + const maxX = Math.round( + boundingFrame.x + boundingFrame.width - Math.min(edgeInset, boundingFrame.width / 2), + ); + const minY = Math.round(boundingFrame.y + Math.min(edgeInset, boundingFrame.height / 2)); + const maxY = Math.round( + boundingFrame.y + boundingFrame.height - Math.min(edgeInset, boundingFrame.height / 2), + ); + + let to: Point; + switch (direction) { + case 'up': + to = { x: from.x, y: clamp(from.y - verticalDistance, minY, maxY) }; + break; + case 'down': + to = { x: from.x, y: clamp(from.y + verticalDistance, minY, maxY) }; + break; + case 'left': + to = { x: clamp(from.x - horizontalDistance, minX, maxX), y: from.y }; + break; + case 'right': + to = { x: clamp(from.x + horizontalDistance, minX, maxX), y: from.y }; + break; + } + + if (isDegenerateSwipe(from, to)) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' does not provide non-degenerate ${direction} drag points.`, + }; + } + + return { ok: true, from, to }; +} diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index ebf14e85c..5d8ad46f6 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -181,6 +181,7 @@ export function createSwipeExecutor( [guard.warningText, captureResult.warning], { ...(captureResult.capture ? { capture: captureResult.capture } : {}), + previousRuntimeSnapshot: resolution.snapshot.payload, ...(captureResult.uiError ? 
{ uiError: captureResult.uiError } : {}), }, ); diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index 0b827890b..5b89b294f 100644 --- a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -159,6 +159,7 @@ export function createTapExecutor( [guard.warningText, captureResult.warning], { ...(captureResult.capture ? { capture: captureResult.capture } : {}), + previousRuntimeSnapshot: resolution.snapshot.payload, ...(captureResult.uiError ? { uiError: captureResult.uiError } : {}), }, ); diff --git a/src/mcp/tools/ui-automation/type_text.ts b/src/mcp/tools/ui-automation/type_text.ts index c6f54b3d7..84a443d00 100644 --- a/src/mcp/tools/ui-automation/type_text.ts +++ b/src/mcp/tools/ui-automation/type_text.ts @@ -53,8 +53,13 @@ function containsUnsupportedAxeTypeText(text: string): boolean { const typeTextSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), - text: z.string().min(1, { message: 'Text cannot be empty' }), + elementRef: z + .string() + .min(1, { message: 'elementRef must be non-empty' }) + .describe( + 'Required runtime text-field elementRef from the latest snapshot_ui or wait_for_ui output', + ), + text: z.string().min(1, { message: 'Text cannot be empty' }).describe('Text to type'), replaceExisting: z .boolean() .optional() diff --git a/src/rendering/types.ts b/src/rendering/types.ts index 9bf3299e2..1b3819d4e 100644 --- a/src/rendering/types.ts +++ b/src/rendering/types.ts @@ -24,6 +24,9 @@ export interface RenderSession { export interface RenderHints { headerTitle?: string; + runtimeSnapshot?: { + suppressedTargetRefs?: readonly string[]; + }; } export interface StructuredToolOutput { diff --git a/src/types/domain-results.ts b/src/types/domain-results.ts index dd70b1db4..e67f2f59f 100644 --- a/src/types/domain-results.ts +++ b/src/types/domain-results.ts @@ -369,6 +369,15 @@ 
export interface UiActionSwipe { to?: Point; durationSeconds?: number; } +export interface UiActionDrag { + type: 'drag'; + elementRef: string; + direction: 'up' | 'down' | 'left' | 'right'; + from?: Point; + to?: Point; + durationSeconds?: number; + steps?: number; +} export interface UiActionTouch { type: 'touch'; elementRef: string; @@ -411,6 +420,7 @@ export interface UiActionBatch { export type UiAction = | UiActionTap | UiActionSwipe + | UiActionDrag | UiActionTouch | UiActionLongPress | UiActionButton diff --git a/src/utils/__tests__/structured-output-envelope.test.ts b/src/utils/__tests__/structured-output-envelope.test.ts index e8f1a4688..f5c344170 100644 --- a/src/utils/__tests__/structured-output-envelope.test.ts +++ b/src/utils/__tests__/structured-output-envelope.test.ts @@ -138,6 +138,69 @@ describe('toStructuredEnvelope', () => { }); }); + it('puts suppressed target evidence in a no-ref evidence array, not text rows', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-suppressed', + seq: 2, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + frame: { x: 12, y: 81, width: 80, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e2', + role: 'button', + label: 'London, England', + value: 'not saved', + frame: { x: 20, y: 140, width: 200, height: 72 }, + state: { visible: true }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'text', + label: 'Search results', + frame: { x: 20, y: 100, width: 120, height: 24 }, + state: { visible: true }, + actions: [], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Add' }, + { action: 'tap', elementRef: 'e2', label: 'London, England' }, + ], + }, + }; + + expect( + 
toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshotSuppressedTargetRefs: ['e2'], + }), + ).toMatchObject({ + data: { + capture: { + targets: ['e1|tap|button|Add||'], + text: ['e3|text|text|Search results||'], + evidence: ['button|London, England|not saved|'], + }, + }, + }); + }); + it('caps compact runtime snapshot rows by category', () => { const targets = Array.from({ length: 80 }, (_, index) => ({ ref: `e${index + 1}`, diff --git a/src/utils/renderers/__tests__/cli-text-renderer.test.ts b/src/utils/renderers/__tests__/cli-text-renderer.test.ts index b7173dc79..cc8789a4b 100644 --- a/src/utils/renderers/__tests__/cli-text-renderer.test.ts +++ b/src/utils/renderers/__tests__/cli-text-renderer.test.ts @@ -626,6 +626,60 @@ describe('cli-text-renderer', () => { expect(output).not.toContain('```json'); }); + it('renders suppressed runtime evidence without callable refs', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { runtimeSnapshot: { suppressedTargetRefs: ['e2'] } }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + frame: { x: 10, y: 20, width: 60, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e2', + role: 'button', + label: 'London, England', + value: 'not saved', + frame: { x: 20, y: 80, width: 200, height: 72 }, + state: { visible: true }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Add' }, + { action: 'tap', elementRef: 'e2', label: 'London, England' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — 
ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Add||'); + expect(output).toContain('Evidence (1) — role|label|value|id'); + expect(output).toContain('button|London, England|not saved|'); + expect(output).not.toContain('e2|tap|button|London, England|not saved|'); + }); + it('renders unchanged runtime UI snapshots compactly', () => { const output = renderCliTextTranscript({ structuredOutput: { diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index 35f7ae8ef..34e3276ea 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -1277,28 +1277,72 @@ function formatRuntimeElementLine(element: RuntimeElementV1, action?: string): s ].join('|'); } -function isLikelyRuntimeTarget(element: RuntimeElementV1): boolean { +function formatSuppressedRuntimeEvidenceLine(element: RuntimeElementV1): string { + return [ + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function getSuppressedRuntimeTargetRefs(hints?: RenderHints): Set { + return new Set(hints?.runtimeSnapshot?.suppressedTargetRefs ?? 
[]); +} + +function hasRuntimeTextEvidence(element: RuntimeElementV1): boolean { + return ( + compactRuntimeSnapshotText(element.label).length > 0 || + compactRuntimeSnapshotText(element.value).length > 0 + ); +} + +function isLikelyRuntimeTarget( + element: RuntimeElementV1, + suppressedTargetRefs: ReadonlySet = new Set(), +): boolean { return ( + !suppressedTargetRefs.has(element.ref) && !isHiddenRuntimeTarget(element) && element.actions.some((action) => action === 'tap' || action === 'typeText') ); } +function isSuppressedRuntimeTextEvidenceElement( + element: RuntimeElementV1, + suppressedTargetRefs: ReadonlySet, +): boolean { + return ( + suppressedTargetRefs.has(element.ref) && + element.state?.visible !== false && + !isHiddenRuntimeTarget(element) && + !isLowPriorityRuntimeTarget(element) && + hasRuntimeTextEvidence(element) + ); +} + function isScrollableRuntimeArea(element: RuntimeElementV1): boolean { return element.actions.includes('swipeWithin') && !isLikelyRuntimeTarget(element); } -function countLikelyRuntimeTargets(snapshot: RuntimeSnapshotV1): number { - return snapshot.elements.filter(isLikelyRuntimeTarget).length; +function countLikelyRuntimeTargets( + snapshot: RuntimeSnapshotV1, + suppressedTargetRefs: ReadonlySet = new Set(), +): number { + return snapshot.elements.filter((element) => isLikelyRuntimeTarget(element, suppressedTargetRefs)) + .length; } function countScrollableRuntimeAreas(snapshot: RuntimeSnapshotV1): number { return snapshot.elements.filter(isScrollableRuntimeArea).length; } -function createRuntimeSnapshotTargetsSection(snapshot: RuntimeSnapshotV1): SectionTextBlock { +function createRuntimeSnapshotTargetsSection( + snapshot: RuntimeSnapshotV1, + suppressedTargetRefs: ReadonlySet = new Set(), +): SectionTextBlock { const likelyTargets = sortRuntimeTargetsForDisplay( - snapshot.elements.filter(isLikelyRuntimeTarget), + snapshot.elements.filter((element) => isLikelyRuntimeTarget(element, suppressedTargetRefs)), ); const lines = 
likelyTargets.map((element) => formatRuntimeElementLine(element)); @@ -1308,6 +1352,23 @@ function createRuntimeSnapshotTargetsSection(snapshot: RuntimeSnapshotV1): Secti ); } +function createRuntimeSnapshotEvidenceSection( + snapshot: RuntimeSnapshotV1, + suppressedTargetRefs: ReadonlySet, +): SectionTextBlock | null { + const evidenceElements = snapshot.elements.filter((element) => + isSuppressedRuntimeTextEvidenceElement(element, suppressedTargetRefs), + ); + if (evidenceElements.length === 0) { + return null; + } + + return createSection( + `Evidence (${evidenceElements.length}) — role|label|value|id`, + evidenceElements.map((element) => formatSuppressedRuntimeEvidenceLine(element)), + ); +} + function createRuntimeSnapshotScrollAreasSection( snapshot: RuntimeSnapshotV1, ): SectionTextBlock | null { @@ -1500,13 +1561,18 @@ function createCaptureResultItems( if (isRuntimeSnapshot) { const snapshot = result.capture as RuntimeSnapshotV1; - const likelyTargetCount = countLikelyRuntimeTargets(snapshot); + const suppressedTargetRefs = getSuppressedRuntimeTargetRefs(hints); + const likelyTargetCount = countLikelyRuntimeTargets(snapshot, suppressedTargetRefs); const scrollAreaCount = countScrollableRuntimeAreas(snapshot); + const evidenceSection = createRuntimeSnapshotEvidenceSection(snapshot, suppressedTargetRefs); const scrollAreasSection = createRuntimeSnapshotScrollAreasSection(snapshot); if (title === 'Wait for UI' && result.waitMatch) { items.push(createWaitMatchSection(result.waitMatch)); } - items.push(createRuntimeSnapshotTargetsSection(snapshot)); + items.push(createRuntimeSnapshotTargetsSection(snapshot, suppressedTargetRefs)); + if (evidenceSection) { + items.push(evidenceSection); + } if (scrollAreasSection) { items.push(scrollAreasSection); } @@ -2340,6 +2406,7 @@ function createSpecialCaseItems( const headerTitleMap: Record = { tap: 'Tap', swipe: 'Swipe', + drag: 'Drag', touch: 'Touch', 'long-press': 'Long Press', button: 'Button', @@ -2375,6 +2442,16 @@ 
function createSpecialCaseItems( `${durationText} simulated successfully.`; break; } + case 'drag': { + const durationText = + typeof result.action.durationSeconds === 'number' + ? ` duration=${result.action.durationSeconds}s` + : ''; + successMessage = + `Drag ${result.action.direction} from elementRef ${result.action.elementRef}` + + `${durationText} simulated successfully.`; + break; + } case 'touch': successMessage = `Touch event (${result.action.event ?? 'touch'}) on elementRef ${result.action.elementRef} executed successfully.`; break; diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts index d1b681244..ddfe003c8 100644 --- a/src/utils/structured-output-envelope.ts +++ b/src/utils/structured-output-envelope.ts @@ -22,6 +22,7 @@ export interface StructuredEnvelopeOptions { nextStepRuntime?: RuntimeKind; outputStyle?: OutputStyle; runtimeSnapshot?: RuntimeSnapshotEnvelopeMode; + runtimeSnapshotSuppressedTargetRefs?: readonly string[]; } type RuntimeSnapshotCompactCapture = { @@ -33,6 +34,7 @@ type RuntimeSnapshotCompactCapture = { targets: string[]; scroll: string[]; text?: string[]; + evidence?: string[]; udid: string; }; @@ -49,6 +51,7 @@ const MINIMAL_DATA_PRUNE_KEYS = ['request'] as const; const COMPACT_RUNTIME_TARGET_LIMIT = 64; const COMPACT_RUNTIME_SCROLL_LIMIT = 32; const COMPACT_RUNTIME_TEXT_LIMIT = 64; +const COMPACT_RUNTIME_EVIDENCE_LIMIT = 64; const HIDDEN_RUNTIME_TARGET_LABELS = new Set(['sheet grabber']); const LOW_PRIORITY_RUNTIME_TARGET_LABELS = new Set([ 'sheet grabber', @@ -189,6 +192,15 @@ function compactRuntimeElementRow(element: RuntimeElementV1, action: string): st ].join('|'); } +function compactSuppressedRuntimeEvidenceRow(element: RuntimeElementV1): string { + return [ + element.role ?? 
'', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNameV1 | 'none' { if (element.actions.includes('typeText')) { return 'typeText'; @@ -202,21 +214,52 @@ function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNa return 'none'; } +function hasRuntimeTextEvidence(element: RuntimeElementV1): boolean { + return ( + compactRuntimeSnapshotText(element.label).length > 0 || + compactRuntimeSnapshotText(element.value).length > 0 + ); +} + function isRuntimeTextSummaryElement(element: RuntimeElementV1): boolean { return ( - element.role === 'text' && + element.role === 'text' && element.state?.visible !== false && hasRuntimeTextEvidence(element) + ); +} + +function isSuppressedRuntimeTextEvidenceElement( + element: RuntimeElementV1, + suppressedTargetRefs: ReadonlySet, +): boolean { + return ( + suppressedTargetRefs.has(element.ref) && element.state?.visible !== false && - (compactRuntimeSnapshotText(element.label).length > 0 || - compactRuntimeSnapshotText(element.value).length > 0) + !isHiddenRuntimeTarget(element) && + !isLowPriorityRuntimeTarget(element) && + hasRuntimeTextEvidence(element) ); } +function uniqueRuntimeElements(elements: RuntimeElementV1[]): RuntimeElementV1[] { + const seenRefs = new Set(); + return elements.filter((element) => { + if (seenRefs.has(element.ref)) { + return false; + } + seenRefs.add(element.ref); + return true; + }); +} + function toRuntimeSnapshotCompactCapture( snapshot: RuntimeSnapshotV1, + options: { suppressedTargetRefs?: readonly string[] } = {}, ): RuntimeSnapshotCompactCapture { + const suppressedTargetRefs = new Set(options.suppressedTargetRefs ?? 
[]); const targets = sortRuntimeTargetsForDisplay( snapshot.elements.filter( (element) => + !suppressedTargetRefs.has(element.ref) && !isHiddenRuntimeTarget(element) && (element.actions.includes('tap') || element.actions.includes('typeText')), ), @@ -235,9 +278,22 @@ function toRuntimeSnapshotCompactCapture( ) .slice(0, COMPACT_RUNTIME_SCROLL_LIMIT) .map((element) => compactRuntimeElementRow(element, 'swipe')); - const text = sortRuntimeTextForDisplay(snapshot.elements.filter(isRuntimeTextSummaryElement)) + const suppressedTextEvidence = sortRuntimeTextForDisplay( + snapshot.elements.filter((element) => + isSuppressedRuntimeTextEvidenceElement(element, suppressedTargetRefs), + ), + ); + const ordinaryTextEvidence = sortRuntimeTextForDisplay( + snapshot.elements.filter( + (element) => !suppressedTargetRefs.has(element.ref) && isRuntimeTextSummaryElement(element), + ), + ); + const text = uniqueRuntimeElements(ordinaryTextEvidence) .slice(0, COMPACT_RUNTIME_TEXT_LIMIT) .map((element) => compactRuntimeElementRow(element, 'text')); + const evidence = uniqueRuntimeElements(suppressedTextEvidence) + .slice(0, COMPACT_RUNTIME_EVIDENCE_LIMIT) + .map(compactSuppressedRuntimeEvidenceRow); return { type: 'runtime-snapshot', @@ -248,6 +304,7 @@ function toRuntimeSnapshotCompactCapture( targets, scroll, ...(text.length > 0 ? { text } : {}), + ...(evidence.length > 0 ? { evidence } : {}), udid: snapshot.simulatorId, }; } @@ -316,7 +373,12 @@ function projectRuntimeSnapshotData( if (isRuntimeSnapshotCapture(dataWithCapture.capture)) { projectedData = { ...dataWithCapture, - capture: toRuntimeSnapshotCompactCapture(dataWithCapture.capture), + capture: toRuntimeSnapshotCompactCapture( + dataWithCapture.capture, + options.runtimeSnapshotSuppressedTargetRefs + ? 
{ suppressedTargetRefs: options.runtimeSnapshotSuppressedTargetRefs } + : {}, + ), }; } else if (isRuntimeSnapshotUnchangedCapture(dataWithCapture.capture)) { projectedData = { diff --git a/src/utils/tool-registry.ts b/src/utils/tool-registry.ts index afd5910fb..56cca768a 100644 --- a/src/utils/tool-registry.ts +++ b/src/utils/tool-registry.ts @@ -32,6 +32,8 @@ function buildStructuredContent( return undefined; } + const suppressedTargetRefs = structuredOutput.renderHints?.runtimeSnapshot?.suppressedTargetRefs; + return toStructuredEnvelope( structuredOutput.result, structuredOutput.schema, @@ -40,6 +42,9 @@ function buildStructuredContent( nextSteps: session.getNextSteps?.(), nextStepRuntime: 'mcp', outputStyle: 'minimal', + ...(suppressedTargetRefs + ? { runtimeSnapshotSuppressedTargetRefs: suppressedTargetRefs } + : {}), }, ); }