From f706dedcb0b072cb9058bfa240a7f4e73d29ee47 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Tue, 12 May 2026 17:38:19 -0500 Subject: [PATCH] Pane(fix[wait_for_text]): anchor on baseline so stale scrollback no longer matches (#45) why: wait_for_text returned found=True on the first poll whenever the pattern was already present in the pane when the call began, so agents using the tool to synchronise on command output saw the wrong result. The sibling wait_for_content_change already snapshots a baseline; wait_for_text now mirrors that pattern. what: - Snapshot (history_size, cursor_y) at entry; compute baseline_abs as the absolute grid index. Each poll re-reads history_size and captures from baseline_abs - hs_now + 1 onward, so the matched region tracks the pane's grid even as content scrolls into history. - Add _read_grid_position and _read_history_size helpers that expand the corresponding tmux format strings via display-message. - Drop content_start / content_end parameters; the baseline anchor supersedes them. Pre-alpha API contract makes this a clean drop. - Rewrite docstring: new "wait for new appearance" semantics, cross- reference search_panes for synchronous matches, sentinel-marker pattern as adversarial-safety guidance, and edge-case notes for scrollback truncation, reverse-index sequences, and clear/reset. - Restructure WAIT_FOR_TEXT_FIXTURES with separate pre_command and during_command fields; add stale_scrollback_does_not_match regression test. - CHANGES: ### Breaking changes entry for the parameter drop (with before/after migration block) and ### Fixes entry for the stale-scrollback behaviour. --- CHANGES | 20 ++++ src/libtmux_mcp/tools/pane_tools/wait.py | 106 +++++++++++++++++---- tests/test_pane_tools.py | 114 +++++++++++++++++++++-- 3 files changed, 214 insertions(+), 26 deletions(-) diff --git a/CHANGES b/CHANGES index 393d3e25..08119ce2 100644 --- a/CHANGES +++ b/CHANGES @@ -6,6 +6,26 @@ _Notes on upcoming releases will be added here_ +### Breaking changes + +**{tooliconl}`wait-for-text` drops `content_start` and `content_end`** + +The baseline anchor introduced in this release supersedes the manual capture-range knobs: capture now follows the pane's grid position automatically, so the two parameters had no remaining purpose. Agents that named them should drop them from their call sites. (#45) + +```python +# Before +wait_for_text(pattern="OK", content_start=-100) + +# After +wait_for_text(pattern="OK") +``` + +### Fixes + +**{tooliconl}`wait-for-text` now waits for *new* output, not stale scrollback** + +Previously {tooliconl}`wait-for-text` returned `found=True` on the first poll if the pattern was already present anywhere in the pane when the call began. It now snapshots `#{history_size} + #{cursor_y}` at entry and only matches lines written below that baseline, so agents using the tool to synchronise on command output see the result they expected. For the synchronous "is the pattern in the pane right now?" use case, call {tooliconl}`search-panes` instead. (#45) + ## libtmux-mcp 0.1.0a6 (2026-05-09) libtmux-mcp 0.1.0a6 is the activation and registration cleanup release. It makes the server much easier for MCP clients to discover from ordinary "pane", "window", and "session" prompts, standardizes new setup docs around the `tmux` registration slug, and adds migration guidance for existing `libtmux` registrations. Existing installs keep working; the release changes defaults and documentation so new installs line up with the tool prefix users actually see. diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 52767ce7..1ddb5652 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -22,6 +22,9 @@ WaitForTextResult, ) +if t.TYPE_CHECKING: + from libtmux.pane import Pane + logger = logging.getLogger(__name__) #: Exceptions that indicate "client transport is gone, keep polling". @@ -96,6 +99,34 @@ async def _maybe_log( return +def _read_grid_position(pane: Pane) -> int: + """Return ``history_size + cursor_y`` for ``pane``. + + The result is an absolute grid index that remains stable as new + content scrolls the cursor row into history. Used by + ``wait_for_text`` to anchor a baseline before polling so stale + scrollback no longer matches; see issue #45. + + Uses ``:`` as the format separator because both values are + integers and ``:`` cannot appear in their stringification. + """ + result = pane.cmd("display-message", "-p", "#{history_size}:#{cursor_y}") + raw = result.stdout[0] if result.stdout else "0:0" + hs_str, cy_str = raw.split(":", 1) + return int(hs_str) + int(cy_str) + + +def _read_history_size(pane: Pane) -> int: + """Return the current ``#{history_size}`` for ``pane``. + + Read on every poll tick by ``wait_for_text`` because tmux's + ``-S`` is relative to the live ``hsize``, which grows as lines + scroll out of the visible region. + """ + result = pane.cmd("display-message", "-p", "#{history_size}") + return int(result.stdout[0]) if result.stdout else 0 + + @handle_tool_errors_async async def wait_for_text( pattern: str, @@ -107,16 +138,30 @@ async def wait_for_text( timeout: float = 8.0, interval: float = 0.05, match_case: bool = False, - content_start: int | None = None, - content_end: int | None = None, socket_name: str | None = None, ctx: Context | None = None, ) -> WaitForTextResult: - """Wait for text to appear in a tmux pane. - - Polls the pane content at regular intervals until the pattern is found - or the timeout is reached. Use this instead of polling capture_pane - manually — it saves agent tokens and turns. + r"""Wait for NEW text to appear in a tmux pane. + + Polls the pane at regular intervals until ``pattern`` appears on a + line written *after* the call starts, or the timeout is reached. + Use this instead of polling :func:`capture_pane` manually — it + saves agent tokens and turns. + + **What "new" means.** At entry the tool snapshots the pane's absolute + grid position (``history_size + cursor_y``) and only matches lines + written below that baseline. Stale scrollback that was already + present when the call began is ignored. For the synchronous "is + the pattern in the pane right now?" check, call + {tooliconl}`search-panes` instead. + + **Adversarial-safety pattern.** If you cannot trust that the + pattern only appears after your action — for example because the + pane prints recurring prompts, log lines, or output from background + processes you do not control — bracket your command with a unique + sentinel: ``cmd; echo __WAIT_$RANDOM__`` and wait for the sentinel + instead of ``cmd``'s natural output. tmux's grid model cannot + distinguish "your output" from "theirs"; the sentinel can. When a :class:`fastmcp.Context` is available, this tool emits periodic ``ctx.report_progress`` notifications so MCP clients can @@ -147,10 +192,6 @@ async def wait_for_text( Seconds between polls. Default 0.05 (50ms). match_case : bool Whether to match case. Default False (case-insensitive). - content_start : int, optional - Start line for capture. Negative values reach into scrollback. - content_end : int, optional - End line for capture. socket_name : str, optional tmux socket name. ctx : fastmcp.Context, optional @@ -164,6 +205,24 @@ async def wait_for_text( Notes ----- + **Scrollback truncation.** If ``history-limit`` is small and the + baseline line rolls out of history during the wait, tmux clips + ``-S`` to the oldest available line (``cmd-capture-pane.c``); the + worst case degrades to pre-baseline behaviour on the surviving + portion of history rather than an infinite false-match loop. + + **Reverse-index sequences (``\\eM``).** Programs that rewrite + history below the baseline can theoretically re-introduce stale + text into the captured range. This is rare on the main screen + because pagers (``less``, ``more``) and other heavy users run on + the alternate screen, which has a fresh grid and does not + interact with the baseline. + + **``clear`` / ``reset``.** With the default ``scroll-on-clear`` + option, cleared content scrolls into history (``screen-write.c`` + ``screen_write_clearscreen``), so the baseline anchor is + unaffected. + **Safety tier.** Tagged ``readonly`` because the tool observes pane state without mutating it. Readonly clients may therefore block for the caller-supplied ``timeout`` (default 8 s, caller @@ -193,6 +252,17 @@ async def wait_for_text( ) assert pane.pane_id is not None + + # Snapshot the pane's absolute grid position before polling. ``hs0 + + # cy0`` is invariant under subsequent scrolling — tmux's ``-S`` is + # relative to the live ``hsize`` at capture time + # (cmd-capture-pane.c: ``top = gd->hsize + n``), so re-reading + # ``hsize`` each tick and computing ``baseline_abs - hsize_now + 1`` + # always points at "the first line written after entry". Mirrors + # the snapshot-before-loop pattern in ``wait_for_content_change`` + # below; see issue #45. + baseline_abs = await asyncio.to_thread(_read_grid_position, pane) + matched_lines: list[str] = [] start_time = time.monotonic() deadline = start_time + timeout @@ -208,12 +278,16 @@ async def wait_for_text( message=f"Polling pane {pane.pane_id} for pattern", ) - # FastMCP direct-awaits async tools on the main event loop; the - # libtmux capture_pane call is a blocking subprocess.run. Push - # to the default executor so concurrent tool calls are not - # starved during long waits. + # FastMCP direct-awaits async tools on the main event loop; + # the libtmux display-message + capture_pane calls are both + # blocking subprocess.run. Push to the default executor so + # concurrent tool calls are not starved during long waits. + hs_now = await asyncio.to_thread(_read_history_size, pane) + # ``+ 1`` skips the baseline line itself so we don't + # re-match the row the cursor sat on at entry. + start_line = baseline_abs - hs_now + 1 lines = await asyncio.to_thread( - pane.capture_pane, start=content_start, end=content_end + pane.capture_pane, start=start_line, end=None ) hits = [line for line in lines if compiled.search(line)] if hits: diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 18d395f5..1a148a49 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1128,23 +1128,35 @@ class WaitForTextFixture(t.NamedTuple): """Test fixture for wait_for_text.""" test_id: str - command: str | None + #: Command sent BEFORE ``wait_for_text`` is called. Its output is + #: expected to be present in the pane scrollback (and therefore + #: above the baseline) by the time the wait begins. Used to verify + #: that stale scrollback no longer matches (#45). The positive + #: "text appears after baseline" case lives in + #: ``test_wait_for_text_matches_new_output_after_baseline`` rather + #: than this fixture because it needs ``asyncio.gather`` to + #: coordinate emission against the running poll loop — synchronous + #: setup races the shell's enter-processing on CI and shifts the + #: baseline past single-line output. + pre_command: str | None pattern: str timeout: float expected_found: bool WAIT_FOR_TEXT_FIXTURES: list[WaitForTextFixture] = [ + # Regression for #45: pre-existing scrollback must NOT match. WaitForTextFixture( - test_id="text_found", - command="echo WAIT_MARKER_abc123", - pattern="WAIT_MARKER_abc123", - timeout=2.0, - expected_found=True, + test_id="stale_scrollback_does_not_match", + pre_command="echo WAIT_MARKER_stale", + pattern="WAIT_MARKER_stale", + timeout=0.5, + expected_found=False, ), + # Genuinely absent pattern still times out cleanly. WaitForTextFixture( test_id="timeout_not_found", - command=None, + pre_command=None, pattern="NEVER_EXISTS_xyz999", timeout=0.3, expected_found=False, @@ -1161,7 +1173,7 @@ def test_wait_for_text( mcp_server: Server, mcp_pane: Pane, test_id: str, - command: str | None, + pre_command: str | None, pattern: str, timeout: float, expected_found: bool, @@ -1169,8 +1181,48 @@ def test_wait_for_text( """wait_for_text polls pane content for a pattern.""" import asyncio - if command is not None: - mcp_pane.send_keys(command, enter=True) + if pre_command is not None: + mcp_pane.send_keys(pre_command, enter=True) + # Wait until the pane has fully settled before measuring the + # baseline. "Settled" means: + # + # (a) the OUTPUT line is present — ``line.strip() == pattern``, + # distinguishing the shell's actual output from the typed + # echo line that contains ``pattern`` as a substring (and + # which would otherwise trip a naive ``pattern in capture`` + # predicate while keys are still buffered pre-enter), and + # (b) ``(history_size, cursor_y)`` is unchanged across two + # consecutive polls — zsh prints async prompt-redraw + # lines (vcs_info, precmd hooks) some milliseconds after + # the initial prompt, and those redraws keep growing + # hsize *during* ``wait_for_text``'s window, pulling + # pre-baseline rows back into the visible-relative + # ``start_line`` capture. Waiting them out anchors the + # baseline below all async output. + # + # A fixed ``time.sleep`` would do the same job but couples the + # test to a wall-clock value (the project's idiom for + # tmux-state waits is ``retry_until`` — used throughout this + # file). + last_state: tuple[int, int] = (-1, -1) + + def _stale_settled() -> bool: + nonlocal last_state + raw = mcp_pane.cmd( + "display-message", "-p", "#{history_size}:#{cursor_y}" + ).stdout + if not raw: + return False + hs_str, cy_str = raw[0].split(":", 1) + state = (int(hs_str), int(cy_str)) + has_output_line = any( + line.strip() == pattern for line in mcp_pane.capture_pane() + ) + settled = state == last_state and has_output_line + last_state = state + return settled + + retry_until(_stale_settled, 2, raises=True) result = asyncio.run( wait_for_text( @@ -1190,6 +1242,48 @@ def test_wait_for_text( assert len(result.matched_lines) >= 1 +def test_wait_for_text_matches_new_output_after_baseline( + mcp_server: Server, mcp_pane: Pane +) -> None: + """wait_for_text finds output written AFTER its baseline snapshot. + + Coordinates the marker emission against the running poll loop via + :func:`asyncio.gather` so ``send_keys`` is guaranteed to fire + *after* :func:`wait_for_text` has captured its baseline. Without + that coordination the test races the shell's enter-processing — + if the shell advances the cursor before the baseline read on CI, + ``start_line`` shifts past the single-line marker and the poll + loop misses it (the failure mode that took the original + synchronous ``send_keys`` + ``asyncio.run`` shape to all six tmux + matrix slots on PR #47 commit aa8de89). + """ + import asyncio + + async def emit_after_baseline() -> None: + # The baseline read is a single display-message round trip + # (<5 ms in practice); 0.2 s gives wait_for_text plenty of + # headroom to lock the baseline before the marker fires. + await asyncio.sleep(0.2) + await asyncio.to_thread(mcp_pane.send_keys, "echo WAIT_MARKER_after", True) + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="WAIT_MARKER_after", + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await emit_after_baseline() + return await wait_task + + result = asyncio.run(run()) + assert result.found is True + assert result.timed_out is False + assert any("WAIT_MARKER_after" in line for line in result.matched_lines) + + def test_wait_for_text_invalid_regex(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text raises ToolError on invalid regex when regex=True.""" import asyncio