From ee94d6ae772f9b18b0b00e3c89aee294bc437180 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 19:56:48 -0500 Subject: [PATCH 01/31] Pane(fix[wait_for_text]): anchor on baseline so stale scrollback no longer matches (#45) why: wait_for_text returned found=True on the first poll whenever the pattern was already present in the pane when the call began, so agents using it to synchronise on command output saw the wrong result. The sibling wait_for_content_change already snapshots a baseline; wait_for_text now mirrors that pattern. what: - Snapshot (history_size, cursor_y) at entry; compute baseline_abs as the absolute grid index. Each poll re-reads history_size and captures from baseline_abs - hs_now + 1 onward, so the matched region tracks the pane's grid even as content scrolls into history. - Add _read_grid_position and _read_history_size helpers that expand the corresponding tmux format strings via display-message. - Drop content_start / content_end parameters; the baseline anchor supersedes them. Pre-alpha API contract makes this a clean drop. - Rewrite the docstring around the new "wait for new appearance" semantics, cross-reference search_panes for synchronous matches, and document edge cases (scrollback truncation, reverse-index sequences, clear/reset). --- src/libtmux_mcp/tools/pane_tools/wait.py | 106 +++++++++++++++++++---- 1 file changed, 90 insertions(+), 16 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 52767ce7..1ddb5652 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -22,6 +22,9 @@ WaitForTextResult, ) +if t.TYPE_CHECKING: + from libtmux.pane import Pane + logger = logging.getLogger(__name__) #: Exceptions that indicate "client transport is gone, keep polling". @@ -96,6 +99,34 @@ async def _maybe_log( return +def _read_grid_position(pane: Pane) -> int: + """Return ``history_size + cursor_y`` for ``pane``. 
+ + The result is an absolute grid index that remains stable as new + content scrolls the cursor row into history. Used by + ``wait_for_text`` to anchor a baseline before polling so stale + scrollback no longer matches; see issue #45. + + Uses ``:`` as the format separator because both values are + integers and ``:`` cannot appear in their stringification. + """ + result = pane.cmd("display-message", "-p", "#{history_size}:#{cursor_y}") + raw = result.stdout[0] if result.stdout else "0:0" + hs_str, cy_str = raw.split(":", 1) + return int(hs_str) + int(cy_str) + + +def _read_history_size(pane: Pane) -> int: + """Return the current ``#{history_size}`` for ``pane``. + + Read on every poll tick by ``wait_for_text`` because tmux's + ``-S`` is relative to the live ``hsize``, which grows as lines + scroll out of the visible region. + """ + result = pane.cmd("display-message", "-p", "#{history_size}") + return int(result.stdout[0]) if result.stdout else 0 + + @handle_tool_errors_async async def wait_for_text( pattern: str, @@ -107,16 +138,30 @@ async def wait_for_text( timeout: float = 8.0, interval: float = 0.05, match_case: bool = False, - content_start: int | None = None, - content_end: int | None = None, socket_name: str | None = None, ctx: Context | None = None, ) -> WaitForTextResult: - """Wait for text to appear in a tmux pane. - - Polls the pane content at regular intervals until the pattern is found - or the timeout is reached. Use this instead of polling capture_pane - manually — it saves agent tokens and turns. + r"""Wait for NEW text to appear in a tmux pane. + + Polls the pane at regular intervals until ``pattern`` appears on a + line written *after* the call starts, or the timeout is reached. + Use this instead of polling :func:`capture_pane` manually — it + saves agent tokens and turns. + + **What "new" means.** At entry the tool snapshots the pane's absolute + grid position (``history_size + cursor_y``) and only matches lines + written below that baseline. 
Stale scrollback that was already + present when the call began is ignored. For the synchronous "is + the pattern in the pane right now?" check, call + {tooliconl}`search-panes` instead. + + **Adversarial-safety pattern.** If you cannot trust that the + pattern only appears after your action — for example because the + pane prints recurring prompts, log lines, or output from background + processes you do not control — bracket your command with a unique + sentinel: ``cmd; echo __WAIT_$RANDOM__`` and wait for the sentinel + instead of ``cmd``'s natural output. tmux's grid model cannot + distinguish "your output" from "theirs"; the sentinel can. When a :class:`fastmcp.Context` is available, this tool emits periodic ``ctx.report_progress`` notifications so MCP clients can @@ -147,10 +192,6 @@ async def wait_for_text( Seconds between polls. Default 0.05 (50ms). match_case : bool Whether to match case. Default False (case-insensitive). - content_start : int, optional - Start line for capture. Negative values reach into scrollback. - content_end : int, optional - End line for capture. socket_name : str, optional tmux socket name. ctx : fastmcp.Context, optional @@ -164,6 +205,24 @@ async def wait_for_text( Notes ----- + **Scrollback truncation.** If ``history-limit`` is small and the + baseline line rolls out of history during the wait, tmux clips + ``-S`` to the oldest available line (``cmd-capture-pane.c``); the + worst case degrades to pre-baseline behaviour on the surviving + portion of history rather than an infinite false-match loop. + + **Reverse-index sequences (``\\eM``).** Programs that rewrite + history below the baseline can theoretically re-introduce stale + text into the captured range. This is rare on the main screen + because pagers (``less``, ``more``) and other heavy users run on + the alternate screen, which has a fresh grid and does not + interact with the baseline. 
+ + **``clear`` / ``reset``.** With the default ``scroll-on-clear`` + option, cleared content scrolls into history (``screen-write.c`` + ``screen_write_clearscreen``), so the baseline anchor is + unaffected. + **Safety tier.** Tagged ``readonly`` because the tool observes pane state without mutating it. Readonly clients may therefore block for the caller-supplied ``timeout`` (default 8 s, caller @@ -193,6 +252,17 @@ async def wait_for_text( ) assert pane.pane_id is not None + + # Snapshot the pane's absolute grid position before polling. ``hs0 + + # cy0`` is invariant under subsequent scrolling — tmux's ``-S`` is + # relative to the live ``hsize`` at capture time + # (cmd-capture-pane.c: ``top = gd->hsize + n``), so re-reading + # ``hsize`` each tick and computing ``baseline_abs - hsize_now + 1`` + # always points at "the first line written after entry". Mirrors + # the snapshot-before-loop pattern in ``wait_for_content_change`` + # below; see issue #45. + baseline_abs = await asyncio.to_thread(_read_grid_position, pane) + matched_lines: list[str] = [] start_time = time.monotonic() deadline = start_time + timeout @@ -208,12 +278,16 @@ async def wait_for_text( message=f"Polling pane {pane.pane_id} for pattern", ) - # FastMCP direct-awaits async tools on the main event loop; the - # libtmux capture_pane call is a blocking subprocess.run. Push - # to the default executor so concurrent tool calls are not - # starved during long waits. + # FastMCP direct-awaits async tools on the main event loop; + # the libtmux display-message + capture_pane calls are both + # blocking subprocess.run. Push to the default executor so + # concurrent tool calls are not starved during long waits. + hs_now = await asyncio.to_thread(_read_history_size, pane) + # ``+ 1`` skips the baseline line itself so we don't + # re-match the row the cursor sat on at entry. 
+ start_line = baseline_abs - hs_now + 1 lines = await asyncio.to_thread( - pane.capture_pane, start=content_start, end=content_end + pane.capture_pane, start=start_line, end=None ) hits = [line for line in lines if compiled.search(line)] if hits: From 24d732e4d117cccdadd3e22f6d1a6c3ab63a7f42 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 19:56:55 -0500 Subject: [PATCH 02/31] Pane(test[wait_for_text]): regress stale-scrollback no-match contract (#45) why: The baseline-anchor fix changes the user-visible contract for wait_for_text: a pattern already in the pane before the call must now return found=False, not the stale match the old code produced. The test suite needs to encode that contract so a future regression lands red. what: - Restructure WAIT_FOR_TEXT_FIXTURES with separate pre_command and during_command fields so fixtures can model "pattern was already on screen when wait_for_text started." - Add stale_scrollback_does_not_match: emit the pattern before wait_for_text starts and assert found=False on timeout. - Add matches_new_output_after_baseline: start the wait task, sleep, emit the marker via send_keys, assert found=True. Covers the positive path the baseline anchor was designed for. --- tests/test_pane_tools.py | 114 +++++++++++++++++++++++++++++++++++---- 1 file changed, 104 insertions(+), 10 deletions(-) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 18d395f5..1a148a49 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1128,23 +1128,35 @@ class WaitForTextFixture(t.NamedTuple): """Test fixture for wait_for_text.""" test_id: str - command: str | None + #: Command sent BEFORE ``wait_for_text`` is called. Its output is + #: expected to be present in the pane scrollback (and therefore + #: above the baseline) by the time the wait begins. Used to verify + #: that stale scrollback no longer matches (#45). 
The positive + #: "text appears after baseline" case lives in + #: ``test_wait_for_text_matches_new_output_after_baseline`` rather + #: than this fixture because it needs ``asyncio.gather`` to + #: coordinate emission against the running poll loop — synchronous + #: setup races the shell's enter-processing on CI and shifts the + #: baseline past single-line output. + pre_command: str | None pattern: str timeout: float expected_found: bool WAIT_FOR_TEXT_FIXTURES: list[WaitForTextFixture] = [ + # Regression for #45: pre-existing scrollback must NOT match. WaitForTextFixture( - test_id="text_found", - command="echo WAIT_MARKER_abc123", - pattern="WAIT_MARKER_abc123", - timeout=2.0, - expected_found=True, + test_id="stale_scrollback_does_not_match", + pre_command="echo WAIT_MARKER_stale", + pattern="WAIT_MARKER_stale", + timeout=0.5, + expected_found=False, ), + # Genuinely absent pattern still times out cleanly. WaitForTextFixture( test_id="timeout_not_found", - command=None, + pre_command=None, pattern="NEVER_EXISTS_xyz999", timeout=0.3, expected_found=False, @@ -1161,7 +1173,7 @@ def test_wait_for_text( mcp_server: Server, mcp_pane: Pane, test_id: str, - command: str | None, + pre_command: str | None, pattern: str, timeout: float, expected_found: bool, @@ -1169,8 +1181,48 @@ def test_wait_for_text( """wait_for_text polls pane content for a pattern.""" import asyncio - if command is not None: - mcp_pane.send_keys(command, enter=True) + if pre_command is not None: + mcp_pane.send_keys(pre_command, enter=True) + # Wait until the pane has fully settled before measuring the + # baseline. 
"Settled" means: + # + # (a) the OUTPUT line is present — ``line.strip() == pattern``, + # distinguishing the shell's actual output from the typed + # echo line that contains ``pattern`` as a substring (and + # which would otherwise trip a naive ``pattern in capture`` + # predicate while keys are still buffered pre-enter), and + # (b) ``(history_size, cursor_y)`` is unchanged across two + # consecutive polls — zsh prints async prompt-redraw + # lines (vcs_info, precmd hooks) some milliseconds after + # the initial prompt, and those redraws keep growing + # hsize *during* ``wait_for_text``'s window, pulling + # pre-baseline rows back into the visible-relative + # ``start_line`` capture. Waiting them out anchors the + # baseline below all async output. + # + # A fixed ``time.sleep`` would do the same job but couples the + # test to a wall-clock value (the project's idiom for + # tmux-state waits is ``retry_until`` — used throughout this + # file). + last_state: tuple[int, int] = (-1, -1) + + def _stale_settled() -> bool: + nonlocal last_state + raw = mcp_pane.cmd( + "display-message", "-p", "#{history_size}:#{cursor_y}" + ).stdout + if not raw: + return False + hs_str, cy_str = raw[0].split(":", 1) + state = (int(hs_str), int(cy_str)) + has_output_line = any( + line.strip() == pattern for line in mcp_pane.capture_pane() + ) + settled = state == last_state and has_output_line + last_state = state + return settled + + retry_until(_stale_settled, 2, raises=True) result = asyncio.run( wait_for_text( @@ -1190,6 +1242,48 @@ def test_wait_for_text( assert len(result.matched_lines) >= 1 +def test_wait_for_text_matches_new_output_after_baseline( + mcp_server: Server, mcp_pane: Pane +) -> None: + """wait_for_text finds output written AFTER its baseline snapshot. + + Coordinates the marker emission against the running poll loop via + :func:`asyncio.gather` so ``send_keys`` is guaranteed to fire + *after* :func:`wait_for_text` has captured its baseline. 
Without + that coordination the test races the shell's enter-processing — + if the shell advances the cursor before the baseline read on CI, + ``start_line`` shifts past the single-line marker and the poll + loop misses it (the failure mode that took the original + synchronous ``send_keys`` + ``asyncio.run`` shape to all six tmux + matrix slots on PR #47 commit aa8de89). + """ + import asyncio + + async def emit_after_baseline() -> None: + # The baseline read is a single display-message round trip + # (<5 ms in practice); 0.2 s gives wait_for_text plenty of + # headroom to lock the baseline before the marker fires. + await asyncio.sleep(0.2) + await asyncio.to_thread(mcp_pane.send_keys, "echo WAIT_MARKER_after", True) + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="WAIT_MARKER_after", + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await emit_after_baseline() + return await wait_task + + result = asyncio.run(run()) + assert result.found is True + assert result.timed_out is False + assert any("WAIT_MARKER_after" in line for line in result.matched_lines) + + def test_wait_for_text_invalid_regex(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text raises ToolError on invalid regex when regex=True.""" import asyncio From 7cb0525f523640883163e48c3fd81020ba04da41 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 19:58:32 -0500 Subject: [PATCH 03/31] Pane(refactor[wait_for_text]): adopt Pane.display_message wrapper why: The libtmux 0.56 bump merged on trunk ships Pane.display_message as a typed wrapper around display-message, and every other tool module in this package adopted it. The two helpers introduced for baseline anchoring (_read_grid_position, _read_history_size) reached back to raw pane.cmd("display-message", ...) instead. 
Bringing them in line keeps the codebase consistent with the same release-notes entry that markets the wrapper migration. what: - _read_grid_position and _read_history_size call pane.display_message(fmt, get_text=True) directly; the parse logic and empty-output fallback are unchanged. - Rename the local "result" binding to "stdout" to match the call surface used in meta.py. --- src/libtmux_mcp/tools/pane_tools/wait.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 1ddb5652..cceff230 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -110,8 +110,8 @@ def _read_grid_position(pane: Pane) -> int: Uses ``:`` as the format separator because both values are integers and ``:`` cannot appear in their stringification. """ - result = pane.cmd("display-message", "-p", "#{history_size}:#{cursor_y}") - raw = result.stdout[0] if result.stdout else "0:0" + stdout = pane.display_message("#{history_size}:#{cursor_y}", get_text=True) + raw = stdout[0] if stdout else "0:0" hs_str, cy_str = raw.split(":", 1) return int(hs_str) + int(cy_str) @@ -123,8 +123,8 @@ def _read_history_size(pane: Pane) -> int: ``-S`` is relative to the live ``hsize``, which grows as lines scroll out of the visible region. 
""" - result = pane.cmd("display-message", "-p", "#{history_size}") - return int(result.stdout[0]) if result.stdout else 0 + stdout = pane.display_message("#{history_size}", get_text=True) + return int(stdout[0]) if stdout else 0 @handle_tool_errors_async From ee12d677d787d9336d8fc5433e44f53762a90f61 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 20:10:05 -0500 Subject: [PATCH 04/31] Pane(fix[wait_for_text]): guard against bottom-row capture clip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: capture-pane -S clips a below-visible start back to the bottom visible row (cmd-capture-pane.c, in every tmux release we support). When the cursor sits at the last visible row at entry, start_line = cy0 + 1 lands below the pane and tmux returns the bottom row's content. Stale text on that row — a fresh shell prompt, a printed marker — matches the pattern instantly, before any new output has appeared. The baseline-anchor fix that motivated this branch did not close this corner: the math is correct, but it assumed an empty capture result when start_line pointed below the visible region. what: - Add _read_pane_height alongside _read_history_size. Read pane_height once at entry; the bottom-row clip fires on entry-state, and a per-tick read costs another tmux round-trip without changing anything for the common case. - Before each capture, short-circuit lines to [] when start_line >= pane_height. Route through the existing deadline / sleep tail so the loop honours the timeout instead of tight- spinning. - tests/test_pane_tools.py: regression for #45. Respawn the pane with a sh -c that pre-fills, prints the marker without a trailing newline, and sleeps — freezing hsize and cursor_y for the wait window so the guard's first-tick contract is what's actually tested. 
--- src/libtmux_mcp/tools/pane_tools/wait.py | 27 ++++++++++-- tests/test_pane_tools.py | 56 ++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index cceff230..f6820bce 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -127,6 +127,18 @@ def _read_history_size(pane: Pane) -> int: return int(stdout[0]) if stdout else 0 +def _read_pane_height(pane: Pane) -> int: + """Return ``#{pane_height}`` (the visible-region row count, ``sy``). + + Read once at entry by ``wait_for_text`` to gate the bottom-row + capture clip: when the computed ``start_line`` would land below + the visible region, ``cmd-capture-pane`` clips back to the bottom + row and returns stale text. The guard short-circuits that path. + """ + stdout = pane.display_message("#{pane_height}", get_text=True) + return int(stdout[0]) if stdout else 0 + + @handle_tool_errors_async async def wait_for_text( pattern: str, @@ -262,6 +274,7 @@ async def wait_for_text( # the snapshot-before-loop pattern in ``wait_for_content_change`` # below; see issue #45. baseline_abs = await asyncio.to_thread(_read_grid_position, pane) + pane_height = await asyncio.to_thread(_read_pane_height, pane) matched_lines: list[str] = [] start_time = time.monotonic() @@ -286,9 +299,17 @@ async def wait_for_text( # ``+ 1`` skips the baseline line itself so we don't # re-match the row the cursor sat on at entry. start_line = baseline_abs - hs_now + 1 - lines = await asyncio.to_thread( - pane.capture_pane, start=start_line, end=None - ) + # ``capture-pane -S`` clips a below-visible start back to + # the bottom row (cmd-capture-pane.c, post-tmux-3.0), so a + # naive capture would return stale bottom-row text whenever + # no new rows have appeared below the cursor yet. Skip the + # capture entirely on those ticks. 
+ if start_line >= pane_height: + lines: list[str] = [] + else: + lines = await asyncio.to_thread( + pane.capture_pane, start=start_line, end=None + ) hits = [line for line in lines if compiled.search(line)] if hits: matched_lines.extend(hits) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 1a148a49..b2ff188d 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1284,6 +1284,62 @@ async def run() -> WaitForTextResult: assert any("WAIT_MARKER_after" in line for line in result.matched_lines) +def test_wait_for_text_does_not_match_bottom_row_clip( + mcp_server: Server, mcp_pane: Pane +) -> None: + """wait_for_text must not match stale text sitting on the cursor row. + + When the cursor is at the last visible row at entry, + ``start_line = cy0 + 1`` points below the visible region and + tmux's ``capture-pane -S`` clips back to the bottom row + (``cmd-capture-pane.c``). Without the bottom-aware guard the + poll loop captures the stale cursor-row text and matches it + instantly. + + The pane is respawned with a shell-free ``sh -c`` command that + prints the marker without a trailing newline and then sleeps — + so ``hsize`` and ``cursor_y`` stay frozen for the duration of + the wait. Running this with zsh in the loop produced a + multi-line history burst on shell exit / exec that lowered + ``start_line`` below ``pane_height`` and disengaged the guard. + """ + import asyncio + + # Replace the default shell with a single sh invocation: emit + # filler rows to push the cursor to the bottom of the visible + # region, print the marker without a trailing newline so it + # stays on the cursor row, then sleep so nothing else scrolls + # into history. Fixture teardown kills the pane (and the sleep) + # at test exit. 
+ fill_and_park = ( + "for i in $(seq 1 30); do echo filler; done; " + "printf STALE_BOTTOM_MARKER; sleep 60" + ) + mcp_pane.respawn(kill=True, shell=f"sh -c '{fill_and_park}'") + + def _bottom_row_ready() -> bool: + state = mcp_pane.display_message("#{pane_height}:#{cursor_y}", get_text=True) + if not state: + return False + sy_str, cy_str = state[0].split(":", 1) + if int(cy_str) != int(sy_str) - 1: + return False + return any("STALE_BOTTOM_MARKER" in line for line in mcp_pane.capture_pane()) + + retry_until(_bottom_row_ready, 5, raises=True) + + result = asyncio.run( + wait_for_text( + pattern="STALE_BOTTOM_MARKER", + pane_id=mcp_pane.pane_id, + timeout=0.5, + socket_name=mcp_server.socket_name, + ) + ) + assert result.found is False + assert result.timed_out is True + + def test_wait_for_text_invalid_regex(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text raises ToolError on invalid regex when regex=True.""" import asyncio From ad1f61a31469ad4b171cd21d3fae5765c8d78222 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 20:11:19 -0500 Subject: [PATCH 05/31] Pane(fix[wait_for_text]): include baseline read inside the timeout budget why: start_time was captured after the initial _read_grid_position and _read_pane_height calls, so a stalled tmux server could block the tool for an arbitrary period before the user-supplied timeout deadline even started. libtmux's tmux_cmd uses Popen.communicate() without a subprocess timeout, so the worst case is real, not theoretical: a server-side stall on the first display-message leaks past the wall-clock budget the caller asked for. what: - Move start_time and deadline to the start of the resolved-pane block, before the baseline and pane-height reads. Same total work, no new branches; only the budget accounting changes. 
--- src/libtmux_mcp/tools/pane_tools/wait.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index f6820bce..aa26dec8 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -265,6 +265,13 @@ async def wait_for_text( assert pane.pane_id is not None + # Anchor ``start_time`` before the baseline read so a stalled + # tmux server cannot blow the user-supplied ``timeout`` budget + # — libtmux's ``tmux_cmd`` uses ``Popen.communicate()`` with no + # subprocess timeout, so the read can block arbitrarily long. + start_time = time.monotonic() + deadline = start_time + timeout + # Snapshot the pane's absolute grid position before polling. ``hs0 + # cy0`` is invariant under subsequent scrolling — tmux's ``-S`` is # relative to the live ``hsize`` at capture time @@ -277,8 +284,6 @@ async def wait_for_text( pane_height = await asyncio.to_thread(_read_pane_height, pane) matched_lines: list[str] = [] - start_time = time.monotonic() - deadline = start_time + timeout found = False try: From eab8ca8307f60b7c841ce0392906752a2af83f79 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 20:14:39 -0500 Subject: [PATCH 06/31] Pane(fix[wait_for_text]): reject footgun inputs at the door MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Three parameter values turn the wait loop into a footgun rather than a wait primitive: - ``pattern=""`` — ``re.compile('')`` matches the zero-width position of every line, so the first poll returns ``found=True`` against whatever was already in the pane. - ``interval=0`` — ``asyncio.sleep(0)`` yields the event loop without idling, so the loop fires tmux subprocesses as fast as the scheduler hands them out (a self-inflicted server- side DoS). 
- ``timeout<=0`` — the loop body runs one probe before the deadline check, so a non-positive timeout silently turns "wait" into "synchronous probe" without saying so. Each is rejected at function entry so the failure surfaces as an explicit ``ToolError`` instead of corrupting the contract. what: - Inline parameter checks before the regex compile in wait_for_text; raise ToolError with the offending value in the message. - tests/test_pane_tools.py: one test per rejection, asserting the matching ToolError message. --- src/libtmux_mcp/tools/pane_tools/wait.py | 10 ++++ tests/test_pane_tools.py | 66 ++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index aa26dec8..fb0f356f 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -245,6 +245,16 @@ async def wait_for_text( calls. If you need to rate-limit wait tools, do it at the transport layer or with dedicated middleware. """ + if not pattern: + msg = "pattern must be a non-empty string" + raise ToolError(msg) + if interval < 0.01: + msg = f"interval must be at least 0.01 s (received {interval})" + raise ToolError(msg) + if timeout <= 0: + msg = f"timeout must be positive (received {timeout})" + raise ToolError(msg) + search_pattern = pattern if regex else re.escape(pattern) flags = 0 if match_case else re.IGNORECASE try: diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index b2ff188d..f1d376a9 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1355,6 +1355,72 @@ def test_wait_for_text_invalid_regex(mcp_server: Server, mcp_pane: Pane) -> None ) +def test_wait_for_text_rejects_empty_pattern( + mcp_server: Server, mcp_pane: Pane +) -> None: + """An empty pattern matches every line and returns found=True instantly. 
+ + ``re.compile('')`` succeeds and ``re.search`` reports a zero-width + match on every string, so the first poll would return + ``found=True`` against whatever was in the pane. Reject explicitly. + """ + import asyncio + + with pytest.raises(ToolError, match="pattern must be a non-empty string"): + asyncio.run( + wait_for_text( + pattern="", + pane_id=mcp_pane.pane_id, + socket_name=mcp_server.socket_name, + ) + ) + + +def test_wait_for_text_rejects_tiny_interval( + mcp_server: Server, mcp_pane: Pane +) -> None: + """A sub-10ms interval lets the poll loop saturate the tmux server. + + ``asyncio.sleep(0)`` yields but does not idle, so an unguarded + ``interval=0`` fires tmux subprocesses as fast as the scheduler + hands them out — a self-inflicted server-side DoS. + """ + import asyncio + + with pytest.raises(ToolError, match=r"interval must be at least 0\.01"): + asyncio.run( + wait_for_text( + pattern="anything", + pane_id=mcp_pane.pane_id, + interval=0, + socket_name=mcp_server.socket_name, + ) + ) + + +def test_wait_for_text_rejects_non_positive_timeout( + mcp_server: Server, mcp_pane: Pane +) -> None: + """A non-positive timeout is ambiguous; reject rather than guess. + + The loop body runs one probe before the deadline check, so + ``timeout=0`` would complete a single synchronous capture in a + "wait" tool — surprising. Reject explicitly so callers pick a + meaningful budget. + """ + import asyncio + + with pytest.raises(ToolError, match="timeout must be positive"): + asyncio.run( + wait_for_text( + pattern="anything", + pane_id=mcp_pane.pane_id, + timeout=0, + socket_name=mcp_server.socket_name, + ) + ) + + def test_wait_for_text_reports_progress(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text calls ``ctx.report_progress`` at each poll tick. 
From ed7b40773738b607cf6a78c8051fdeb6c0403367 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 20:20:03 -0500 Subject: [PATCH 07/31] Pane(fix[wait_for_text]): surface respawn and pane-death as ToolError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The baseline-anchor mechanism assumes the pane's process and grid stay coherent for the duration of the wait. Two events break that assumption silently: - respawn-pane mid-wait. screen_reinit (screen.c) resets cy to 0 but preserves hsize, so the absolute anchor keeps pointing at the old process's grid — the new process's output ends up at rows the capture skips, and the wait times out as if nothing arrived. - pane death. With remain-on-exit set, tmux keeps the pane alive after its child exits and #{pane_dead} flips to 1. Without detection, the loop just keeps polling a stale frozen grid until the deadline. Both cases now surface as explicit ToolErrors so callers can react instead of timing out in the dark. what: - Replace _read_grid_position, _read_history_size, and _read_pane_height with a single _PaneState NamedTuple read via one display-message round-trip. The loop stays at two subprocesses per tick (state + capture) instead of growing by one display-message read for each extra field (history, height, pid, dead), without losing any fields. - Capture pane_pid at entry; each tick checks pane_dead first and then verifies pane_pid is unchanged. Either condition raises ToolError naming the affected pane and the transition. - tests/test_pane_tools.py: regression for respawn (asyncio.gather pattern: start wait_task, respawn after 0.1s, expect ToolError) and for pane death (set remain-on-exit, respawn into ``true``, poll #{pane_dead}, expect ToolError). 
--- src/libtmux_mcp/tools/pane_tools/wait.py | 100 +++++++++++++---------- tests/test_pane_tools.py | 66 +++++++++++++++ 2 files changed, 122 insertions(+), 44 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index fb0f356f..0cadd69a 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -99,44 +99,45 @@ async def _maybe_log( return -def _read_grid_position(pane: Pane) -> int: - """Return ``history_size + cursor_y`` for ``pane``. - - The result is an absolute grid index that remains stable as new - content scrolls the cursor row into history. Used by - ``wait_for_text`` to anchor a baseline before polling so stale - scrollback no longer matches; see issue #45. - - Uses ``:`` as the format separator because both values are - integers and ``:`` cannot appear in their stringification. +class _PaneState(t.NamedTuple): + """Per-tick snapshot of pane state used by :func:`wait_for_text`. + + Read in one ``display-message`` round-trip so the loop costs two + subprocesses per tick (state + capture) instead of growing + linearly with each new field. ``|`` is the field separator — + history/cursor/height are integers, ``pane_pid`` is a numeric PID + string, and ``pane_dead`` is the literal ``"0"``/``"1"`` flag. """ - stdout = pane.display_message("#{history_size}:#{cursor_y}", get_text=True) - raw = stdout[0] if stdout else "0:0" - hs_str, cy_str = raw.split(":", 1) - return int(hs_str) + int(cy_str) - - -def _read_history_size(pane: Pane) -> int: - """Return the current ``#{history_size}`` for ``pane``. - Read on every poll tick by ``wait_for_text`` because tmux's - ``-S`` is relative to the live ``hsize``, which grows as lines - scroll out of the visible region. 
- """ - stdout = pane.display_message("#{history_size}", get_text=True) - return int(stdout[0]) if stdout else 0 + history_size: int + cursor_y: int + pane_height: int + pane_pid: str + pane_dead: bool -def _read_pane_height(pane: Pane) -> int: - """Return ``#{pane_height}`` (the visible-region row count, ``sy``). +def _read_pane_state(pane: Pane) -> _PaneState: + """Return a :class:`_PaneState` snapshot for ``pane``. - Read once at entry by ``wait_for_text`` to gate the bottom-row - capture clip: when the computed ``start_line`` would land below - the visible region, ``cmd-capture-pane`` clips back to the bottom - row and returns stale text. The guard short-circuits that path. + Combines the per-tick reads ``wait_for_text`` needs into a single + ``display-message`` call. ``history_size + cursor_y`` gives the + absolute grid anchor at entry; ``pane_height`` gates the bottom- + row capture clip; ``pane_pid`` and ``pane_dead`` surface + respawn-pane and pane-death events that invalidate the baseline. """ - stdout = pane.display_message("#{pane_height}", get_text=True) - return int(stdout[0]) if stdout else 0 + stdout = pane.display_message( + "#{history_size}|#{cursor_y}|#{pane_height}|#{pane_pid}|#{pane_dead}", + get_text=True, + ) + raw = stdout[0] if stdout else "0|0|0||0" + hs, cy, sy, pid, dead = raw.split("|", 4) + return _PaneState( + history_size=int(hs), + cursor_y=int(cy), + pane_height=int(sy), + pane_pid=pid, + pane_dead=dead == "1", + ) @handle_tool_errors_async @@ -282,16 +283,17 @@ async def wait_for_text( start_time = time.monotonic() deadline = start_time + timeout - # Snapshot the pane's absolute grid position before polling. ``hs0 + - # cy0`` is invariant under subsequent scrolling — tmux's ``-S`` is - # relative to the live ``hsize`` at capture time - # (cmd-capture-pane.c: ``top = gd->hsize + n``), so re-reading - # ``hsize`` each tick and computing ``baseline_abs - hsize_now + 1`` - # always points at "the first line written after entry". 
Mirrors - # the snapshot-before-loop pattern in ``wait_for_content_change`` - # below; see issue #45. - baseline_abs = await asyncio.to_thread(_read_grid_position, pane) - pane_height = await asyncio.to_thread(_read_pane_height, pane) + # Snapshot the pane state before polling. ``hs0 + cy0`` is the + # absolute grid anchor — invariant under subsequent scrolling + # because tmux's ``-S`` is relative to the live ``hsize`` at + # capture time (cmd-capture-pane.c: ``top = gd->hsize + n``). + # ``pane_pid`` lets us detect a respawn-pane mid-wait that would + # otherwise leave the absolute anchor pointing at the old + # process's output. See issue #45. + entry = await asyncio.to_thread(_read_pane_state, pane) + baseline_abs = entry.history_size + entry.cursor_y + pane_height = entry.pane_height + baseline_pid = entry.pane_pid matched_lines: list[str] = [] found = False @@ -310,10 +312,20 @@ async def wait_for_text( # the libtmux display-message + capture_pane calls are both # blocking subprocess.run. Push to the default executor so # concurrent tool calls are not starved during long waits. - hs_now = await asyncio.to_thread(_read_history_size, pane) + state = await asyncio.to_thread(_read_pane_state, pane) + if state.pane_dead: + msg = f"pane {pane.pane_id} died during wait" + raise ToolError(msg) + if state.pane_pid != baseline_pid: + msg = ( + f"pane {pane.pane_id} was respawned during wait " + f"(pid {baseline_pid} -> {state.pane_pid}); " + "baseline anchor no longer valid" + ) + raise ToolError(msg) # ``+ 1`` skips the baseline line itself so we don't # re-match the row the cursor sat on at entry. 
- start_line = baseline_abs - hs_now + 1 + start_line = baseline_abs - state.history_size + 1 # ``capture-pane -S`` clips a below-visible start back to # the bottom row (cmd-capture-pane.c, post-tmux-3.0), so a # naive capture would return stale bottom-row text whenever diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index f1d376a9..24da9675 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1398,6 +1398,72 @@ def test_wait_for_text_rejects_tiny_interval( ) +def test_wait_for_text_raises_on_pane_respawn( + mcp_server: Server, mcp_pane: Pane +) -> None: + """Respawning the pane mid-wait invalidates the baseline anchor. + + The baseline absolute index is computed against the original + pane process's grid. ``respawn-pane`` clears the visible region + but preserves ``hsize`` (``screen_reinit``), so the math keeps + pointing at the *old* process's content — silently miscapturing. + ``wait_for_text`` detects the ``pane_pid`` change and surfaces + it as a ToolError instead. + """ + import asyncio + + async def respawn_after_delay() -> None: + # Let wait_for_text capture its baseline first, then swap + # the pane process so pane_pid changes. + await asyncio.sleep(0.1) + await asyncio.to_thread(mcp_pane.respawn, kill=True, shell="sleep 30") + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="NEVER_APPEARS_xyz", + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await respawn_after_delay() + return await wait_task + + with pytest.raises(ToolError, match="respawned during wait"): + asyncio.run(run()) + + +def test_wait_for_text_raises_on_pane_death(mcp_server: Server, mcp_pane: Pane) -> None: + """A pane whose process has exited surfaces as a ToolError. + + With ``remain-on-exit`` set, tmux keeps the pane alive after its + child exits and reports ``#{pane_dead}=1``. 
The wait loop checks + that flag every tick and bails with a ToolError instead of + polling stale content until timeout. + """ + import asyncio + + mcp_pane.window.set_option("remain-on-exit", "on") + mcp_pane.respawn(kill=True, shell="true") + + def _is_dead() -> bool: + flag = mcp_pane.display_message("#{pane_dead}", get_text=True) + return bool(flag) and flag[0] == "1" + + retry_until(_is_dead, 3, raises=True) + + with pytest.raises(ToolError, match="died during wait"): + asyncio.run( + wait_for_text( + pattern="anything", + pane_id=mcp_pane.pane_id, + timeout=1.0, + socket_name=mcp_server.socket_name, + ) + ) + + def test_wait_for_text_rejects_non_positive_timeout( mcp_server: Server, mcp_pane: Pane ) -> None: From 859ecb6eef85d7bfd08b45c44f6aee6c8e9e2a6b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Thu, 14 May 2026 20:21:59 -0500 Subject: [PATCH 08/31] Pane(docs[wait_for_text]): fix reverse-index claim and surface the same-row blind spot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Two parts of the wait_for_text docstring oversold the baseline-anchor model: - The reverse-index note claimed ``\eM`` "rewrites history below the baseline." It does not. screen_write_reverseindex (screen-write.c) scrolls the visible region within [rupper, rlower] or decrements cy. ``hsize`` is never touched. A maintainer reading the tmux source to verify a related claim would lose trust in the docstring. - The "What 'new' means" block sells the tool as observing NEW text without saying that in-place updates to the entry cursor row (carriage-return rewrites, progress spinners, single-line status updates) are excluded by design. Users wiring this against TUIs that update in place see a silent timeout. what: - Rewrite the reverse-index note around the real hazard — in-place repaints below the baseline — and cite screen-write.c for the visible-region scroll behaviour rather than asserting history is rewritten. 
- Add a paragraph under "What 'new' means" that explicitly disclaims same-row rewrites and points readers at wait_for_content_change for the in-place case. --- src/libtmux_mcp/tools/pane_tools/wait.py | 26 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 0cadd69a..ebb0f7ef 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -168,6 +168,12 @@ async def wait_for_text( the pattern in the pane right now?" check, call {tooliconl}`search-panes` instead. + In-place updates to the entry cursor's row — carriage-return + rewrites, progress spinners, single-line status updates — are + not observed; only rows below the entry cursor count as "new." + Use {tooliconl}`wait-for-content-change` or pair the command + with a sentinel for those cases. + **Adversarial-safety pattern.** If you cannot trust that the pattern only appears after your action — for example because the pane prints recurring prompts, log lines, or output from background @@ -224,12 +230,20 @@ async def wait_for_text( worst case degrades to pre-baseline behaviour on the surviving portion of history rather than an infinite false-match loop. - **Reverse-index sequences (``\\eM``).** Programs that rewrite - history below the baseline can theoretically re-introduce stale - text into the captured range. This is rare on the main screen - because pagers (``less``, ``more``) and other heavy users run on - the alternate screen, which has a fresh grid and does not - interact with the baseline. + **In-place rewrites below the baseline.** Programs that paint + over rows the tool will capture — cursor-position escape + sequences, full-screen progress displays, anything that rewrites + rows it already wrote — can re-introduce text the caller saw + earlier. 
Each tick captures the current contents of rows below + the baseline; tmux's grid model cannot distinguish "fresh write" + from "repaint with the same characters." + ``screen_write_reverseindex`` (``screen-write.c``) only scrolls + the visible region within ``[rupper, rlower]`` and never touches + ``hsize``, so ``\\eM`` itself does not invalidate the anchor — + but the surrounding TUI render loop may. Full-screen TUIs + typically run on the alternate screen (a separate grid that + this tool does not traverse), so the main-screen pattern is + rare in practice. **``clear`` / ``reset``.** With the default ``scroll-on-clear`` option, cleared content scrolls into history (``screen-write.c`` From 2a0bef8e020a4d3dacf001f722f078d740483121 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Fri, 15 May 2026 17:39:00 -0500 Subject: [PATCH 09/31] docs(CHANGES[wait_for_text]): note the baseline-anchor fix in AGENTS.md format why: The prior CHANGES entries on this branch carried commit-message shape (why: / what: bodies, mechanism-heavy bullets) into a changelog that AGENTS.md specifies as user-vocabulary prose under Bold subheadings. Three CHANGES commits were dropped from the branch via rebase; this single commit replaces them with prose entries in the project's actual changelog format. what: - Breaking changes: parameter drop with before/after migration block. - Fixes: one prose paragraph covering the baseline-anchor behaviour change (stale-scrollback fix, bottom-row clip handling, death/respawn surfacing) under a single user-vocabulary subheading. - Fixes: one prose paragraph covering input validation. - Section ordering: Breaking changes / Dependencies / Fixes, per AGENTS.md line 420. 
--- CHANGES | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CHANGES b/CHANGES index 19b0e198..fe39fd6a 100644 --- a/CHANGES +++ b/CHANGES @@ -6,10 +6,34 @@ _Notes on upcoming releases will be added here_ +### Breaking changes + +**{tooliconl}`wait-for-text` drops `content_start` / `content_end`** + +The baseline anchor introduced in this release follows the pane's grid position automatically, so the previous manual capture-range parameters have no remaining purpose. Agents that named them should drop them from their call sites. (#45) + +```python +# Before +wait_for_text(pattern="OK", content_start=-100) + +# After +wait_for_text(pattern="OK") +``` + ### Dependencies **Minimum `libtmux>=0.56.0`** (was `>=0.55.1`). Unlocks the new tmux-command wrappers shipped in libtmux 0.56.0 — {meth}`~libtmux.Pane.respawn`, {meth}`~libtmux.Pane.copy_mode`, {meth}`~libtmux.Pane.pipe`, {meth}`~libtmux.Pane.swap`, {meth}`~libtmux.Pane.paste_buffer`, {meth}`~libtmux.Pane.clear_history`, {meth}`~libtmux.Pane.display_message`, {meth}`~libtmux.Server.delete_buffer`, and the {meth}`~libtmux.Session.next_window` / {meth}`~libtmux.Session.previous_window` / {meth}`~libtmux.Session.last_window` trio — so the MCP no longer falls back to raw `cmd()` calls for tmux commands the upstream API now covers. (#46) +### Fixes + +**{tooliconl}`wait-for-text` waits for new output, not stale scrollback** + +{tooliconl}`wait-for-text` now anchors on the pane's grid position at entry and only matches lines written after the call begins; the previous behaviour returned `found=True` on the first poll whenever the pattern already lived in the pane. Panes that die or are respawned mid-wait surface a `ToolError`. For the synchronous "is the pattern in the pane right now?" case, call {tooliconl}`search-panes`. 
(#45) + +**{tooliconl}`wait-for-text` rejects misused `pattern` / `interval` / `timeout`** + +Empty `pattern`, `interval` below 10 ms, and non-positive `timeout` each now raise `ToolError` at entry instead of silently matching every line, spinning the tmux server in a tight loop, or completing a surprise single probe. (#45) + ## libtmux-mcp 0.1.0a6 (2026-05-09) libtmux-mcp 0.1.0a6 is the activation and registration cleanup release. It makes the server much easier for MCP clients to discover from ordinary "pane", "window", and "session" prompts, standardizes new setup docs around the `tmux` registration slug, and adds migration guidance for existing `libtmux` registrations. Existing installs keep working; the release changes defaults and documentation so new installs line up with the tool prefix users actually see. From bc16a276379bfd2f588bb2db5e0947f7b66dc091 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Fri, 15 May 2026 17:40:49 -0500 Subject: [PATCH 10/31] Pane(docs[wait_for_text]): correct asyncio.gather references that don't match the implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Two comments in tests/test_pane_tools.py — the WaitForTextFixture docstring and the docstring on test_wait_for_text_matches_new_output_after_baseline — describe the positive-path coordination as using asyncio.gather. The implementation actually uses asyncio.create_task + a sequenced await on the emit coroutine, because gather would schedule both coroutines concurrently and lose the start-baseline-then-emit ordering the test relies on. A future maintainer following the comments would either rewrite the test incorrectly or doubt the existing shape. what: - WaitForTextFixture pre_command docstring: replace the asyncio.gather mention with asyncio.create_task plus a sequenced await. 
- test_wait_for_text_matches_new_output_after_baseline docstring: describe the actual create_task + sequenced-await mechanism and note explicitly that asyncio.gather is unsuitable here because it loses the ordering guarantee. --- tests/test_pane_tools.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 24da9675..bf27c60a 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1134,10 +1134,10 @@ class WaitForTextFixture(t.NamedTuple): #: that stale scrollback no longer matches (#45). The positive #: "text appears after baseline" case lives in #: ``test_wait_for_text_matches_new_output_after_baseline`` rather - #: than this fixture because it needs ``asyncio.gather`` to - #: coordinate emission against the running poll loop — synchronous - #: setup races the shell's enter-processing on CI and shifts the - #: baseline past single-line output. + #: than this fixture because it needs ``asyncio.create_task`` plus + #: a sequenced ``await`` to coordinate emission against the running + #: poll loop — synchronous setup races the shell's enter-processing + #: on CI and shifts the baseline past single-line output. pre_command: str | None pattern: str timeout: float @@ -1247,15 +1247,16 @@ def test_wait_for_text_matches_new_output_after_baseline( ) -> None: """wait_for_text finds output written AFTER its baseline snapshot. - Coordinates the marker emission against the running poll loop via - :func:`asyncio.gather` so ``send_keys`` is guaranteed to fire - *after* :func:`wait_for_text` has captured its baseline. 
Without - that coordination the test races the shell's enter-processing — - if the shell advances the cursor before the baseline read on CI, - ``start_line`` shifts past the single-line marker and the poll - loop misses it (the failure mode that took the original - synchronous ``send_keys`` + ``asyncio.run`` shape to all six tmux - matrix slots on PR #47 commit aa8de89). + Coordinates the marker emission against the running poll loop by + starting :func:`wait_for_text` via :func:`asyncio.create_task`, + then ``await``-ing the emit coroutine, then ``await``-ing the + wait task. Sequencing matters: the explicit start-then-emit + ordering guarantees ``send_keys`` fires *after* the baseline + read; :func:`asyncio.gather` would schedule both concurrently + and lose that guarantee. Without coordination the test races + the shell's enter-processing — if the shell advances the cursor + before the baseline read on CI, ``start_line`` shifts past the + single-line marker and the poll loop misses it. """ import asyncio From 7ec75bf79f15b2fd464574f1d95ad476b82d22a8 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Fri, 15 May 2026 17:42:30 -0500 Subject: [PATCH 11/31] Pane(test[wait_for_text]): bump _stale_settled budget to match project precedent why: The _stale_settled helper inside the wait_for_text parametrized test waited 2 seconds for (history_size, cursor_y) to stabilise across two consecutive polls. Every other settle-loop in this file (_pane_dead, _ready, _bottom_row_ready) uses a 5-second budget for similar tmux-state stability under shell activity. The 2-second outlier risked flaking on CI nodes whose zsh async-prompt hooks keep firing for longer than the budget, missing the two-consecutive-identical-reads window that signals settlement. what: - retry_until(_stale_settled, 2, raises=True) becomes retry_until(_stale_settled, 5, raises=True). 
Happy-path wall-clock is unaffected (the predicate returns True as soon as state stabilises, usually within milliseconds); only the failure-mode budget widens. --- tests/test_pane_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index bf27c60a..cdcd8028 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1222,7 +1222,7 @@ def _stale_settled() -> bool: last_state = state return settled - retry_until(_stale_settled, 2, raises=True) + retry_until(_stale_settled, 5, raises=True) result = asyncio.run( wait_for_text( From c378e16cbbf0570ea09c43bbcd3ecbfd15cd68d2 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Fri, 15 May 2026 18:35:29 -0500 Subject: [PATCH 12/31] Pane(fix[wait_for_text]): raise on history rollover so the baseline anchor stays sound MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a rollover guard inside the poll loop: when ``state.history_size < entry.history_size`` (grid_collect_history fired, clear-history ran, or the session's history-limit option was shrunk mid-wait), the absolute baseline ``hs0 + cy0`` is no longer recoverable. There is no server-side way to disambiguate "trimmed" from "still anchored", so surface the lost anchor as ToolError with a steer toward wait_for_channel for deterministic synchronization. Also fixes the stale ``pane_height`` reference in the bottom-row clip guard — it was captured once at entry and never refreshed, so a pane resize mid-wait left the guard keyed to a stale height. Compare against ``state.pane_height`` re-read each tick so the guard tracks the live visible region. Refs tmux grid.c grid_collect_history; verified against tmux master HEAD. 
--- CHANGES | 4 + src/libtmux_mcp/tools/pane_tools/wait.py | 46 +++++--- tests/test_pane_tools.py | 132 +++++++++++++++++++++++ 3 files changed, 170 insertions(+), 12 deletions(-) diff --git a/CHANGES b/CHANGES index fe39fd6a..5cb81184 100644 --- a/CHANGES +++ b/CHANGES @@ -20,6 +20,10 @@ wait_for_text(pattern="OK", content_start=-100) wait_for_text(pattern="OK") ``` +**{tooliconl}`wait-for-text` raises on history rollover** + +When the pane's `history-limit` causes the baseline grid row to be trimmed mid-wait (or `clear-history` runs, or `history-limit` is shrunk while the wait is running), {tooliconl}`wait-for-text` now raises `ToolError` instead of silently returning stale matches or missing new output. The absolute baseline anchor relies on tmux's `history_size` being monotonically non-decreasing; once `grid_collect_history` fires there is no server-side way to recover the original anchor. For deterministic command-completion synchronization, compose `tmux wait-for -S CHANNEL` into the shell command and use {tooliconl}`wait-for-channel`. (#45 follow-up) + ### Dependencies **Minimum `libtmux>=0.56.0`** (was `>=0.55.1`). Unlocks the new tmux-command wrappers shipped in libtmux 0.56.0 — {meth}`~libtmux.Pane.respawn`, {meth}`~libtmux.Pane.copy_mode`, {meth}`~libtmux.Pane.pipe`, {meth}`~libtmux.Pane.swap`, {meth}`~libtmux.Pane.paste_buffer`, {meth}`~libtmux.Pane.clear_history`, {meth}`~libtmux.Pane.display_message`, {meth}`~libtmux.Server.delete_buffer`, and the {meth}`~libtmux.Session.next_window` / {meth}`~libtmux.Session.previous_window` / {meth}`~libtmux.Session.last_window` trio — so the MCP no longer falls back to raw `cmd()` calls for tmux commands the upstream API now covers. 
(#46) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index ebb0f7ef..3421592e 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -224,11 +224,15 @@ async def wait_for_text( Notes ----- - **Scrollback truncation.** If ``history-limit`` is small and the - baseline line rolls out of history during the wait, tmux clips - ``-S`` to the oldest available line (``cmd-capture-pane.c``); the - worst case degrades to pre-baseline behaviour on the surviving - portion of history rather than an infinite false-match loop. + **Scrollback rollover raises.** When ``history-limit`` is reached + mid-wait, tmux's ``grid_collect_history`` (``grid.c``) frees the + oldest scrollback rows and decrements ``hsize``, invalidating the + absolute baseline. The same hsize-decrement fires on + ``clear-history`` and on shrinking ``history-limit`` mid-wait. + The tool raises ``ToolError`` ("history rolled over during wait") + rather than silently false-matching or silently missing output; + the caller can re-arm ``wait_for_text`` or switch to + ``wait_for_channel`` for deterministic synchronization. **In-place rewrites below the baseline.** Programs that paint over rows the tool will capture — cursor-position escape @@ -306,7 +310,6 @@ async def wait_for_text( # process's output. See issue #45. entry = await asyncio.to_thread(_read_pane_state, pane) baseline_abs = entry.history_size + entry.cursor_y - pane_height = entry.pane_height baseline_pid = entry.pane_pid matched_lines: list[str] = [] @@ -337,15 +340,34 @@ async def wait_for_text( "baseline anchor no longer valid" ) raise ToolError(msg) + # When tmux's ``history-limit`` is reached, ``grid_collect_history`` + # (grid.c) frees the oldest scrollback rows and decrements + # ``gd->hsize``, so absolute index math anchored on + # ``history_size + cursor_y`` is no longer recoverable. 
The same + # hsize-decrement also fires on ``clear-history`` and on shrinking + # the ``history-limit`` option mid-wait. There is no server-side way + # to disambiguate "trimmed" from "still anchored", so surface the + # lost anchor as ``ToolError`` instead of silently false-matching + # or silently missing output. + if state.history_size < entry.history_size: + msg = ( + f"pane {pane.pane_id} history rolled over during wait " + f"(history_size {entry.history_size} -> " + f"{state.history_size}); baseline anchor lost — " + "re-arm wait_for_text or use wait_for_channel for " + "deterministic synchronization" + ) + raise ToolError(msg) # ``+ 1`` skips the baseline line itself so we don't # re-match the row the cursor sat on at entry. start_line = baseline_abs - state.history_size + 1 - # ``capture-pane -S`` clips a below-visible start back to - # the bottom row (cmd-capture-pane.c, post-tmux-3.0), so a - # naive capture would return stale bottom-row text whenever - # no new rows have appeared below the cursor yet. Skip the - # capture entirely on those ticks. - if start_line >= pane_height: + # ``capture-pane -S`` clips a below-visible start back to the + # bottom row (cmd-capture-pane.c, post-tmux-3.0), so a naive + # capture would return stale bottom-row text whenever no new rows + # have appeared below the cursor yet. Compare against + # ``state.pane_height`` (re-read each tick) so a resize mid-wait + # doesn't leave the guard keyed to a stale height. + if start_line >= state.pane_height: lines: list[str] = [] else: lines = await asyncio.to_thread( diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index cdcd8028..0d14c855 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1488,6 +1488,138 @@ def test_wait_for_text_rejects_non_positive_timeout( ) +def test_wait_for_text_raises_on_history_rollover( + mcp_server: Server, mcp_pane: Pane +) -> None: + """A history trim mid-wait surfaces as ToolError, not silent miss. 
+ + The rollover guard fires when ``state.history_size < + entry.history_size``. To force that we (1) pre-fill the pane with + scrollback so ``entry.history_size`` is large at wait entry, then + (2) run ``clear-history`` mid-wait — tmux's ``grid_clear_history`` + (``grid.c``) sets ``gd->hsize = 0`` synchronously, dropping hsize + below the baseline. The wait then detects the drop and raises + instead of silently false-matching or missing output. + + ``clear-history`` is chosen (rather than shrinking ``history-limit`` + retroactively) because the retroactive-trim path landed in tmux + master commit 195a9cf and is not in tmux 3.6a or earlier releases. + ``clear-history`` works on all supported tmux versions. + """ + import asyncio + + mcp_pane.send_keys("for i in $(seq 1 100); do echo prefill$i; done", enter=True) + + def _prefilled() -> bool: + hs = mcp_pane.display_message("#{history_size}", get_text=True) + return bool(hs) and int(hs[0]) >= 50 + + retry_until(_prefilled, 5, raises=True) + + async def clear_after_delay() -> None: + # Let wait_for_text snapshot the baseline first, then drop + # hsize to 0 with clear-history. + await asyncio.sleep(0.1) + await asyncio.to_thread(mcp_pane.cmd, "clear-history") + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="NEVER_APPEARS_rollover", + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await clear_after_delay() + return await wait_task + + with pytest.raises(ToolError, match="history rolled over"): + asyncio.run(run()) + + +def test_wait_for_text_succeeds_when_history_grows_normally( + mcp_server: Server, mcp_pane: Pane +) -> None: + """Monotonic history growth without trim does NOT trip the rollover guard. + + The guard fires only when ``state.history_size < entry.history_size``. 
+ Many lines scrolling into a generous ``history-limit`` keep hsize + monotonically increasing, so a long-output command followed by a + sentinel marker must still match cleanly. + """ + import asyncio + + async def emit_after_baseline() -> None: + await asyncio.sleep(0.1) + cmd = "for i in $(seq 1 50); do echo line$i; done; echo WAIT_MARKER_grows_ok" + await asyncio.to_thread(mcp_pane.send_keys, cmd, True) + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="WAIT_MARKER_grows_ok", + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await emit_after_baseline() + return await wait_task + + result = asyncio.run(run()) + assert result.found is True + assert result.timed_out is False + + +def test_wait_for_text_handles_resize_during_wait( + mcp_server: Server, mcp_pane: Pane +) -> None: + """Mid-wait resize keys the bottom-row clip to the LIVE pane height. + + Without the ``state.pane_height`` fix, the bottom-row clip guard + stays keyed to the entry-time pane height. Shrinking the pane + mid-wait would then leave the guard too lax — the capture would + fire past the new bottom and tmux's ``-S`` clip would return stale + bottom-row text. The fix re-reads ``pane_height`` each tick so the + guard matches the current visible region. + """ + import asyncio + + # Park a stale marker on the last visible row and freeze output. + # Same parking shape as test_wait_for_text_does_not_match_bottom_row_clip. 
+ fill_and_park = ( + "for i in $(seq 1 30); do echo filler; done; " + "printf STALE_RESIZE_MARKER; sleep 60" + ) + mcp_pane.respawn(kill=True, shell=f"sh -c '{fill_and_park}'") + + def _ready() -> bool: + return any("STALE_RESIZE_MARKER" in line for line in mcp_pane.capture_pane()) + + retry_until(_ready, 5, raises=True) + + async def resize_after_delay() -> None: + await asyncio.sleep(0.1) + await asyncio.to_thread(mcp_pane.cmd, "resize-pane", "-y", "5") + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="STALE_RESIZE_MARKER", + pane_id=mcp_pane.pane_id, + timeout=0.5, + socket_name=mcp_server.socket_name, + ) + ) + await resize_after_delay() + return await wait_task + + result = asyncio.run(run()) + assert result.found is False + assert result.timed_out is True + + def test_wait_for_text_reports_progress(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text calls ``ctx.report_progress`` at each poll tick. From 67eed15945f9472077d47d1286f60032ad3ddb78 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Fri, 15 May 2026 18:38:43 -0500 Subject: [PATCH 13/31] Pane(fix[wait_for_text]): join wrapped lines so long-line patterns match Passes ``join_wrapped=True`` (tmux ``-J``) to ``Pane.capture_pane`` so a pattern that crosses the pane's visual wrap is matched against the joined logical line. Without this, long error strings like "Build failed: module not found" got split across two rows by tmux and a naive ``re.search`` against each row in isolation never matched. The returned ``matched_lines`` entry for a wrap-crossing hit is the joined line and can therefore be longer than ``pane_width``; documented in the Notes section so agents that bound output understand the implication. 
--- CHANGES | 4 ++ src/libtmux_mcp/tools/pane_tools/wait.py | 18 ++++++++- tests/test_pane_tools.py | 50 ++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 5cb81184..222bb7a9 100644 --- a/CHANGES +++ b/CHANGES @@ -30,6 +30,10 @@ When the pane's `history-limit` causes the baseline grid row to be trimmed mid-w ### Fixes +**{tooliconl}`wait-for-text` matches patterns across visually-wrapped lines** + +The poll-loop capture now passes tmux's `-J` flag (`join_wrapped=True` on `Pane.capture_pane`), so a pattern that crosses the pane's visual wrap is still matched against the joined logical line. Long error strings like `"Build failed: module not found"` that tmux split across two rows previously slipped through `re.search`. The joined logical line is returned in `matched_lines` and may exceed `pane_width`. (#45 follow-up) + **{tooliconl}`wait-for-text` waits for new output, not stale scrollback** {tooliconl}`wait-for-text` now anchors on the pane's grid position at entry and only matches lines written after the call begins; the previous behaviour returned `found=True` on the first poll whenever the pattern already lived in the pane. Panes that die or are respawned mid-wait surface a `ToolError`. For the synchronous "is the pattern in the pane right now?" case, call {tooliconl}`search-panes`. (#45) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 3421592e..c102edf5 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -234,6 +234,12 @@ async def wait_for_text( the caller can re-arm ``wait_for_text`` or switch to ``wait_for_channel`` for deterministic synchronization. + **Wrapped lines are joined for matching.** Captures pass tmux's + ``-J`` flag so a pattern that spans the pane's visual wrap is + still matched against the joined logical line. 
The returned + ``matched_lines`` entry for such a hit is the joined line and + can therefore be longer than ``pane_width``. + **In-place rewrites below the baseline.** Programs that paint over rows the tool will capture — cursor-position escape sequences, full-screen progress displays, anything that rewrites @@ -370,8 +376,18 @@ async def wait_for_text( if start_line >= state.pane_height: lines: list[str] = [] else: + # ``join_wrapped=True`` adds tmux's ``-J`` so visually + # wrapped lines are returned as one logical line. Without + # this, a pattern that spans tmux's wrap column is split + # across two rows and ``re.search`` against each row in + # isolation never matches. Trade-off: the returned + # ``matched_lines`` can contain a single string longer + # than ``pane_width``. lines = await asyncio.to_thread( - pane.capture_pane, start=start_line, end=None + pane.capture_pane, + start=start_line, + end=None, + join_wrapped=True, ) hits = [line for line in lines if compiled.search(line)] if hits: diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 0d14c855..43de91ad 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1620,6 +1620,56 @@ async def run() -> WaitForTextResult: assert result.timed_out is True +def test_wait_for_text_matches_pattern_across_wrap( + mcp_server: Server, mcp_pane: Pane +) -> None: + """A pattern that spans tmux's visual wrap matches via ``-J``. + + The poll loop passes ``join_wrapped=True`` to ``capture-pane`` so a + pattern that crosses the wrap boundary is matched against the + joined logical line. Without that flag, each visual row is its own + string and a regex against any one row never sees the full marker. + + The command line is composed of three ``printf`` calls so the + echoed command text does NOT contain the marker as a literal + substring — only the produced output (after the three pieces + concatenate on stdout) does. 
+ """ + import asyncio + + width_raw = mcp_pane.display_message("#{pane_width}", get_text=True) + assert width_raw is not None + pane_width = int(width_raw[0]) + + filler_len = max(1, pane_width - 5) + payload = ( + f"printf 'x%.0s' $(seq 1 {filler_len}); " + "printf 'WRA'; printf 'PPED_MARKER'; printf '_xyz'; echo" + ) + marker = "WRAPPED_MARKER_xyz" + + async def emit_after_baseline() -> None: + await asyncio.sleep(0.2) + await asyncio.to_thread(mcp_pane.send_keys, payload, True) + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern=marker, + pane_id=mcp_pane.pane_id, + timeout=3.0, + socket_name=mcp_server.socket_name, + ) + ) + await emit_after_baseline() + return await wait_task + + result = asyncio.run(run()) + assert result.found is True + assert result.timed_out is False + assert any(marker in line for line in result.matched_lines) + + def test_wait_for_text_reports_progress(mcp_server: Server, mcp_pane: Pane) -> None: """wait_for_text calls ``ctx.report_progress`` at each poll tick. From 6bfabc34dc1476603705bf89c32e59b15383b605 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Fri, 15 May 2026 18:43:52 -0500 Subject: [PATCH 14/31] Pane(docs[wait_for_text]): cross-link wait_for_channel as the deterministic primitive Re-frames the wait family so agents pick the cheaper, race-free option first. Three coordinated doc edits: * ``send_keys`` docstring now forks the post-send choice into deterministic (``wait_for_channel`` + ``tmux wait-for -S``), pattern-match (``wait_for_text`` for output the agent doesn't control), and any-change (``wait_for_content_change``). * Server ``_INSTR_WAIT_NOT_POLL`` system instruction names ``wait_for_channel`` first; the 2 KB instruction budget is preserved by tightening the supporting prose. 
* ``wait_for_text`` docstring gains a ``When NOT to use this`` section that names the sequential-send_keys race and steers callers to the channel pattern with a pointer at the ``run_and_wait`` recipe. Tests: - ``test_base_instructions_prefer_wait_over_poll`` now requires ``wait_for_channel`` to appear in the system instructions and to be named before the fallbacks. - ``test_send_keys_docstring_cross_links_wait_for_channel`` is a cheap regression against future docstring drift. --- CHANGES | 6 ++++++ src/libtmux_mcp/server.py | 6 +++--- src/libtmux_mcp/tools/pane_tools/io.py | 20 +++++++++++++++++--- src/libtmux_mcp/tools/pane_tools/wait.py | 13 +++++++++++++ tests/test_pane_tools.py | 16 ++++++++++++++++ tests/test_server.py | 19 ++++++++++++++----- 6 files changed, 69 insertions(+), 11 deletions(-) diff --git a/CHANGES b/CHANGES index 222bb7a9..9da987c8 100644 --- a/CHANGES +++ b/CHANGES @@ -24,6 +24,12 @@ wait_for_text(pattern="OK") When the pane's `history-limit` causes the baseline grid row to be trimmed mid-wait (or `clear-history` runs, or `history-limit` is shrunk while the wait is running), {tooliconl}`wait-for-text` now raises `ToolError` instead of silently returning stale matches or missing new output. The absolute baseline anchor relies on tmux's `history_size` being monotonically non-decreasing; once `grid_collect_history` fires there is no server-side way to recover the original anchor. For deterministic command-completion synchronization, compose `tmux wait-for -S ` into the shell command and use {tooliconl}`wait-for-channel`. 
(#45 follow-up) +### Documentation + +**Wait family is re-framed around {tooliconl}`wait-for-channel` as the deterministic primitive** + +The {tooliconl}`send-keys` docstring, the server system instructions, and the {tooliconl}`wait-for-text` docstring now point agents at {tooliconl}`wait-for-channel` with composed `tmux wait-for -S` for command completion, and reserve {tooliconl}`wait-for-text` / {tooliconl}`wait-for-content-change` for output the agent does not author. The `run_and_wait` recipe is the canonical status-preserving pattern. (#45 follow-up) + ### Dependencies **Minimum `libtmux>=0.56.0`** (was `>=0.55.1`). Unlocks the new tmux-command wrappers shipped in libtmux 0.56.0 — {meth}`~libtmux.Pane.respawn`, {meth}`~libtmux.Pane.copy_mode`, {meth}`~libtmux.Pane.pipe`, {meth}`~libtmux.Pane.swap`, {meth}`~libtmux.Pane.paste_buffer`, {meth}`~libtmux.Pane.clear_history`, {meth}`~libtmux.Pane.display_message`, {meth}`~libtmux.Server.delete_buffer`, and the {meth}`~libtmux.Session.next_window` / {meth}`~libtmux.Session.previous_window` / {meth}`~libtmux.Session.last_window` trio — so the MCP no longer falls back to raw `cmd()` calls for tmux commands the upstream API now covers. (#46) diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index 8bf3d2ff..8b36557d 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -94,9 +94,9 @@ ) _INSTR_WAIT_NOT_POLL = ( - "WAIT, DON'T POLL: use wait_for_text (text/regex) or " - "wait_for_content_change instead of capture_pane retry loops; " - "both block server-side until the condition or timeout." + "WAIT, DON'T POLL: prefer wait_for_channel (compose `tmux wait-for -S`) " + "for command completion. Else wait_for_text / wait_for_content_change " + "for output you don't author." ) #: Gap-explainer: write-hook tools are intentionally absent. 
See module diff --git a/src/libtmux_mcp/tools/pane_tools/io.py b/src/libtmux_mcp/tools/pane_tools/io.py index 15870aa3..22d4c473 100644 --- a/src/libtmux_mcp/tools/pane_tools/io.py +++ b/src/libtmux_mcp/tools/pane_tools/io.py @@ -32,9 +32,23 @@ def send_keys( ) -> str: """Send keys (commands or text) to a tmux pane. - After sending, use wait_for_text to block until the command completes, - or capture_pane to read the result. Do not capture_pane immediately — - there is a race condition. + After sending, choose your synchronization primitive based on what you + control: + + - **Deterministic (preferred):** compose ``tmux wait-for -S <channel>`` + into the shell command and call ``wait_for_channel``. See the + ``run_and_wait`` prompt for the canonical status-preserving pattern. + Cheaper in agent turns and immune to baseline races. + - **Pattern-match:** call ``wait_for_text`` when the output you await + is not yours to author, or cannot appear before the wait locks its + baseline (e.g. a sentinel ``echo`` after a long command). Fast + ``echo`` statements can race the baseline read; for quick commands + you author, prefer the deterministic channel pattern above. + - **Any change:** call ``wait_for_content_change`` when you don't know + the output shape. + + Do NOT call ``capture_pane`` immediately — both the read and the + pattern-match paths race the pane's PTY draw. Parameters ---------- diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index c102edf5..d6fe082e 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -182,6 +182,19 @@ async def wait_for_text( instead of ``cmd``'s natural output. tmux's grid model cannot distinguish "your output" from "theirs"; the sentinel can.
+ **When NOT to use this — sequential ``send_keys`` race.** If you + call ``send_keys`` and immediately ``wait_for_text``, fast output + (``echo``, prompt-return after ``^C``) can land *before* this tool + snapshots the baseline, and the match is then invisible to the + wait. The race is small but real on CI and over remote sockets. + For commands you author, prefer the channel pattern: append + ``; tmux wait-for -S `` to your ``send_keys`` payload and + call ``wait_for_channel`` instead. The ``run_and_wait`` prompt at + ``libtmux_mcp.prompts.recipes`` shows the safe, status-preserving + composition. Reserve ``wait_for_text`` for output you do not + control (third-party process logs, daemon prompts, interactive + supervisors). + When a :class:`fastmcp.Context` is available, this tool emits periodic ``ctx.report_progress`` notifications so MCP clients can show a "polling pane X... (elapsed/timeout)" indicator during long diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 43de91ad..73203ef3 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -55,6 +55,22 @@ def test_send_keys(mcp_server: Server, mcp_pane: Pane) -> None: assert "sent" in result.lower() +def test_send_keys_docstring_cross_links_wait_for_channel() -> None: + """``send_keys`` docstring steers agents at ``wait_for_channel`` first. + + Agents read tool descriptions when picking a synchronization primitive. + After the baseline-anchor design landed, ``send_keys`` → + ``wait_for_text`` can race for fast commands (the baseline locks after + the keys are buffered), and the channel pattern is strictly cheaper + for command completion. The docstring must therefore mention both + ``wait_for_channel`` and ``run_and_wait`` so the agent can find the + safe pattern without a separate docs lookup. 
+ """ + assert send_keys.__doc__ is not None + assert "wait_for_channel" in send_keys.__doc__ + assert "run_and_wait" in send_keys.__doc__ + + def test_capture_pane(mcp_server: Server, mcp_pane: Pane) -> None: """capture_pane returns pane content.""" result = capture_pane( diff --git a/tests/test_server.py b/tests/test_server.py index de61814a..65db7d1d 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -151,14 +151,23 @@ def test_base_instructions_surface_flagship_read_tools() -> None: def test_base_instructions_prefer_wait_over_poll() -> None: - """_BASE_INSTRUCTIONS names wait_for_text and wait_for_content_change. - - The wait tools block server-side, which is dramatically cheaper in - agent turns than ``capture_pane`` in a retry loop. Making them - discoverable from the instructions is a no-cost UX win. + """_BASE_INSTRUCTIONS names the wait family with the right primacy. + + ``wait_for_channel`` is the deterministic primitive (composes + ``tmux wait-for -S``) and should appear first; ``wait_for_text`` + and ``wait_for_content_change`` are the fallbacks for output the + agent doesn't author. Making the channel primitive discoverable + from the instructions steers agents off the polling-scraper path + for command-completion synchronization. """ + assert "wait_for_channel" in _BASE_INSTRUCTIONS assert "wait_for_text" in _BASE_INSTRUCTIONS assert "wait_for_content_change" in _BASE_INSTRUCTIONS + # The channel primitive should be named before the fallbacks so an + # agent that scans top-to-bottom encounters the cheaper option first. 
+ assert _BASE_INSTRUCTIONS.index("wait_for_channel") < _BASE_INSTRUCTIONS.index( + "wait_for_text" + ) def test_base_instructions_document_hook_boundary() -> None: From 692858a651d95cee8e7db06671425cb0dcaaf2b7 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 04:38:52 -0500 Subject: [PATCH 15/31] Pane(fix[wait_for_text]): exempt resize-grow from the rollover guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Phase 1 rollover predicate (``state.history_size < entry.history_size``) fires correctly on real row eviction — ``grid_collect_history`` trim, ``clear-history``, ``set-option history-limit `` on tmux master — but ALSO fires on resize-grow when ``hscrolled > 0``. tmux's ``screen_resize_y`` (screen.c:451-465) decrements ``gd->hsize`` on a vertical grow by pulling rows from history back into the visible region. The rows are not freed; only the history/visible-region partition shifts and absolute indices stay valid. The Phase 1 predicate raised a spurious ``ToolError`` in that case. Trim and resize-grow are distinguished by ``pane_height``: trim leaves it unchanged, resize-grow increases it. Adding ``state.pane_height <= entry.pane_height`` as a conjunct makes the predicate the actual signature of row eviction, exempting resize-grow. Also walks back two over-broad claims that shipped with Phase 1: the "monotonically non-decreasing" framing in CHANGES (resize-grow proves it is not monotone) and the parenthetical about "shrinking history-limit mid-wait" — that retroactive-trim path requires tmux commit 195a9cf which is not in any tagged release as of tmux 3.6a. Adds test_wait_for_text_survives_resize_grow_with_scrolled_history. 
--- CHANGES | 6 +- src/libtmux_mcp/tools/pane_tools/wait.py | 37 +++++++++---- tests/test_pane_tools.py | 70 ++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 12 deletions(-) diff --git a/CHANGES b/CHANGES index 9da987c8..2e4c9f86 100644 --- a/CHANGES +++ b/CHANGES @@ -22,7 +22,7 @@ wait_for_text(pattern="OK") **{tooliconl}`wait-for-text` raises on history rollover** -When the pane's `history-limit` causes the baseline grid row to be trimmed mid-wait (or `clear-history` runs, or `history-limit` is shrunk while the wait is running), {tooliconl}`wait-for-text` now raises `ToolError` instead of silently returning stale matches or missing new output. The absolute baseline anchor relies on tmux's `history_size` being monotonically non-decreasing; once `grid_collect_history` fires there is no server-side way to recover the original anchor. For deterministic command-completion synchronization, compose `tmux wait-for -S ` into the shell command and use {tooliconl}`wait-for-channel`. (#45 follow-up) +When the pane's `history-limit` causes the baseline grid row to be trimmed mid-wait (or `clear-history` runs), {tooliconl}`wait-for-text` now raises `ToolError` instead of silently returning stale matches or missing new output. The absolute baseline anchor relies on `history_size` not shrinking due to row eviction; once `grid_collect_history` frees those rows there is no server-side way to recover the original anchor. (Resize-grow also decrements `history_size`, but does not evict rows — that case is exempted by also checking `pane_height`.) For deterministic command-completion synchronization, compose `tmux wait-for -S ` into the shell command and use {tooliconl}`wait-for-channel`. 
(#45 follow-up) ### Documentation @@ -36,6 +36,10 @@ The {tooliconl}`send-keys` docstring, the server system instructions, and the {t ### Fixes +**{tooliconl}`wait-for-text` no longer raises spuriously on resize-grow** + +The rollover guard now fires only when `history_size` shrinks *and* `pane_height` does not grow — the actual signature of row eviction. Pane resize that pulls lines back from history into the visible region (`screen_resize_y` with `hscrolled > 0`) decrements `history_size` without freeing rows, so the baseline anchor is still valid and the wait continues. Previously this case (WM resize, font/zoom change, mosh reconnect mid-wait) surfaced as a spurious "history rolled over" `ToolError`. (#45 follow-up) + **{tooliconl}`wait-for-text` matches patterns across visually-wrapped lines** The poll-loop capture now passes tmux's `-J` flag (`join_wrapped=True` on `Pane.capture_pane`), so a pattern that crosses the pane's visual wrap is still matched against the joined logical line. Long error strings like `"Build failed: module not found"` that tmux split across two rows previously slipped through `re.search`. The joined logical line is returned in `matched_lines` and may exceed `pane_width`. (#45 follow-up) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index d6fe082e..4a96c7e6 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -241,11 +241,18 @@ async def wait_for_text( mid-wait, tmux's ``grid_collect_history`` (``grid.c``) frees the oldest scrollback rows and decrements ``hsize``, invalidating the absolute baseline. The same hsize-decrement fires on - ``clear-history`` and on shrinking ``history-limit`` mid-wait. 
- The tool raises ``ToolError`` ("history rolled over during wait") - rather than silently false-matching or silently missing output; - the caller can re-arm ``wait_for_text`` or switch to - ``wait_for_channel`` for deterministic synchronization. + ``clear-history``. The tool raises ``ToolError`` ("history rolled + over during wait") rather than silently false-matching or silently + missing output; the caller can re-arm ``wait_for_text`` or switch + to ``wait_for_channel`` for deterministic synchronization. + + Note that ``hsize`` also decrements on resize-grow when there is + scrolled history available (``screen.c`` ``screen_resize_y``), + but in that case the row data is not freed — only the + history/visible-region boundary moves and absolute indices stay + valid. The guard distinguishes the two cases by also requiring + ``pane_height`` to not have grown, so resize-grow continues + polling cleanly. **Wrapped lines are joined for matching.** Captures pass tmux's ``-J`` flag so a pattern that spans the pane's visual wrap is @@ -363,12 +370,20 @@ async def wait_for_text( # (grid.c) frees the oldest scrollback rows and decrements # ``gd->hsize``, so absolute index math anchored on # ``history_size + cursor_y`` is no longer recoverable. The same - # hsize-decrement also fires on ``clear-history`` and on shrinking - # the ``history-limit`` option mid-wait. There is no server-side way - # to disambiguate "trimmed" from "still anchored", so surface the - # lost anchor as ``ToolError`` instead of silently false-matching - # or silently missing output. - if state.history_size < entry.history_size: + # hsize-decrement also fires on ``clear-history``. + # + # ``hsize`` ALSO decrements on resize-grow when ``hscrolled > 0`` + # (``screen.c`` ``screen_resize_y``: rows are pulled from history + # back into the visible region). In that case no row data is freed + # — only the hsize/visible-region partition shifts and absolute + # indices stay valid. 
Trim and resize-grow are distinguished by + # ``pane_height``: trim leaves it unchanged, resize-grow increases + # it. The conjunction below is the actual signature of row + # eviction; resize-grow falls through cleanly. + if ( + state.history_size < entry.history_size + and state.pane_height <= entry.pane_height + ): msg = ( f"pane {pane.pane_id} history rolled over during wait " f"(history_size {entry.history_size} -> " diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 73203ef3..4aa1173b 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1588,6 +1588,76 @@ async def run() -> WaitForTextResult: assert result.timed_out is False +def test_wait_for_text_survives_resize_grow_with_scrolled_history( + mcp_server: Server, mcp_pane: Pane +) -> None: + """Resize-grow that pulls lines from history must NOT trip the rollover guard. + + tmux's ``screen_resize_y`` (``screen.c:451-465``) decrements + ``gd->hsize`` on a vertical grow when ``hscrolled > 0`` — rows + from history are pulled back into the visible region. The rows + themselves are NOT freed; only the history/visible-region + boundary shifts and absolute indices stay valid. + + The rollover guard distinguishes this case from real row eviction + by ALSO requiring ``pane_height`` to not have grown. Resize-grow + increases ``pane_height``, so the conjunction is false and the + guard correctly does NOT fire. + + Before this distinction was added (initial Phase 1 predicate), + the same sequence raised a spurious "history rolled over" + ``ToolError`` for any agent whose pane got resized mid-wait by a + WM event, font/zoom change, or mosh reconnect. + """ + import asyncio + + # Pre-fill scrollback so hscrolled > 0 — rows must have already + # scrolled past the visible region for screen_resize_y to have + # anything to pull back on grow. 
+ mcp_pane.send_keys("for i in $(seq 1 100); do echo prefill$i; done", enter=True) + + def _prefilled() -> bool: + hs = mcp_pane.display_message("#{history_size}", get_text=True) + return bool(hs) and int(hs[0]) >= 50 + + retry_until(_prefilled, 5, raises=True) + + # Read current pane height; we'll grow past it during the wait. + height_raw = mcp_pane.display_message("#{pane_height}", get_text=True) + assert height_raw is not None + current_height = int(height_raw[0]) + target_height = current_height + 3 + + async def grow_after_delay() -> None: + # Let wait_for_text snapshot the baseline first, then grow + # the window vertically. screen_resize_y pulls rows from + # history back into view, decrementing hsize. + await asyncio.sleep(0.1) + await asyncio.to_thread( + mcp_pane.window.cmd, + "resize-window", + "-y", + str(target_height), + ) + + async def run() -> WaitForTextResult: + wait_task = asyncio.create_task( + wait_for_text( + pattern="NEVER_APPEARS_resize_grow", + pane_id=mcp_pane.pane_id, + timeout=1.0, + socket_name=mcp_server.socket_name, + ) + ) + await grow_after_delay() + return await wait_task + + # The wait must complete cleanly via timeout — NOT a ToolError. + result = asyncio.run(run()) + assert result.found is False + assert result.timed_out is True + + def test_wait_for_text_handles_resize_during_wait( mcp_server: Server, mcp_pane: Pane ) -> None: From 8c7a15289757aad3055c9b2b0e8dc5888a60d45a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 05:43:03 -0500 Subject: [PATCH 16/31] Pane(docs[wait_for_text]): walk back rollover-detection contract; the guard catches a subset A 2 ms high-frequency repro over ~3000 hsize samples showed the rollover guard's strict-inequality predicate (state.history_size < entry.history_size) never observes a dip during real history-limit overflow: tmux's grid_collect_history trims (~10% of hlimit) then immediately scrolls new lines back, so sampled hsize stays clamped near the cap. 
The predicate only fires when entry.history_size happened to land exactly at hlimit (environment luck). All earlier claims that "wait_for_text raises on history rollover" were therefore over-broad. Walk back the contract: * Docstring's "Scrollback rollover raises" Note now says "rollover detection is partial" and enumerates what is and isn't caught (clear-history yes; retroactive history-limit shrink on tmux >= 3.7 yes; grid_collect_history trim during continuous output NO). Points to the upcoming risk-band warning and to wait_for_channel for deterministic synchronization. * ToolError message changes from "history rolled over during wait" to "history shrank below entry baseline" -- describes what we actually detected (a shrink), not what we assumed (a rollover). * CHANGES breaking-change entry: same walk-back; references the new ### Fixes warning entry and the tracking issue filed in libtmux. * test_wait_for_text_raises_on_history_rollover renamed to test_wait_for_text_raises_when_history_is_cleared, since that is the path it actually exercises. Docstring trimmed to describe the test mechanism, with no dev-history narrative. * Same dev-history narrative removed from test_wait_for_text_survives_resize_grow_with_scrolled_history (same anti-pattern, same project guidance about not referencing the task or fix history in code comments). No behavior change beyond the message text and test rename. Phase 7 follow-up adds the risk-band warning predicate that catches what this guard does not. 
--- CHANGES | 6 ++-- src/libtmux_mcp/tools/pane_tools/wait.py | 27 ++++++++------ tests/test_pane_tools.py | 46 ++++++++---------------- 3 files changed, 36 insertions(+), 43 deletions(-) diff --git a/CHANGES b/CHANGES index 2e4c9f86..5553db26 100644 --- a/CHANGES +++ b/CHANGES @@ -20,9 +20,11 @@ wait_for_text(pattern="OK", content_start=-100) wait_for_text(pattern="OK") ``` -**{tooliconl}`wait-for-text` raises on history rollover** +**{tooliconl}`wait-for-text` raises on a subset of history-rollover events** -When the pane's `history-limit` causes the baseline grid row to be trimmed mid-wait (or `clear-history` runs), {tooliconl}`wait-for-text` now raises `ToolError` instead of silently returning stale matches or missing new output. The absolute baseline anchor relies on `history_size` not shrinking due to row eviction; once `grid_collect_history` frees those rows there is no server-side way to recover the original anchor. (Resize-grow also decrements `history_size`, but does not evict rows — that case is exempted by also checking `pane_height`.) For deterministic command-completion synchronization, compose `tmux wait-for -S ` into the shell command and use {tooliconl}`wait-for-channel`. (#45 follow-up) +When the pane's `history-limit` causes `hsize` to shrink below the entry baseline mid-wait — typically via `clear-history`, or via retroactive `history-limit` shrink on tmux >= 3.7 (commit `195a9cf`) — {tooliconl}`wait-for-text` now raises `ToolError` instead of silently returning stale matches or missing new output. Resize-grow also decrements `hsize` (rows pulled back from history), but does not evict row data; that case is exempted by also checking `pane_height`. + +The guard does **not** reliably detect `grid_collect_history` trim that occurs during continuous output: tmux's trim-then-scroll bounce keeps sampled `hsize` clamped near the cap, so the predicate stays false even though rows are being evicted. 
For deterministic command-completion synchronization, compose `tmux wait-for -S ` into the shell command and use {tooliconl}`wait-for-channel`. {tooliconl}`wait-for-text` also emits a runtime `notifications/message` warning when polling within the trim-risk band (see ### Fixes), so MCP clients can surface "correctness is best-effort here" without the call failing. (#45 follow-up; tracking issue filed in libtmux for a deeper detection helper.) ### Documentation diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 4a96c7e6..7f546a76 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -237,14 +237,21 @@ async def wait_for_text( Notes ----- - **Scrollback rollover raises.** When ``history-limit`` is reached - mid-wait, tmux's ``grid_collect_history`` (``grid.c``) frees the - oldest scrollback rows and decrements ``hsize``, invalidating the - absolute baseline. The same hsize-decrement fires on - ``clear-history``. The tool raises ``ToolError`` ("history rolled - over during wait") rather than silently false-matching or silently - missing output; the caller can re-arm ``wait_for_text`` or switch - to ``wait_for_channel`` for deterministic synchronization. + **Scrollback rollover detection is partial.** The tool raises + ``ToolError`` when ``hsize`` shrinks below the entry value — which + catches ``clear-history``, retroactive ``history-limit`` shrink + (tmux >= 3.7, commit ``195a9cf``), and rollover events where the + dip is observable between polls. It does **not** reliably detect + ``grid_collect_history`` trim that fires during continuous output: + tmux trims (~10% of ``history-limit``) then immediately scrolls + new lines back, so sampled ``hsize`` can stay clamped at the cap + and never appear below entry. tmux exposes no monotonic trim + counter or hook to disambiguate. 
For deterministic + command-completion synchronization use ``wait_for_channel``; for + observation flows that approach ``history-limit``, see the + runtime ``ctx.warning`` notification emitted by this tool in the + trim-risk band. (Tracking: a libtmux issue investigating a + libtmux-side trim-detection helper.) Note that ``hsize`` also decrements on resize-grow when there is scrolled history available (``screen.c`` ``screen_resize_y``), @@ -385,8 +392,8 @@ async def wait_for_text( and state.pane_height <= entry.pane_height ): msg = ( - f"pane {pane.pane_id} history rolled over during wait " - f"(history_size {entry.history_size} -> " + f"pane {pane.pane_id} history shrank below entry " + f"baseline (history_size {entry.history_size} -> " f"{state.history_size}); baseline anchor lost — " "re-arm wait_for_text or use wait_for_channel for " "deterministic synchronization" diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 4aa1173b..4f6faaac 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1504,23 +1504,15 @@ def test_wait_for_text_rejects_non_positive_timeout( ) -def test_wait_for_text_raises_on_history_rollover( +def test_wait_for_text_raises_when_history_is_cleared( mcp_server: Server, mcp_pane: Pane ) -> None: - """A history trim mid-wait surfaces as ToolError, not silent miss. - - The rollover guard fires when ``state.history_size < - entry.history_size``. To force that we (1) pre-fill the pane with - scrollback so ``entry.history_size`` is large at wait entry, then - (2) run ``clear-history`` mid-wait — tmux's ``grid_clear_history`` - (``grid.c``) sets ``gd->hsize = 0`` synchronously, dropping hsize - below the baseline. The wait then detects the drop and raises - instead of silently false-matching or missing output. 
- - ``clear-history`` is chosen (rather than shrinking ``history-limit`` - retroactively) because the retroactive-trim path landed in tmux - master commit 195a9cf and is not in tmux 3.6a or earlier releases. - ``clear-history`` works on all supported tmux versions. + """``clear-history`` during a wait drops ``hsize`` to 0, tripping the guard. + + Pre-fills scrollback, starts the wait, then runs ``clear-history`` + on the pane. tmux's ``grid_clear_history`` sets ``gd->hsize = 0`` + synchronously, so the next poll sees ``state.history_size < + entry.history_size`` and raises ``ToolError``. """ import asyncio @@ -1550,7 +1542,7 @@ async def run() -> WaitForTextResult: await clear_after_delay() return await wait_task - with pytest.raises(ToolError, match="history rolled over"): + with pytest.raises(ToolError, match="history shrank below entry baseline"): asyncio.run(run()) @@ -1593,21 +1585,13 @@ def test_wait_for_text_survives_resize_grow_with_scrolled_history( ) -> None: """Resize-grow that pulls lines from history must NOT trip the rollover guard. - tmux's ``screen_resize_y`` (``screen.c:451-465``) decrements - ``gd->hsize`` on a vertical grow when ``hscrolled > 0`` — rows - from history are pulled back into the visible region. The rows - themselves are NOT freed; only the history/visible-region - boundary shifts and absolute indices stay valid. - - The rollover guard distinguishes this case from real row eviction - by ALSO requiring ``pane_height`` to not have grown. Resize-grow - increases ``pane_height``, so the conjunction is false and the - guard correctly does NOT fire. - - Before this distinction was added (initial Phase 1 predicate), - the same sequence raised a spurious "history rolled over" - ``ToolError`` for any agent whose pane got resized mid-wait by a - WM event, font/zoom change, or mosh reconnect. 
+ tmux's ``screen_resize_y`` decrements ``gd->hsize`` on a vertical + grow when ``hscrolled > 0`` — rows from history are pulled back + into the visible region. The rows themselves are NOT freed; only + the history/visible-region boundary shifts and absolute indices + stay valid. The guard's conjunction with ``pane_height <= + entry.pane_height`` exempts this case, because resize-grow also + increases ``pane_height``. """ import asyncio From 2bb6b80d4d286696c47e086d44ade1d7eeaa775d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 06:46:32 -0500 Subject: [PATCH 17/31] Pane(fix[wait_for_text]): warn when polling in the history-limit risk band The strict-shrink rollover guard misses grid_collect_history trim during continuous output: tmux trims ~10% of history-limit then immediately scrolls a line back, so sampled hsize stays clamped at the cap and the predicate state.history_size < entry.history_size never fires. A 2 ms high-frequency probe across ~3000 samples confirmed there is no sub-poll window where hsize dips below entry once entry was sub-cap. Emit a one-shot ctx.warning when sampled state enters the trim-risk band (top 10% of history-limit) AND the pane has advanced since the wait's baseline. MCP clients subscribed to notifications/message see "correctness is best-effort here. For deterministic synchronization use wait_for_channel." and can decide whether to keep waiting, retry, or switch primitives. Warning is one-shot per call so we don't spam. Adds history_limit: int to _PaneState; the field is read in the same display-message round-trip as the other per-tick fields, so the loop still costs two subprocesses per tick (state + capture). No CHANGES update in this commit -- the consolidated wait-for-text CHANGES entry will land in a later commit per the Narrative Bleed doctrine (no published-release user saw "no warning" to be "fixed"; this warning is just part of the new contract). 
--- src/libtmux_mcp/tools/pane_tools/wait.py | 51 +++++++++++++++++++++--- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 7f546a76..31f139b7 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -105,8 +105,9 @@ class _PaneState(t.NamedTuple): Read in one ``display-message`` round-trip so the loop costs two subprocesses per tick (state + capture) instead of growing linearly with each new field. ``|`` is the field separator — - history/cursor/height are integers, ``pane_pid`` is a numeric PID - string, and ``pane_dead`` is the literal ``"0"``/``"1"`` flag. + history/cursor/height/limit are integers, ``pane_pid`` is a + numeric PID string, and ``pane_dead`` is the literal + ``"0"``/``"1"`` flag. """ history_size: int @@ -114,6 +115,7 @@ class _PaneState(t.NamedTuple): pane_height: int pane_pid: str pane_dead: bool + history_limit: int def _read_pane_state(pane: Pane) -> _PaneState: @@ -123,20 +125,25 @@ def _read_pane_state(pane: Pane) -> _PaneState: ``display-message`` call. ``history_size + cursor_y`` gives the absolute grid anchor at entry; ``pane_height`` gates the bottom- row capture clip; ``pane_pid`` and ``pane_dead`` surface - respawn-pane and pane-death events that invalidate the baseline. + respawn-pane and pane-death events that invalidate the baseline; + ``history_limit`` lets the poll loop detect the trim-risk band + (the upper 10% near ``history-limit`` where ``grid_collect_history`` + is firing but ``hsize`` stays clamped at the cap). 
""" stdout = pane.display_message( - "#{history_size}|#{cursor_y}|#{pane_height}|#{pane_pid}|#{pane_dead}", + "#{history_size}|#{cursor_y}|#{pane_height}|#{pane_pid}|#{pane_dead}|" + "#{history_limit}", get_text=True, ) - raw = stdout[0] if stdout else "0|0|0||0" - hs, cy, sy, pid, dead = raw.split("|", 4) + raw = stdout[0] if stdout else "0|0|0||0|0" + hs, cy, sy, pid, dead, hlimit = raw.split("|", 5) return _PaneState( history_size=int(hs), cursor_y=int(cy), pane_height=int(sy), pane_pid=pid, pane_dead=dead == "1", + history_limit=int(hlimit), ) @@ -347,6 +354,7 @@ async def wait_for_text( matched_lines: list[str] = [] found = False + warned_risk_band = False try: while True: @@ -399,6 +407,37 @@ async def wait_for_text( "deterministic synchronization" ) raise ToolError(msg) + # The shrink guard above catches clear-history and the + # entry-at-cap rollover edge. It does NOT catch + # grid_collect_history trim during continuous output, where + # hsize bounces between (hlimit - hlimit/10) and hlimit + # faster than we can poll. Emit a one-shot warning when + # sampled state is in the trim-risk band AND state has + # advanced since entry, so agents subscribed to MCP log + # notifications know to verify results or switch to + # wait_for_channel. + if not warned_risk_band and state.history_limit > 0: + trim_batch = max(state.history_limit // 10, 1) + risk_floor = state.history_limit - trim_batch + advanced = ( + state.history_size > entry.history_size + or state.cursor_y != entry.cursor_y + ) + if state.history_size >= risk_floor and advanced: + await _maybe_log( + ctx, + level="warning", + message=( + f"pane {pane.pane_id} is polling in the " + "history-limit trim-risk band " + f"(history_size {state.history_size} / " + f"history_limit {state.history_limit}); " + "wait_for_text correctness is best-effort " + "here. For deterministic synchronization " + "use wait_for_channel." 
+ ), + ) + warned_risk_band = True # ``+ 1`` skips the baseline line itself so we don't # re-match the row the cursor sat on at entry. start_line = baseline_abs - state.history_size + 1 From 28a2fdfabc8a34d741a6466186504c106de39585 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 06:51:50 -0500 Subject: [PATCH 18/31] Pane(test[wait_for_text]): cover history-limit trim during continuous output Adds the test the rollover claim should have had from the start. Earlier test_wait_for_text_raises_when_history_is_cleared exercises clear-history only -- not the grid_collect_history trim that fires during bursty output. This new test covers the actual real-rollover path. Shape: set history-limit=50 globally, split a fresh pane that inherits the small limit, attach a stub Context that records ctx.warning calls, start wait_for_text against a never-appearing pattern, burst 200 echo lines through the pane. tmux's grid_collect_history fires repeatedly, sampled history_size enters the risk band (>= 45), and the wait emits the one-shot trim-risk-band warning. The wait's found / timed_out result is intentionally not asserted -- the contract is best-effort once polling enters the risk band, so pinning a specific result would be over-specifying what tmux's grid model can't actually guarantee. The test pins the warning contract (what the tool guarantees) only. --- tests/test_pane_tools.py | 84 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 4f6faaac..d4722b74 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1962,6 +1962,90 @@ async def warning(self, message: str) -> None: ), f"expected a timeout warning, got: {log_calls}" +def test_wait_for_text_warns_in_history_limit_risk_band( + mcp_server: Server, mcp_pane: Pane +) -> None: + """``wait_for_text`` emits a warning when polling near ``history-limit``. 
+ + With a small ``history-limit`` and a burst of output that forces + ``grid_collect_history`` to fire repeatedly, sampled ``history_size`` + enters the trim-risk band (top 10% of ``history_limit``). The wait's + strict-shrink predicate cannot see those trims (hsize stays clamped + at the cap), so the tool emits a one-shot ``ctx.warning`` notification + so MCP clients can decide whether to keep waiting, retry, or switch + to ``wait_for_channel``. + + The wait's ``found`` / ``timed_out`` result is intentionally not + asserted — once polling enters the risk band, correctness is + best-effort. The test pins the warning contract (what the tool + guarantees), not the match contract (what tmux's grid model + fundamentally can't). + """ + import asyncio + + # ``history-limit`` is session-scope and the effective per-pane value + # is locked in at pane creation. Set the option globally, then split a + # fresh pane that inherits the small limit. The mcp_pane fixture's + # original pane keeps its larger limit and is unaffected. 
+ mcp_pane.session.cmd("set-option", "-g", "history-limit", "50") + fresh_pane = mcp_pane.window.split() + assert fresh_pane.pane_id is not None + + def _hlimit_locked() -> bool: + hl = fresh_pane.display_message("#{history_limit}", get_text=True) + return bool(hl) and int(hl[0]) == 50 + + retry_until(_hlimit_locked, 5, raises=True) + + log_calls: list[tuple[str, str]] = [] + + class _RecordingContext: + async def report_progress( + self, + progress: float, + total: float | None = None, + message: str = "", + ) -> None: + return + + async def warning(self, message: str) -> None: + log_calls.append(("warning", message)) + + async def burst_after_delay() -> None: + await asyncio.sleep(0.1) + await asyncio.to_thread( + fresh_pane.send_keys, + "for i in $(seq 1 200); do echo burst$i; done", + True, + ) + + async def run() -> None: + wait_task = asyncio.create_task( + wait_for_text( + pattern="WILL_NEVER_MATCH_riskband_qZ9", + pane_id=fresh_pane.pane_id, + timeout=2.0, + interval=0.05, + socket_name=mcp_server.socket_name, + ctx=t.cast("t.Any", _RecordingContext()), + ) + ) + await burst_after_delay() + try: + await wait_task + except ToolError: + # The strict-shrink guard may or may not fire depending on + # whether the dip is observable between polls. Either way, + # we only assert the warning contract, not the result type. 
+ return + + asyncio.run(run()) + + assert any( + level == "warning" and "trim-risk band" in msg for level, msg in log_calls + ), f"expected a trim-risk-band warning, got: {log_calls}" + + def test_wait_for_content_change_warns_on_timeout( mcp_server: Server, mcp_pane: Pane ) -> None: From 2aa7199f4a1e59095565d00f38f9b628c2abadb0 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 06:54:48 -0500 Subject: [PATCH 19/31] Pane(docs[wait_for_text]): describe the shipped wait-for-text contract for a published-release reader CHANGES walk-back and Phase 5 "spurious resize-grow" entries described intra-branch transitions a 0.1.0a6 user never saw. Consolidate the wait-for-text section into entries that describe only what the published release reader gets: * Breaking changes: one entry under "waits for new output, not stale scrollback" covers the #45 fix (real published bug), the new baseline-loss ToolErrors, the resize-grow exemption, the trim-risk-band warning, and the wait_for_channel cross-link. Reader needs zero branch context. * Fixes: keep the wrapped-line match and footgun-rejection entries (both real published bugs). Drop the "no longer raises spuriously on resize-grow" entry (phantom fix: 0.1.0a6 had no rollover guard to raise, spuriously or otherwise). The duplicate "waits for new output" Fixes entry is also dropped -- the consolidated Breaking changes entry covers it. Docstring Notes section in wait.py scrubbed of the tmux-commit-SHA citation and the forward-looking libtmux tracking-issue cross-reference. The tracking issue exists (filed at libtmux as part of this PR) but that's branch-internal narrative; users running the shipped wait_for_text don't need to know about a follow-up investigation issue to use the tool correctly. 
--- CHANGES | 18 ++++++------------ src/libtmux_mcp/tools/pane_tools/wait.py | 17 +++++++---------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/CHANGES b/CHANGES index 5553db26..30f5f0c3 100644 --- a/CHANGES +++ b/CHANGES @@ -20,11 +20,13 @@ wait_for_text(pattern="OK", content_start=-100) wait_for_text(pattern="OK") ``` -**{tooliconl}`wait-for-text` raises on a subset of history-rollover events** +**{tooliconl}`wait-for-text` waits for new output, not stale scrollback** + +Anchors on the pane's grid position at entry and only matches lines written after the call begins; the previous behaviour returned `found=True` on the first poll whenever the pattern already lived in the pane. For the synchronous "is the pattern in the pane right now?" case, call {tooliconl}`search-panes` instead. -When the pane's `history-limit` causes `hsize` to shrink below the entry baseline mid-wait — typically via `clear-history`, or via retroactive `history-limit` shrink on tmux >= 3.7 (commit `195a9cf`) — {tooliconl}`wait-for-text` now raises `ToolError` instead of silently returning stale matches or missing new output. Resize-grow also decrements `hsize` (rows pulled back from history), but does not evict row data; that case is exempted by also checking `pane_height`. +Baseline-loss events surface as `ToolError`: pane respawn, pane death, `clear-history`, and any other shrink of `history_size` below the entry value. Resize-grow is exempted — it decrements `history_size` without freeing row data, so the anchor stays valid. -The guard does **not** reliably detect `grid_collect_history` trim that occurs during continuous output: tmux's trim-then-scroll bounce keeps sampled `hsize` clamped near the cap, so the predicate stays false even though rows are being evicted. For deterministic command-completion synchronization, compose `tmux wait-for -S ` into the shell command and use {tooliconl}`wait-for-channel`. 
{tooliconl}`wait-for-text` also emits a runtime `notifications/message` warning when polling within the trim-risk band (see ### Fixes), so MCP clients can surface "correctness is best-effort here" without the call failing. (#45 follow-up; tracking issue filed in libtmux for a deeper detection helper.) +Trim that fires during continuous output cannot be reliably detected from `history_size` alone: tmux's trim-then-scroll bounce keeps sampled `hsize` clamped near `history-limit`. When polling approaches that band, the tool emits a `notifications/message` warning so MCP clients can decide whether to keep waiting, retry, or switch to {tooliconl}`wait-for-channel`. For deterministic command-completion synchronization, compose `tmux wait-for -S <channel>` into the shell command and call {tooliconl}`wait-for-channel`. (#45) ### Documentation @@ -38,17 +40,9 @@ The {tooliconl}`send-keys` docstring, the server system instructions, and the {t ### Fixes -**{tooliconl}`wait-for-text` no longer raises spuriously on resize-grow** - -The rollover guard now fires only when `history_size` shrinks *and* `pane_height` does not grow — the actual signature of row eviction. Pane resize that pulls lines back from history into the visible region (`screen_resize_y` with `hscrolled > 0`) decrements `history_size` without freeing rows, so the baseline anchor is still valid and the wait continues. Previously this case (WM resize, font/zoom change, mosh reconnect mid-wait) surfaced as a spurious "history rolled over" `ToolError`. (#45 follow-up) - **{tooliconl}`wait-for-text` matches patterns across visually-wrapped lines** -The poll-loop capture now passes tmux's `-J` flag (`join_wrapped=True` on `Pane.capture_pane`), so a pattern that crosses the pane's visual wrap is still matched against the joined logical line. Long error strings like `"Build failed: module not found"` that tmux split across two rows previously slipped through `re.search`.
The joined logical line is returned in `matched_lines` and may exceed `pane_width`. (#45 follow-up) - -**{tooliconl}`wait-for-text` waits for new output, not stale scrollback** - -{tooliconl}`wait-for-text` now anchors on the pane's grid position at entry and only matches lines written after the call begins; the previous behaviour returned `found=True` on the first poll whenever the pattern already lived in the pane. Panes that die or are respawned mid-wait surface a `ToolError`. For the synchronous "is the pattern in the pane right now?" case, call {tooliconl}`search-panes`. (#45) +The poll-loop capture now passes tmux's `-J` flag (`join_wrapped=True` on `Pane.capture_pane`), so a pattern that crosses the pane's visual wrap is still matched against the joined logical line. Long error strings like `"Build failed: module not found"` that tmux split across two rows previously slipped through `re.search`. The joined logical line is returned in `matched_lines` and may exceed `pane_width`. (#45) **{tooliconl}`wait-for-text` rejects misused `pattern` / `interval` / `timeout`** diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 31f139b7..33184b9c 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -246,19 +246,16 @@ async def wait_for_text( ----- **Scrollback rollover detection is partial.** The tool raises ``ToolError`` when ``hsize`` shrinks below the entry value — which - catches ``clear-history``, retroactive ``history-limit`` shrink - (tmux >= 3.7, commit ``195a9cf``), and rollover events where the - dip is observable between polls. It does **not** reliably detect + catches ``clear-history`` and any rollover where the dip is + observable between polls. 
It does **not** reliably detect ``grid_collect_history`` trim that fires during continuous output: tmux trims (~10% of ``history-limit``) then immediately scrolls new lines back, so sampled ``hsize`` can stay clamped at the cap - and never appear below entry. tmux exposes no monotonic trim - counter or hook to disambiguate. For deterministic - command-completion synchronization use ``wait_for_channel``; for - observation flows that approach ``history-limit``, see the - runtime ``ctx.warning`` notification emitted by this tool in the - trim-risk band. (Tracking: a libtmux issue investigating a - libtmux-side trim-detection helper.) + and never appear below entry. For deterministic command-completion + synchronization use ``wait_for_channel``; for observation flows + that approach ``history-limit``, the tool emits a runtime + ``ctx.warning`` notification when sampled state enters the + trim-risk band. Note that ``hsize`` also decrements on resize-grow when there is scrolled history available (``screen.c`` ``screen_resize_y``), From 62a2cef9da4b8a437f7f3cd8cac2a4590e985f28 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 07:53:40 -0500 Subject: [PATCH 20/31] Pane(fix[wait_for_text]): warn on entry-in-risk-band and cache history_limit once The risk-band warning's `advanced` conjunction (state.history_size > entry.history_size OR state.cursor_y != entry.cursor_y) gated the warning on the wait having moved since entry. Three real cases that warning was meant to cover slipped past it: * Entry already in the risk band, idle wait -- no output happens, nothing advances, warning never fires even though the baseline is in dangerous territory from tick zero. * Cursor pinned at the bottom row of the visible region -- subsequent scrolls advance history_size but leave cursor_y unchanged; if hsize also stayed at the cap across two polls, advanced returned False. 
* Trim+scroll bounce keeping hsize stable at the cap -- entry at 49, every subsequent poll also sees 49; advanced returns False. Drop the conjunction. The one-shot warned_risk_band flag is already the right noise filter -- it limits warnings to one per call regardless of how long the wait sits in the band. Firing on any tick where state.history_size >= risk_floor catches all three cases. Also cache baseline_hlimit at entry instead of re-reading state.history_limit each tick. history-limit is fixed at pane creation (retroactive change only lands in tmux 3.7+), so per-tick reads are redundant. The warning message uses the cached value too. Adds test_wait_for_text_warns_when_already_in_risk_band -- pre-fills a fresh pane past the risk floor, then runs an idle wait with no further output. Asserts the warning fires anyway. Pairs with the existing test_wait_for_text_warns_in_history_limit_risk_band which covers the advance-into-the-band path. --- src/libtmux_mcp/tools/pane_tools/wait.py | 54 +++++++++++---------- tests/test_pane_tools.py | 61 ++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 26 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 33184b9c..2f5b848e 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -105,9 +105,8 @@ class _PaneState(t.NamedTuple): Read in one ``display-message`` round-trip so the loop costs two subprocesses per tick (state + capture) instead of growing linearly with each new field. ``|`` is the field separator — - history/cursor/height/limit are integers, ``pane_pid`` is a - numeric PID string, and ``pane_dead`` is the literal - ``"0"``/``"1"`` flag. + history/cursor/height are integers, ``pane_pid`` is a numeric PID + string, and ``pane_dead`` is the literal ``"0"``/``"1"`` flag. 
""" history_size: int @@ -115,7 +114,6 @@ class _PaneState(t.NamedTuple): pane_height: int pane_pid: str pane_dead: bool - history_limit: int def _read_pane_state(pane: Pane) -> _PaneState: @@ -125,28 +123,36 @@ def _read_pane_state(pane: Pane) -> _PaneState: ``display-message`` call. ``history_size + cursor_y`` gives the absolute grid anchor at entry; ``pane_height`` gates the bottom- row capture clip; ``pane_pid`` and ``pane_dead`` surface - respawn-pane and pane-death events that invalidate the baseline; - ``history_limit`` lets the poll loop detect the trim-risk band - (the upper 10% near ``history-limit`` where ``grid_collect_history`` - is firing but ``hsize`` stays clamped at the cap). + respawn-pane and pane-death events that invalidate the baseline. """ stdout = pane.display_message( - "#{history_size}|#{cursor_y}|#{pane_height}|#{pane_pid}|#{pane_dead}|" - "#{history_limit}", + "#{history_size}|#{cursor_y}|#{pane_height}|#{pane_pid}|#{pane_dead}", get_text=True, ) - raw = stdout[0] if stdout else "0|0|0||0|0" - hs, cy, sy, pid, dead, hlimit = raw.split("|", 5) + raw = stdout[0] if stdout else "0|0|0||0" + hs, cy, sy, pid, dead = raw.split("|", 4) return _PaneState( history_size=int(hs), cursor_y=int(cy), pane_height=int(sy), pane_pid=pid, pane_dead=dead == "1", - history_limit=int(hlimit), ) +def _read_history_limit(pane: Pane) -> int: + """Read the pane's ``history-limit`` once. + + Fixed at pane creation (retroactive change only lands in tmux 3.7+), + so the result is safe to cache for the lifetime of a wait. Kept out + of :func:`_read_pane_state` so the per-tick read doesn't pay for a + value that never changes between polls. 
+ """ + stdout = pane.display_message("#{history_limit}", get_text=True) + raw = stdout[0] if stdout else "0" + return int(raw) + + @handle_tool_errors_async async def wait_for_text( pattern: str, @@ -348,6 +354,7 @@ async def wait_for_text( entry = await asyncio.to_thread(_read_pane_state, pane) baseline_abs = entry.history_size + entry.cursor_y baseline_pid = entry.pane_pid + baseline_hlimit = await asyncio.to_thread(_read_history_limit, pane) matched_lines: list[str] = [] found = False @@ -409,18 +416,13 @@ async def wait_for_text( # grid_collect_history trim during continuous output, where # hsize bounces between (hlimit - hlimit/10) and hlimit # faster than we can poll. Emit a one-shot warning when - # sampled state is in the trim-risk band AND state has - # advanced since entry, so agents subscribed to MCP log - # notifications know to verify results or switch to - # wait_for_channel. - if not warned_risk_band and state.history_limit > 0: - trim_batch = max(state.history_limit // 10, 1) - risk_floor = state.history_limit - trim_batch - advanced = ( - state.history_size > entry.history_size - or state.cursor_y != entry.cursor_y - ) - if state.history_size >= risk_floor and advanced: + # sampled state is in the trim-risk band so agents + # subscribed to MCP log notifications know to verify + # results or switch to wait_for_channel. + if not warned_risk_band and baseline_hlimit > 0: + trim_batch = max(baseline_hlimit // 10, 1) + risk_floor = baseline_hlimit - trim_batch + if state.history_size >= risk_floor: await _maybe_log( ctx, level="warning", @@ -428,7 +430,7 @@ async def wait_for_text( f"pane {pane.pane_id} is polling in the " "history-limit trim-risk band " f"(history_size {state.history_size} / " - f"history_limit {state.history_limit}); " + f"history_limit {baseline_hlimit}); " "wait_for_text correctness is best-effort " "here. For deterministic synchronization " "use wait_for_channel." 
diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index d4722b74..d150c26f 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -2046,6 +2046,67 @@ async def run() -> None: ), f"expected a trim-risk-band warning, got: {log_calls}" +def test_wait_for_text_warns_when_already_in_risk_band( + mcp_server: Server, mcp_pane: Pane +) -> None: + """``wait_for_text`` warns immediately if entry is already in the risk band. + + Unlike ``test_wait_for_text_warns_in_history_limit_risk_band`` which + advances into the band, this covers the case where the pane is + already near ``history-limit`` at entry. Without output (idle wait), + the simplified predicate (no ``advanced`` gate) must still fire the + one-shot warning. + """ + import asyncio + + mcp_pane.session.cmd("set-option", "-g", "history-limit", "50") + fresh_pane = mcp_pane.window.split() + assert fresh_pane.pane_id is not None + + def _hlimit_locked() -> bool: + hl = fresh_pane.display_message("#{history_limit}", get_text=True) + return bool(hl) and int(hl[0]) == 50 + + retry_until(_hlimit_locked, 5, raises=True) + + # history-limit is 50. Risk floor (top 10%) is 45. + # Print 100 lines to ensure hsize reaches the cap (50). + fresh_pane.send_keys("for i in $(seq 1 100); do echo line$i; done", True) + + def _prefilled() -> bool: + hs = fresh_pane.display_message("#{history_size}", get_text=True) + # We need it to be in the risk band (>= 45). + return bool(hs) and int(hs[0]) >= 45 + + retry_until(_prefilled, 10, raises=True) + + log_calls: list[tuple[str, str]] = [] + + class _RecordingContext: + async def report_progress(self, *args: t.Any, **kwargs: t.Any) -> None: + return + + async def warning(self, message: str) -> None: + log_calls.append(("warning", message)) + + async def run() -> None: + # Idle wait: no new output, no cursor movement. 
+ await wait_for_text( + pattern="NEVER_MATCH_idle_risk", + pane_id=fresh_pane.pane_id, + timeout=0.5, + interval=0.1, + socket_name=mcp_server.socket_name, + ctx=t.cast("t.Any", _RecordingContext()), + ) + + asyncio.run(run()) + + assert any( + level == "warning" and "trim-risk band" in msg for level, msg in log_calls + ), f"expected a trim-risk-band warning during idle wait, got: {log_calls}" + + def test_wait_for_content_change_warns_on_timeout( mcp_server: Server, mcp_pane: Pane ) -> None: From 82cc9cd8c85db7cf149d6296e138b7317f36b881 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 08:00:53 -0500 Subject: [PATCH 21/31] Pane(docs[wait_for_text]): point system-prompt fragment and troubleshooting at wait_for_channel Two doc surfaces still recommended the send_keys -> wait_for_text -> capture_pane pattern that the wait-family re-framing walked back: * docs/topics/prompting.md has a "For general tmux workflows" code block tagged ``:class: system-prompt`` -- specifically meant for agents to copy into their AGENTS.md / CLAUDE.md / .cursorrules. Higher leverage than tool descriptions because it shapes the agent's mental model before any tool call. * docs/topics/troubleshooting.md item 3 ("Timing") had the same recommendation in a "what to check" bullet -- lower visibility but still misleading. Both now lead with wait_for_channel + composed tmux wait-for -S as the deterministic primitive for command completion, and demote wait_for_text / wait_for_content_change to "output you don't author" duty. Matches the send_keys docstring, server instructions, and the wait_for_text "When NOT to use this" section. No CHANGES update -- the underlying re-framing already shipped under ### Documentation. These are stragglers being brought in line with the announced change. 
--- docs/quickstart.md | 6 +++--- docs/recipes.md | 7 +++++-- docs/tools/pane/send-keys.md | 5 ++++- docs/topics/gotchas.md | 8 ++++---- docs/topics/prompting.md | 17 +++++++++++------ docs/topics/troubleshooting.md | 2 +- 6 files changed, 28 insertions(+), 17 deletions(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index 515bb4f6..296bdc45 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -51,11 +51,11 @@ Search all my panes for the word "error". When you say "run `make test` and show me the output", the agent executes a three-step pattern: -1. {tool}`send-keys` — send the command to a tmux pane -2. {tool}`wait-for-text` — wait for the shell prompt to return (command finished) +1. {tool}`send-keys` — send the command (composed with `tmux wait-for -S <channel>`) to a tmux pane +2. {tool}`wait-for-channel` — block deterministically until the command signals completion 3. {tool}`capture-pane` — read the terminal output -This **send → wait → capture** sequence is the fundamental workflow. Most agent interactions with tmux follow this pattern or a variation of it. +This **send → wait → capture** sequence is the fundamental workflow. For commands the agent authors, the channel pattern is deterministic; for output the agent does not author (third-party log lines, daemon prompts, interactive supervisors), substitute {tool}`wait-for-text` for step 2. ## Next steps diff --git a/docs/recipes.md b/docs/recipes.md index 9f1e2e88..2869cb59 100644 --- a/docs/recipes.md +++ b/docs/recipes.md @@ -204,8 +204,11 @@ agent calls {tooliconl}`send-keys` in the original pane: ```{warning} Calling {toolref}`capture-pane` immediately after {toolref}`send-keys` is a race condition. {toolref}`send-keys` returns the moment tmux accepts the -keystrokes, not when the command finishes. Always use {toolref}`wait-for-text` -between them. +keystrokes, not when the command finishes.
For commands the agent authors, +compose `tmux wait-for -S <channel>` into the command and call +{toolref}`wait-for-channel` — deterministic, race-free. For output the +agent does not author (server-startup banners, test-result lines like +the ones above), use {toolref}`wait-for-text` instead. ``` ### The non-obvious part diff --git a/docs/tools/pane/send-keys.md b/docs/tools/pane/send-keys.md index 11f061ac..19518a05 100644 --- a/docs/tools/pane/send-keys.md +++ b/docs/tools/pane/send-keys.md @@ -7,7 +7,10 @@ terminal. This is the primary way to execute commands in tmux panes. **Avoid when** you need to run something and immediately capture the result — -send keys first, then use {tooliconl}`capture-pane` or {tooliconl}`wait-for-text`. +compose `tmux wait-for -S <channel>` into the keys and call +{tooliconl}`wait-for-channel` for deterministic completion, or fall back to +{tooliconl}`wait-for-text` / {tooliconl}`wait-for-content-change` when you +must observe output the agent does not author. **Side effects:** Sends keystrokes to the pane. If `enter` is true (default), the command executes. diff --git a/docs/topics/gotchas.md b/docs/topics/gotchas.md index b7aa2e91..76a63441 100644 --- a/docs/topics/gotchas.md +++ b/docs/topics/gotchas.md @@ -31,15 +31,15 @@ The `enter` parameter defaults to `true`, which is correct for commands (`make t {"tool": "capture_pane", "arguments": {"pane_id": "%0"}} ``` -The capture above may return the terminal state **before** pytest runs. Use {tooliconl}`wait-for-text` between them: +The capture above may return the terminal state **before** pytest runs.
Compose `tmux wait-for -S <channel>` into the command and block on {tooliconl}`wait-for-channel` — deterministic, race-free: ```json -{"tool": "send_keys", "arguments": {"keys": "pytest", "pane_id": "%0"}} -{"tool": "wait_for_text", "arguments": {"pattern": "passed|failed|error", "pane_id": "%0", "regex": true}} +{"tool": "send_keys", "arguments": {"keys": "pytest; tmux wait-for -S pytest_done", "pane_id": "%0"}} +{"tool": "wait_for_channel", "arguments": {"channel": "pytest_done", "timeout": 60}} {"tool": "capture_pane", "arguments": {"pane_id": "%0"}} ``` -See {ref}`recipes` for the complete pattern. +For output the agent does not author (third-party logs, daemon prompts, interactive supervisors), substitute {tooliconl}`wait-for-text` for `wait_for_channel`. See {ref}`recipes` for the complete pattern. ## Window names are not unique across sessions diff --git a/docs/topics/prompting.md b/docs/topics/prompting.md index 3f1fed98..c2c13b05 100644 --- a/docs/topics/prompting.md +++ b/docs/topics/prompting.md @@ -62,9 +62,9 @@ These natural-language prompts reliably trigger the right tool sequences: | Prompt | Agent interprets as | |--------|-------------------| -| [Run `pytest` in my build pane and show results]{.prompt} | {toolref}`send-keys` → {toolref}`wait-for-text` → {toolref}`capture-pane` | -| [Start the dev server and wait until it's ready]{.prompt} | {toolref}`send-keys` → {toolref}`wait-for-text` (for "listening on") | -| [Spin up the dev server in the bottom-right pane]{.prompt} | {toolref}`find-pane-by-position` (corner=bottom-right) → {toolref}`send-keys` → {toolref}`wait-for-text` | +| [Run `pytest` in my build pane and show results]{.prompt} | {toolref}`send-keys` (with `tmux wait-for -S` composed in) → {toolref}`wait-for-channel` → {toolref}`capture-pane` | +| [Start the dev server and wait until it's ready]{.prompt} | {toolref}`send-keys` → {toolref}`wait-for-text` (for "listening on" — third-party output the agent doesn't author) | +| [Spin up the dev server
in the bottom-right pane]{.prompt} | {toolref}`find-pane-by-position` (corner=bottom-right) → {toolref}`send-keys` → {toolref}`wait-for-text` (for the server's readiness banner) | | [Check if any pane has errors]{.prompt} | {toolref}`search-panes` with pattern "error" | | [Set up a workspace with editor, server, and tests]{.prompt} | {toolref}`create-session` → {toolref}`split-window` (x2) → {toolref}`set-pane-title` (x3) | | [What's running in my tmux sessions?]{.prompt} | {toolref}`list-sessions` → {toolref}`list-panes` → {toolref}`capture-pane` | @@ -90,8 +90,13 @@ Copy these into your agent's system instructions (`AGENTS.md`, `CLAUDE.md`, `.cu When executing long-running commands (servers, builds, test suites), use tmux via the libtmux MCP server rather than running them directly. -This keeps output accessible for later inspection. Use the pattern: -send_keys → wait_for_text (for completion signal) → capture_pane. +This keeps output accessible for later inspection. + +For command completion, compose `tmux wait-for -S <channel>` into the +shell command and call wait_for_channel — deterministic, no polling. +Use wait_for_text or wait_for_content_change for observation flows +(third-party logs, daemon prompts). Never capture_pane immediately +after send_keys — the command may still be running. ``` ### For safe agent behavior @@ -134,6 +139,6 @@ When an agent is unsure which tool to use, these rules help: 1. **Discovery first**: Call {toolref}`list-sessions` or {toolref}`list-panes` before acting on specific targets 2. **Prefer IDs**: Once you have a `pane_id`, use it for all subsequent calls — it never changes during the pane's lifetime -3. **Wait, don't poll**: Use {toolref}`wait-for-text` instead of repeatedly calling {toolref}`capture-pane` in a loop +3. **Wait, don't poll**: For commands the agent authors, prefer {toolref}`wait-for-channel` with `tmux wait-for -S <channel>` composed into the command — deterministic and race-free. 
Fall back to {toolref}`wait-for-text` or {toolref}`wait-for-content-change` for output the agent doesn't author. Never call {toolref}`capture-pane` in a retry loop. 4. **Content vs. metadata**: If looking for text *in* a terminal, use {toolref}`search-panes`. If looking for pane *properties* (name, PID, path), use {toolref}`list-panes` or {toolref}`get-pane-info` 5. **Destructive tools are opt-in**: Never kill sessions, windows, or panes unless the user explicitly asks diff --git a/docs/topics/troubleshooting.md b/docs/topics/troubleshooting.md index 34779398..977aa280 100644 --- a/docs/topics/troubleshooting.md +++ b/docs/topics/troubleshooting.md @@ -75,7 +75,7 @@ Symptom-based guide. Find your problem, follow the steps. 2. **Special characters**: tmux interprets some key names (e.g. `C-c`, `Enter`). If sending literal text, use `literal=true`. -3. **Timing**: After `send_keys`, use `wait_for_text` to wait for the command to complete before capturing output. Don't `capture_pane` immediately — the command may still be running. +3. **Timing**: After `send_keys`, prefer composing `tmux wait-for -S <channel>` into the shell command and calling `wait_for_channel` for deterministic completion. Use `wait_for_text` or `wait_for_content_change` only when waiting on output you do not author. Don't `capture_pane` immediately — the command may still be running. ## Silent startup failure From dc0389bb56b700bb6b05e5e9c0a83d753054927a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 09:00:08 -0500 Subject: [PATCH 22/31] docs(CHANGES[wait_for_text]): refine unreleased section to AGENTS.md proportions and ordering Three corrections against the Changelog Conventions in AGENTS.md: * **Section order.** The fixed subheading order is Breaking changes -> Dependencies -> What's new -> Fixes -> Documentation -> Development. The unreleased block had Documentation in the wrong slot (between Breaking changes and Dependencies) and Fixes ahead of Documentation out of order. 
Reordered to match. * **Proportional prose.** Dropped backend-mechanism phrasing ("trim-then-scroll bounce", "sampled hsize clamped near history-limit", "no longer falls back to raw cmd() calls") that violates the "internal jargon" anti-pattern. The big breaking-change entry is now three short paragraphs in user vocabulary (what changed, what raises, what the warning means). The Dependencies entry no longer walls-of-text 9 specific {meth} method names; it describes the capability ("typed wrappers for tmux commands the server invokes"). * **PR ref form.** Replaced the lone `(#45 follow-up)` qualifier with bare `(#45)` to match the project's pattern across 25+ other entries. The Documentation entry also picks up the doc-fragment work that Phase 12 + Phase 14a actually shipped (quickstart, gotchas, prompting, troubleshooting, recipes, send-keys topics), not just the three docstring surfaces named originally. The reordered breaking change pair leads with the bigger semantic change (waits for new output) and follows with the syntactic break (content_start / content_end removal) so readers see the meatier fix first. --- CHANGES | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/CHANGES b/CHANGES index 30f5f0c3..9a2f09ba 100644 --- a/CHANGES +++ b/CHANGES @@ -8,9 +8,17 @@ _Notes on upcoming releases will be added here_ ### Breaking changes +**{tooliconl}`wait-for-text` waits for new output, not stale scrollback** + +{tooliconl}`wait-for-text` now matches lines written *after* the call begins. The previous behaviour returned `found=True` on the first poll whenever the pattern already lived in the pane, so agents synchronising on command output got the wrong result. For the synchronous "is the pattern in the pane right now?" case, call {tooliconl}`search-panes` instead. 
+ +Baseline-loss events surface as `ToolError`: pane respawn, pane death, `clear-history`, and any other event that drops history below the entry baseline. Pane resize that pulls lines back from history into the visible region is exempted — the anchor stays valid. + +Trim during heavy output near `history-limit` can't be reliably detected from polling alone. When polling approaches that limit, the tool emits a `notifications/message` warning so MCP clients can decide whether to keep waiting, retry, or switch to {tooliconl}`wait-for-channel`. For deterministic command completion, compose `tmux wait-for -S <channel>` into the shell command and call {tooliconl}`wait-for-channel`. (#45) + **{tooliconl}`wait-for-text` drops `content_start` / `content_end`** -The baseline anchor introduced in this release follows the pane's grid position automatically, so the previous manual capture-range parameters have no remaining purpose. Agents that named them should drop them from their call sites. (#45) +The new baseline anchor follows the pane's grid position automatically, so the manual capture-range parameters have no remaining purpose. Drop them from call sites. (#45) ```python # Before @@ -20,33 +28,25 @@ wait_for_text(pattern="OK", content_start=-100) wait_for_text(pattern="OK") ``` -**{tooliconl}`wait-for-text` waits for new output, not stale scrollback** - -Anchors on the pane's grid position at entry and only matches lines written after the call begins; the previous behaviour returned `found=True` on the first poll whenever the pattern already lived in the pane. For the synchronous "is the pattern in the pane right now?" case, call {tooliconl}`search-panes` instead. - -Baseline-loss events surface as `ToolError`: pane respawn, pane death, `clear-history`, and any other shrink of `history_size` below the entry value. Resize-grow is exempted — it decrements `history_size` without freeing row data, so the anchor stays valid. 
- -Trim that fires during continuous output cannot be reliably detected from `history_size` alone: tmux's trim-then-scroll bounce keeps sampled `hsize` clamped near `history-limit`. When polling approaches that band, the tool emits a `notifications/message` warning so MCP clients can decide whether to keep waiting, retry, or switch to {tooliconl}`wait-for-channel`. For deterministic command-completion synchronization, compose `tmux wait-for -S ` into the shell command and call {tooliconl}`wait-for-channel`. (#45) - -### Documentation - -**Wait family is re-framed around {tooliconl}`wait-for-channel` as the deterministic primitive** - -The {tooliconl}`send-keys` docstring, the server system instructions, and the {tooliconl}`wait-for-text` docstring now point agents at {tooliconl}`wait-for-channel` with composed `tmux wait-for -S` for command completion, and reserve {tooliconl}`wait-for-text` / {tooliconl}`wait-for-content-change` for output the agent does not author. The `run_and_wait` recipe is the canonical status-preserving pattern. (#45 follow-up) - ### Dependencies -**Minimum `libtmux>=0.56.0`** (was `>=0.55.1`). Unlocks the new tmux-command wrappers shipped in libtmux 0.56.0 — {meth}`~libtmux.Pane.respawn`, {meth}`~libtmux.Pane.copy_mode`, {meth}`~libtmux.Pane.pipe`, {meth}`~libtmux.Pane.swap`, {meth}`~libtmux.Pane.paste_buffer`, {meth}`~libtmux.Pane.clear_history`, {meth}`~libtmux.Pane.display_message`, {meth}`~libtmux.Server.delete_buffer`, and the {meth}`~libtmux.Session.next_window` / {meth}`~libtmux.Session.previous_window` / {meth}`~libtmux.Session.last_window` trio — so the MCP no longer falls back to raw `cmd()` calls for tmux commands the upstream API now covers. (#46) +**Minimum `libtmux>=0.56.0`** (was `>=0.55.1`). Picks up libtmux 0.56's typed wrappers for the tmux commands the server invokes — the MCP now uses libtmux's public API instead of raw command-line escapes for pane lifecycle, scrollback, and session navigation. 
(#46) ### Fixes **{tooliconl}`wait-for-text` matches patterns across visually-wrapped lines** -The poll-loop capture now passes tmux's `-J` flag (`join_wrapped=True` on `Pane.capture_pane`), so a pattern that crosses the pane's visual wrap is still matched against the joined logical line. Long error strings like `"Build failed: module not found"` that tmux split across two rows previously slipped through `re.search`. The joined logical line is returned in `matched_lines` and may exceed `pane_width`. (#45) +Long patterns like `"Build failed: module not found"` that tmux wraps at the pane's column width are now matched against the joined logical line. Previously the wrap split the pattern across two captured rows and neither row matched. The joined line is returned in `matched_lines` and can exceed the pane width. (#45) **{tooliconl}`wait-for-text` rejects misused `pattern` / `interval` / `timeout`** -Empty `pattern`, `interval` below 10 ms, and non-positive `timeout` each now raise `ToolError` at entry instead of silently matching every line, spinning the tmux server in a tight loop, or completing a surprise single probe. (#45) +Empty `pattern`, `interval` below 10 ms, and non-positive `timeout` each raise `ToolError` at entry. Previously they silently matched every line, spun the tmux server in a tight loop, or completed a surprise single probe. (#45) + +### Documentation + +**Wait family is re-framed around {tooliconl}`wait-for-channel` as the deterministic primitive** + +The {tooliconl}`send-keys` docstring, server system instructions, {tooliconl}`wait-for-text` docstring, and the user-facing quickstart, gotchas, prompting, troubleshooting, recipes, and send-keys topics now point agents at {tooliconl}`wait-for-channel` with composed `tmux wait-for -S` for command completion. {tooliconl}`wait-for-text` and {tooliconl}`wait-for-content-change` are reframed as the fallbacks for output the agent does not author. 
The `run_and_wait` recipe shows the canonical status-preserving pattern. (#45) ## libtmux-mcp 0.1.0a6 (2026-05-09) From 61f42efece7a711277d6a5db2f4a1f777e6b780c Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 09:21:18 -0500 Subject: [PATCH 23/31] Pane(fix[wait_for_channel]): stop killing the parent shell in run_and_wait recipe The run_and_wait prompt template appended ``exit $__mcp_status`` to its shell payload to preserve the command's exit status. In an interactive shell that exits the shell itself; with a single-pane session, the session and tmux server exit with it. Reproduced: channel signals, then list-panes / capture-pane fail with "no server running." The equivalent example in docs/tools/pane/wait-for-channel.md was worse -- ``pytest; status=$?; tmux wait-for -S tests_done; exit $status`` uses ``status`` (read-only in zsh) AND ``exit $status`` (kills the shell in any shell). Reproduced: ``zsh: read-only variable: status`` followed by a deadlocked wait-for. Replace both with the simpler ``{command}; tmux wait-for -S {channel}`` form. Shell ``;`` semantics fire the wait-for whether the command succeeds or fails, so the edge-triggered signal still never deadlocks on a crashed command -- which was the safety property the original ``exit $status`` chain was supposed to provide. Drop the "preserving the command's exit status" claim from the recipe docstring and rendered prose; preserving the status in an interactive shell requires out-of-band routing (a file, a tmux user variable, a scratch pane) that doesn't belong in a teaching recipe. The existing test pinned ``exit $__mcp_status`` as a required token in the rendered template -- that assertion is inverted to lock the new safe contract. Tests still pin the channel naming pattern and the wait-for suffix. Adds a Fixes CHANGES entry: the run_and_wait prompt shipped in 0.1.0a2, so this is a real user-facing bug for 0.1.0a6 users. 
--- CHANGES | 4 ++++ docs/tools/pane/wait-for-channel.md | 10 ++++++---- src/libtmux_mcp/prompts/recipes.py | 19 ++++++++++++------- tests/test_prompts.py | 21 ++++++++++++++++++--- 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/CHANGES b/CHANGES index 9a2f09ba..3f3e8797 100644 --- a/CHANGES +++ b/CHANGES @@ -34,6 +34,10 @@ wait_for_text(pattern="OK") ### Fixes +**{tooliconl}`wait-for-channel` recipe no longer exits the parent shell** + +The `run_and_wait` prompt template previously appended `exit $__mcp_status` to its shell payload to preserve the command's exit status. In an interactive shell that exits the shell itself, destroying single-pane sessions. The recipe now signals completion via `tmux wait-for -S` without exiting, and the equivalent example in {doc}`/tools/pane/wait-for-channel` is similarly fixed. Exit-status preservation in interactive shells is documented as out-of-scope; agents that need it should inspect the captured output for command-specific success markers. (#47) + **{tooliconl}`wait-for-text` matches patterns across visually-wrapped lines** Long patterns like `"Build failed: module not found"` that tmux wraps at the pane's column width are now matched against the joined logical line. Previously the wrap split the pattern across two captured rows and neither row matched. The joined line is returned in `matched_lines` and can exceed the pane width. (#45) diff --git a/docs/tools/pane/wait-for-channel.md b/docs/tools/pane/wait-for-channel.md index e0c3d815..1fbb7c98 100644 --- a/docs/tools/pane/wait-for-channel.md +++ b/docs/tools/pane/wait-for-channel.md @@ -2,24 +2,26 @@ tmux's `wait-for` command exposes named, server-global channels that clients can signal and block on. These give agents an explicit synchronization primitive — strictly cheaper in agent turns than polling pane content via {tooliconl}`capture-pane` or {tooliconl}`wait-for-text`. 
-The composition pattern: {tooliconl}`send-keys` a command that emits the signal on its exit, then `wait_for_channel`. The signal MUST fire on both success and failure paths or the wait will block until the timeout. +The composition pattern: {tooliconl}`send-keys` a command followed by `; tmux wait-for -S NAME`, then call `wait_for_channel`. Shell `;` semantics fire the second statement whether the first succeeds or fails, so the edge-triggered signal never deadlocks the agent on a crashed command. ```python send_keys( pane_id="%1", - keys="pytest; status=$?; tmux wait-for -S tests_done; exit $status", + keys="pytest; tmux wait-for -S tests_done", ) wait_for_channel("tests_done", timeout=60) ``` -The `; status=$?; tmux wait-for -S NAME; exit $status` idiom is the load-bearing safety contract — `wait-for` is edge-triggered, so a crash before the signal would deadlock until the wait's `timeout`. +The `; tmux wait-for -S NAME` suffix is the load-bearing safety contract — `wait-for` is edge-triggered, so a crash before the signal would deadlock until the wait's `timeout`. The shell separator `;` runs the next statement unconditionally, so the signal fires on both success and failure paths. + +The payload deliberately does not append `exit $?` — in an interactive shell that exits the shell itself, taking single-pane sessions down with it. If exit-status preservation matters, capture the status out-of-band (e.g. write it to a file the agent reads later, or use a dedicated scratch pane). ```{fastmcp-tool} wait_for_tools.wait_for_channel ``` **Use when** the shell command can reliably emit the signal (single test runs, build scripts, dev-server boot, anything composable with -`; status=$?; tmux wait-for -S name; exit $status`). +`; tmux wait-for -S name`). **Avoid when** the signal cannot be guaranteed — for example, when the command might be killed externally. 
Use {tooliconl}`wait-for-text` diff --git a/src/libtmux_mcp/prompts/recipes.py b/src/libtmux_mcp/prompts/recipes.py index 098c6b28..77881782 100644 --- a/src/libtmux_mcp/prompts/recipes.py +++ b/src/libtmux_mcp/prompts/recipes.py @@ -21,9 +21,10 @@ def run_and_wait( """Run a shell command in a tmux pane and wait for completion. The returned template teaches the model the safe composition - pattern — always emit ``tmux wait-for -S`` on both success and - failure paths so a crash never deadlocks the agent on an - edge-triggered signal. See ``docs/topics/prompting.md``. + pattern: shell ``;`` semantics fire ``tmux wait-for -S`` whether + the command succeeds or fails, so the edge-triggered signal + never deadlocks an agent waiting on a crashed command. See + ``docs/topics/prompting.md``. Each invocation embeds a fresh UUID-scoped channel name so concurrent agents (or parallel prompt calls from a single agent) @@ -40,11 +41,9 @@ def run_and_wait( Maximum seconds to wait for the signal. Default 60. """ channel = f"libtmux_mcp_wait_{uuid.uuid4().hex}" - shell_payload = ( - f"{command}; __mcp_status=$?; tmux wait-for -S {channel}; exit $__mcp_status" - ) + shell_payload = f"{command}; tmux wait-for -S {channel}" return f"""Run this shell command in tmux pane {pane_id} and block -until it finishes, preserving the command's exit status: +until it finishes: ```python send_keys( @@ -58,6 +57,12 @@ def run_and_wait( After the channel signals, read the last ~100 lines to verify the command's behaviour. Do NOT use a `capture_pane` retry loop — `wait_for_channel` is strictly cheaper in agent turns. + +The payload does not preserve the command's exit status: doing so +in an interactive shell would require exiting the shell (which kills +the pane) or routing through an out-of-band file or tmux variable. +If you need the status, inspect the captured output for +command-specific success markers. 
""" diff --git a/tests/test_prompts.py b/tests/test_prompts.py index 99e32278..253d6a21 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -50,14 +50,29 @@ def test_prompts_as_tools_enabled_by_env( def test_run_and_wait_returns_string_template() -> None: - """``run_and_wait`` prompt produces a string with the safe idiom.""" + """``run_and_wait`` prompt produces a string with the safe idiom. + + The rendered payload must NOT contain ``exit`` in the shell command + portion: an interactive-shell ``exit`` after the signal kills the + parent shell, which destroys single-pane tmux sessions. The signal + fires unconditionally via shell ``;`` semantics whether the command + succeeds or fails — the wait-for primitive doesn't need an exit to + preserve safety. + """ from libtmux_mcp.prompts.recipes import run_and_wait text = run_and_wait(command="pytest", pane_id="%1", timeout=30.0) assert "tmux wait-for -S libtmux_mcp_wait_" in text assert "wait_for_channel" in text - # Exit-status preservation is the whole point — pin it. - assert "exit $__mcp_status" in text + # The shell payload (between `keys=` and the closing quote) must + # not append ``exit`` after the wait-for — that would kill the + # parent shell in an interactive pane. Check the rendered keys= + # line for the absence of the exit suffix. 
+ keys_line = next( + line for line in text.splitlines() if line.strip().startswith("keys=") + ) + assert "; exit" not in keys_line + assert "exit $" not in keys_line def test_run_and_wait_channel_is_uuid_scoped() -> None: From 5927faac0ba7cd21b90c7abe9a2e8462ae567b34 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 09:26:57 -0500 Subject: [PATCH 24/31] Pane(fix[wait_for_text]): filter stale below-cursor content captured at entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cursor-position anchor captures rows below the entry cursor each tick, which means rows that already contained content at entry got matched by the regex on the first poll. Reviewer reproduced with pane state 0:0:10 and STALE_BELOW painted on row 1: the wait returned found=True with matched_lines=['STALE_BELOW'] in 10 ms. The docstring claim that wait_for_text matches "lines written after the call starts" was broader than the implementation could honestly deliver. Snapshot the rows below the entry cursor once at entry as a frozenset. On every tick, drop captured lines whose content is already in that set before running the regex. Result is an honest content-delta contract: "row exists at start_line onward AND was not at that content at entry time". Stale paint-style writes (TUI repaints, paste-text, manual cursor positioning) get filtered; new output appended below the entry cursor still matches; in-place updates of rows that pre-existed at entry are also filtered (false-negative edge case the docstring now calls out). Trade-off: one extra capture-pane call at entry. The frozenset lookup is O(1) per line. Per-tick cost is unchanged in subprocess count. 
Adds test_wait_for_text_ignores_stale_below_cursor as the regression the reviewer's reproduction would have caught: respawn the pane with ``printf 'TOP\nSTALE_BELOW\n'; printf '\033[H'; sleep 60`` (the ESC[H sequence moves the cursor back to (0,0) so STALE_BELOW sits on row 1 below the cursor), then assert wait_for_text(STALE_BELOW) returns found=False. No CHANGES update: this tightens behavior introduced earlier in the same PR (the baseline-anchor design). A 0.1.0a6 user never saw the cursor anchor at all, so the filter is not a user-visible fix — it's the docstring contract getting honest in the same release. --- src/libtmux_mcp/tools/pane_tools/wait.py | 45 ++++++++++++++++++++--- tests/test_pane_tools.py | 47 ++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 2f5b848e..e2583bf1 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -174,13 +174,25 @@ async def wait_for_text( Use this instead of polling :func:`capture_pane` manually — it saves agent tokens and turns. - **What "new" means.** At entry the tool snapshots the pane's absolute - grid position (``history_size + cursor_y``) and only matches lines - written below that baseline. Stale scrollback that was already - present when the call began is ignored. For the synchronous "is - the pattern in the pane right now?" check, call + **What "new" means.** At entry the tool snapshots two things: the + pane's absolute grid position (``history_size + cursor_y``) and the + contents of every row below the entry cursor. Each tick captures + the rows below the original baseline and discards any row whose + content matches the entry snapshot — those rows are stale paint + that pre-dates the wait, not output written after it. 
Scrollback + that was already present when the call began is ignored, and so + is paint-style content left below the cursor by TUI repaints, + ``paste-text``, or manual cursor positioning. For the synchronous + "is the pattern in the pane right now?" check, call {tooliconl}`search-panes` instead. + The content-delta filter has a rare false-negative case: if new + output happens to byte-match a row in the entry snapshot, that + new row is filtered out. The patterns agents typically wait on + (command-specific markers, full status strings) make this + collision unlikely in practice. For stricter "any change" + semantics, use {tooliconl}`wait-for-content-change`. + In-place updates to the entry cursor's row — carriage-return rewrites, progress spinners, single-line status updates — are not observed; only rows below the entry cursor count as "new." @@ -356,6 +368,22 @@ async def wait_for_text( baseline_pid = entry.pane_pid baseline_hlimit = await asyncio.to_thread(_read_history_limit, pane) + # Snapshot rows below the entry cursor by content. The cursor anchor + # alone matches any row at start_line onward, which includes stale + # paint-style content (TUI repaints, paste-text, manual cursor + # positioning) that pre-dates the wait. Filtering per-tick captures + # against this set turns the cursor anchor into an honest "content + # written after entry" predicate. Stored as a frozenset for O(1) + # lookup against the typically small below-cursor row set. + entry_below_cursor: frozenset[str] = frozenset( + await asyncio.to_thread( + pane.capture_pane, + start=entry.cursor_y + 1, + end=None, + join_wrapped=True, + ) + ) + matched_lines: list[str] = [] found = False warned_risk_band = False @@ -462,7 +490,12 @@ async def wait_for_text( end=None, join_wrapped=True, ) - hits = [line for line in lines if compiled.search(line)] + # Filter out lines whose content was already below the + # entry cursor — those are stale paint, not output written + # after the call began. 
Then run the regex against the + # truly-new lines. + new_lines = [line for line in lines if line not in entry_below_cursor] + hits = [line for line in new_lines if compiled.search(line)] if hits: matched_lines.extend(hits) found = True diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index d150c26f..42fbf8a9 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1301,6 +1301,53 @@ async def run() -> WaitForTextResult: assert any("WAIT_MARKER_after" in line for line in result.matched_lines) +def test_wait_for_text_ignores_stale_below_cursor( + mcp_server: Server, mcp_pane: Pane +) -> None: + """Stale paint-style content below the cursor must not match. + + The cursor-position anchor (``start_line = cy0 + 1``) captures + rows below the entry cursor — which can include content that + pre-dates the wait (TUI repaints, ``paste-text``, manual cursor + positioning). The entry-time content snapshot filters those rows + out so only content written after entry matches the regex. + + Setup parks the cursor at row 0 with ``STALE_BELOW`` painted on + row 1, then waits for a pattern that's already on screen. The + snapshot filter must drop the row before the regex sees it. + """ + import asyncio + + # Print STALE_BELOW, then move the cursor back to the top-left so + # row 1 holds stale content that wait_for_text would otherwise + # match on the first poll. The trailing sleep keeps the pane state + # frozen for the wait's duration. Double-quote the sh -c argument + # so the inner single-quoted printf format strings don't break the + # outer quoting. 
+ paint_and_park = ( + "printf 'TOP\\nSTALE_BELOW\\n'; " # write 2 rows; cursor lands on row 2 + "printf '\\033[H'; " # ESC[H = move cursor to (0,0) + "sleep 60" + ) + mcp_pane.respawn(kill=True, shell=f'sh -c "{paint_and_park}"') + + def _staged() -> bool: + return any("STALE_BELOW" in line for line in mcp_pane.capture_pane()) + + retry_until(_staged, 5, raises=True) + + result = asyncio.run( + wait_for_text( + pattern="STALE_BELOW", + pane_id=mcp_pane.pane_id, + timeout=0.5, + socket_name=mcp_server.socket_name, + ) + ) + assert result.found is False + assert result.timed_out is True + + def test_wait_for_text_does_not_match_bottom_row_clip( mcp_server: Server, mcp_pane: Pane ) -> None: From 2b6124f207b5b74eced548e171a3cb3cfe69775f Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 09:29:00 -0500 Subject: [PATCH 25/31] Pane(docs[wait_for_text]): finish the wait-for-channel re-framing in remaining surfaces Phase 12 + Phase 14a swept six doc surfaces; a follow-up review caught three more: * docs/tools/index.md "Running a command?" entry led with send-keys -> wait-for-text + capture-pane. Now leads with the channel-composed pattern for authored commands, with wait-for-text and wait-for-content-change demoted to "output you don't author." * docs/recipes.md pytest workflow used wait-for-text(pattern="passed|failed|error"). Pytest is the agent's authored command -- the deterministic composition is "pytest; tmux wait-for -S pytest_done" + wait-for-channel. Updated the Act paragraph to match. * src/libtmux_mcp/prompts/recipes.py interrupt_gracefully kept wait_for_text on purpose -- C-c is a signal, not a command, so there is no shell statement to compose tmux wait-for -S into. Added a clarifying note in the rendered template so future readers don't apply the channel re-framing reflexively. No CHANGES update: the wait-family re-framing already has its entry under ### Documentation. This just completes the sweep. 
--- docs/recipes.md | 8 +++++--- docs/tools/index.md | 3 ++- src/libtmux_mcp/prompts/recipes.py | 4 ++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/recipes.md b/docs/recipes.md index 2869cb59..ea926a75 100644 --- a/docs/recipes.md +++ b/docs/recipes.md @@ -394,9 +394,11 @@ long-lived process, I would not hijack it -- I would use a different pane. ### Act The agent calls {tooliconl}`clear-pane`, then {tooliconl}`send-keys` with -`keys: "pytest"`, then {tooliconl}`wait-for-text` with -`pattern: "passed|failed|error"` and `regex: true`, then -{tooliconl}`capture-pane` to read the fresh output. +`keys: "pytest; tmux wait-for -S pytest_done"`, then +{tooliconl}`wait-for-channel` with `channel: "pytest_done"`, then +{tooliconl}`capture-pane` to read the fresh output. Composing the +`tmux wait-for -S` signal directly into the shell command is the +deterministic path for authored commands. ### The non-obvious part diff --git a/docs/tools/index.md b/docs/tools/index.md index 1bd13ba2..a9e153ec 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -20,7 +20,8 @@ All tools accept an optional `socket_name` parameter for multi-server support. I - Already know the `pane_id` → use it directly **Running a command?** -- {tool}`send-keys` — then {tool}`wait-for-text` + {tool}`capture-pane` +- {tool}`send-keys` (with `tmux wait-for -S <channel>` composed into the keys) → {tool}`wait-for-channel` → {tool}`capture-pane` — the deterministic path for commands the agent authors +- For output the agent does not author (third-party logs, daemon prompts), use {tool}`wait-for-text` or {tool}`wait-for-content-change` between `send-keys` and `capture-pane` - Pasting multi-line text? 
→ {tool}`paste-text` **Creating workspace structure?** diff --git a/src/libtmux_mcp/prompts/recipes.py b/src/libtmux_mcp/prompts/recipes.py index 77881782..810f3ef3 100644 --- a/src/libtmux_mcp/prompts/recipes.py +++ b/src/libtmux_mcp/prompts/recipes.py @@ -163,6 +163,10 @@ def interrupt_gracefully(pane_id: str) -> str: 2. `wait_for_text(pane_id="{pane_id}", pattern="\\$ |\\# |\\% ", regex=True, timeout=5.0)` — waits for a common shell prompt glyph. Adjust the pattern to match the user's shell theme. + The `wait_for_channel` pattern doesn't apply here — `C-c` is a + signal, not a shell command, so there's no statement to compose + `tmux wait-for -S` into. The shell prompt itself is the only + signal that the interrupt landed. 3. If the wait times out the process is ignoring SIGINT. Stop and ask the caller how to proceed — do NOT escalate automatically to `C-\\` (SIGQUIT) or `kill`. From 85c0c974bd49dca0090328e1b2f4d984525ee661 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 09:30:23 -0500 Subject: [PATCH 26/31] Pane(docs[wait_for_text]): correct timeout-budget comment to describe what early-anchor measures The pre-baseline start_time anchor was annotated as preventing a stalled tmux server from blowing the user-supplied timeout. That overstated what the early anchor does. libtmux's tmux_cmd uses Popen.communicate() with no subprocess timeout, so a hung tmux call blocks indefinitely regardless of where start_time was anchored. The early anchor's actual value: WaitForTextResult.elapsed_seconds reflects total call duration including the baseline read, so a hung tmux call's blowout is visible in the returned result. Measurement, not prevention. Comment-only change. No CHANGES, no behavior delta. 
--- src/libtmux_mcp/tools/pane_tools/wait.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index e2583bf1..08885416 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -349,10 +349,14 @@ async def wait_for_text( assert pane.pane_id is not None - # Anchor ``start_time`` before the baseline read so a stalled - # tmux server cannot blow the user-supplied ``timeout`` budget - # — libtmux's ``tmux_cmd`` uses ``Popen.communicate()`` with no - # subprocess timeout, so the read can block arbitrarily long. + # Anchor ``start_time`` before the baseline read so the elapsed + # time returned in ``WaitForTextResult.elapsed_seconds`` reflects + # total call duration, including the baseline read. The + # user-supplied ``timeout`` still cannot bound a stalled tmux + # command — libtmux's ``tmux_cmd`` uses ``Popen.communicate()`` + # with no subprocess timeout, so a hung tmux read can exceed the + # budget. The early anchor measures that blowout; it doesn't + # prevent it. start_time = time.monotonic() deadline = start_time + timeout From aad282118dec70f420d0e15bce5e7c329ecd448b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 10:07:56 -0500 Subject: [PATCH 27/31] Pane(docs[wait_for_content_change]): note weaker baseline-loss detection than wait_for_text This branch hardens wait_for_text against pane respawn, pane death, and clear-history mid-wait, and re-frames the wait family as a fallback ladder that ends at wait_for_content_change. But the sibling tool itself never gained those guards -- so an agent that follows the recommended fallback order lands on the tool with the weakest baseline-loss detection. Respawn, death, and clear-history surface there as silent changed=True returns instead of ToolError. 
Add a single docstring paragraph naming the gap and pointing back at wait_for_channel for correctness-sensitive flows. Full parity with wait_for_text is filed as #53 -- this commit closes the disclosure gap so agents see the asymmetry from the tool description directly, not only from following the issue trail. Verified via the local MCP probe: wait_for_content_change now exposes the wait_for_text / ToolError / wait_for_channel tokens in its description across all four configured clients. --- src/libtmux_mcp/tools/pane_tools/wait.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 08885416..27be9866 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -556,6 +556,11 @@ async def wait_for_content_change( what the output will be — it waits for "something happened" rather than a specific pattern. + Unlike ``wait_for_text``, this tool does not raise ``ToolError`` on + pane respawn, pane death, or ``clear-history`` mid-wait — those events + surface as ``changed=True`` returns instead. For correctness-sensitive + flows prefer ``wait_for_channel`` composed with ``tmux wait-for -S``. + Emits :meth:`fastmcp.Context.report_progress` each tick when a Context is injected, so clients can render a progress indicator during the wait. From 94ec86560a13b6247769d28902e19f970c6ac25b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 10:08:57 -0500 Subject: [PATCH 28/31] Pane(docs[wait_for_channel]): drop dangerous exit-$status pattern from tool docstrings The recipe fix in #47 replaced run_and_wait's killing payload (`cmd; status=$?; tmux wait-for -S name; exit $status`) with the safe `cmd; tmux wait-for -S name` form, because `exit $status` in an interactive shell exits the shell itself and destroys single-pane sessions. 
The sweep missed the two surfaces where it matters most: * wait_for_tools.py module docstring (Safety section) -- told every agent reading it to use the killing payload. * wait_for_channel function docstring (composition example) -- the same payload appears as the canonical composition example, with a body paragraph explicitly recommending `status=$?; ...; exit $status` as "important." Both become MCP tool descriptions at the protocol level, so agents learn the destructive pattern directly from the channel tool that this PR positions as the deterministic primitive. That is the opposite of the framing the rest of the branch establishes. Replace both example payloads with `pytest; tmux wait-for -S tests_done` (the form recipes.py renders since the recipe fix), and add a "Do NOT chain exit $status" warning explaining the failure mode so an agent that has read older docs can correct course. Exit-status preservation in interactive shells is named as out-of-scope to match the recipe's stance. Verified via the local MCP probe against the swapped `tmux` server on claude / codex / cursor / gemini: wait_for_channel now teaches the safe pattern and contains no unguarded `exit $status` token. --- src/libtmux_mcp/tools/wait_for_tools.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/libtmux_mcp/tools/wait_for_tools.py b/src/libtmux_mcp/tools/wait_for_tools.py index f1a7841c..cd7d0511 100644 --- a/src/libtmux_mcp/tools/wait_for_tools.py +++ b/src/libtmux_mcp/tools/wait_for_tools.py @@ -17,9 +17,15 @@ timeout and wraps the underlying ``subprocess.run`` call in ``timeout=timeout``. Agents SHOULD use the safe composition pattern:: - send_keys("pytest; status=$?; tmux wait-for -S tests_done; exit $status") - -This ensures the signal fires on both success and failure paths. 
+ send_keys("pytest; tmux wait-for -S tests_done") + +Shell ``;`` semantics fire ``wait-for -S`` whether ``pytest`` succeeded +or failed, so the edge-triggered signal never deadlocks the wait. Do +NOT chain ``exit $status`` after the signal — in interactive shells +that exits the shell itself, which destroys single-pane sessions and +takes the tmux server down with them. Exit-status preservation in +interactive shells is out-of-scope; inspect the captured output for +command-specific success markers. """ from __future__ import annotations @@ -109,15 +115,18 @@ async def wait_for_channel( milestones into explicit synchronisation points:: send_keys( - "pytest; status=$?; tmux wait-for -S tests_done; exit $status", + "pytest; tmux wait-for -S tests_done", pane_id=..., ) wait_for_channel("tests_done", timeout=60) - The ``status=$?; ...; exit $status`` idiom is important: ``wait-for`` - is edge-triggered, so if the shell command crashes before issuing - the signal the wait will block until ``timeout``. Emitting the - signal unconditionally (success or failure) avoids that penalty. + Shell ``;`` semantics fire ``wait-for -S`` whether the command + succeeded or failed, so the edge-triggered signal never deadlocks + on a crash. Do NOT chain ``exit $status`` after the signal — in an + interactive shell that exits the shell itself, which destroys + single-pane sessions. Exit-status preservation in interactive + shells is out-of-scope; inspect the captured output for + command-specific success markers. Parameters ---------- From 0ded55861d66a7b32cf302da62dbadffca3bf093 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 10:09:15 -0500 Subject: [PATCH 29/31] Pane(docs[run_and_wait]): drop stale "status-preserving" claims across surfaces The recipe fix in #47 removed exit-status preservation from the run_and_wait recipe itself but the supporting prose that describes what run_and_wait produces was not swept. 
Five surfaces still advertised the recipe as status-preserving: * docs/prompts.md sample render -- showed the OLD payload `pytest; __mcp_status=$?; tmux wait-for -S name; exit $__mcp_status` in the run_and_wait sample-render block, including the body paragraph that justified the exit-status idiom. Readers of the prompts catalogue page learned the killing pattern even though the rendered prompt itself no longer emits it. * src/libtmux_mcp/tools/pane_tools/io.py send_keys docstring -- referred to run_and_wait as "the canonical status-preserving pattern." The send_keys description is the entry point most agents read for picking a sync primitive; pointing them at a status- preserving recipe that no longer preserves status is a contract lie. * src/libtmux_mcp/tools/pane_tools/wait.py wait_for_text docstring -- carried "safe, status-preserving composition" in the channel cross-link. * CHANGES unreleased entry -- the Documentation bullet for the channel-first re-framing closed with "The run_and_wait recipe shows the canonical status-preserving pattern." * docs/demo.md -- the worked examples and "fundamental pattern" prose still chained `send_keys -> wait_for_text -> capture_pane`, rather than the channel-first pattern this PR establishes. Replace "status-preserving" with "safe-completion" on the io.py / CHANGES surfaces; rewrite the prompts.md sample render to match the actual run_and_wait output (now `cmd; tmux wait-for -S channel`) with the recipe's explicit "does not preserve exit status" disclaimer; switch demo.md's worked examples and "fundamental pattern" sentence to the wait_for_channel composition. Verified via the local MCP probe across claude / codex / cursor / gemini: send_keys description now contains "safe-completion" and no surface teaches the OLD payload string. 
--- CHANGES | 2 +- docs/demo.md | 6 +++--- docs/prompts.md | 19 +++++++++++++------ src/libtmux_mcp/tools/pane_tools/io.py | 2 +- src/libtmux_mcp/tools/pane_tools/wait.py | 6 +++--- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/CHANGES b/CHANGES index 3f3e8797..cc42eb8b 100644 --- a/CHANGES +++ b/CHANGES @@ -50,7 +50,7 @@ Empty `pattern`, `interval` below 10 ms, and non-positive `timeout` each raise ` **Wait family is re-framed around {tooliconl}`wait-for-channel` as the deterministic primitive** -The {tooliconl}`send-keys` docstring, server system instructions, {tooliconl}`wait-for-text` docstring, and the user-facing quickstart, gotchas, prompting, troubleshooting, recipes, and send-keys topics now point agents at {tooliconl}`wait-for-channel` with composed `tmux wait-for -S` for command completion. {tooliconl}`wait-for-text` and {tooliconl}`wait-for-content-change` are reframed as the fallbacks for output the agent does not author. The `run_and_wait` recipe shows the canonical status-preserving pattern. (#45) +The {tooliconl}`send-keys` docstring, server system instructions, {tooliconl}`wait-for-text` docstring, and the user-facing quickstart, gotchas, prompting, troubleshooting, recipes, and send-keys topics now point agents at {tooliconl}`wait-for-channel` with composed `tmux wait-for -S` for command completion. {tooliconl}`wait-for-text` and {tooliconl}`wait-for-content-change` are reframed as the fallbacks for output the agent does not author. The `run_and_wait` recipe shows the canonical safe-completion pattern. (#45) ## libtmux-mcp 0.1.0a6 (2026-05-09) diff --git a/docs/demo.md b/docs/demo.md index 21475479..a107f0fb 100644 --- a/docs/demo.md +++ b/docs/demo.md @@ -66,11 +66,11 @@ These are the actual tool headings as they render on tool pages: ### In prose -Use {tooliconl}`search-panes` to find text across all panes. If you know which pane, use {tooliconl}`capture-pane` instead. 
After running a command with {tooliconl}`send-keys`, always {tooliconl}`wait-for-text` before capturing. +Use {tooliconl}`search-panes` to find text across all panes. If you know which pane, use {tooliconl}`capture-pane` instead. After running a command with {tooliconl}`send-keys`, compose `tmux wait-for -S` and call {tooliconl}`wait-for-channel` before capturing. ### Dense inline (toolref, no badges) -The fundamental pattern: {toolref}`send-keys` → {toolref}`wait-for-text` → {toolref}`capture-pane`. For discovery: {toolref}`list-sessions` → {toolref}`list-panes` → {toolref}`get-pane-info`. +The fundamental pattern: {toolref}`send-keys` → {toolref}`wait-for-channel` → {toolref}`capture-pane`. For discovery: {toolref}`list-sessions` → {toolref}`list-panes` → {toolref}`get-pane-info`. ## Environment variable references @@ -87,7 +87,7 @@ Use {tooliconl}`search-panes` before {tooliconl}`capture-pane` when you don't kn ``` ```{warning} -Do not call {toolref}`capture-pane` immediately after {toolref}`send-keys` — there is a race condition. Use {toolref}`wait-for-text` between them. +Do not call {toolref}`capture-pane` immediately after {toolref}`send-keys` — there is a race condition. Compose `tmux wait-for -S` into the command and use {toolref}`wait-for-channel` between them. 
``` ```{note} diff --git a/docs/prompts.md b/docs/prompts.md index ed4febcf..00acb44f 100644 --- a/docs/prompts.md +++ b/docs/prompts.md @@ -69,12 +69,12 @@ channel is signalled — strictly cheaper in agent turns than a ````markdown Run this shell command in tmux pane %1 and block -until it finishes, preserving the command's exit status: +until it finishes: ```python send_keys( pane_id='%1', - keys='pytest; __mcp_status=$?; tmux wait-for -S libtmux_mcp_wait_; exit $__mcp_status', + keys='pytest; tmux wait-for -S libtmux_mcp_wait_', ) wait_for_channel(channel='libtmux_mcp_wait_', timeout=60.0) capture_pane(pane_id='%1', max_lines=100) @@ -83,12 +83,19 @@ capture_pane(pane_id='%1', max_lines=100) After the channel signals, read the last ~100 lines to verify the command's behaviour. Do NOT use a `capture_pane` retry loop — `wait_for_channel` is strictly cheaper in agent turns. + +The payload does not preserve the command's exit status: doing so +in an interactive shell would require exiting the shell (which kills +the pane) or routing through an out-of-band file or tmux variable. +If you need the status, inspect the captured output for +command-specific success markers. ```` -The ``__mcp_status=$?`` capture and ``exit $__mcp_status`` mean the -agent observes the command's real exit code via shell-conventional -``$?`` — even though the wait-for signal fires regardless of -success or failure. +Shell ``;`` semantics fire the ``wait-for -S`` whether ``pytest`` +succeeded or failed, so the edge-triggered signal never deadlocks the +agent on a crashed command. Status preservation is intentionally +omitted: chaining ``exit $status`` after the signal would exit the +interactive shell itself, destroying single-pane sessions. 
--- diff --git a/src/libtmux_mcp/tools/pane_tools/io.py b/src/libtmux_mcp/tools/pane_tools/io.py index 22d4c473..de209b2d 100644 --- a/src/libtmux_mcp/tools/pane_tools/io.py +++ b/src/libtmux_mcp/tools/pane_tools/io.py @@ -37,7 +37,7 @@ def send_keys( - **Deterministic (preferred):** compose ``tmux wait-for -S `` into the shell command and call ``wait_for_channel``. See the - ``run_and_wait`` prompt for the canonical status-preserving pattern. + ``run_and_wait`` prompt for the canonical safe-completion pattern. Cheaper in agent turns and immune to baseline races. - **Pattern-match:** call ``wait_for_text`` when the output you await is yours to author and won't appear before the wait locks its diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index 27be9866..fe1791a9 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -215,9 +215,9 @@ async def wait_for_text( For commands you author, prefer the channel pattern: append ``; tmux wait-for -S `` to your ``send_keys`` payload and call ``wait_for_channel`` instead. The ``run_and_wait`` prompt at - ``libtmux_mcp.prompts.recipes`` shows the safe, status-preserving - composition. Reserve ``wait_for_text`` for output you do not - control (third-party process logs, daemon prompts, interactive + ``libtmux_mcp.prompts.recipes`` shows the safe composition. + Reserve ``wait_for_text`` for output you do not control + (third-party process logs, daemon prompts, interactive supervisors). 
When a :class:`fastmcp.Context` is available, this tool emits From f7d6edfbf14722514a12b2aea8c5d1397a768503 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 12:36:36 -0500 Subject: [PATCH 30/31] Pane(refactor[wait]): drop redundant timed_out field from result models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WaitForTextResult.timed_out was mechanically `not found`; ContentChangeResult.timed_out was mechanically `not changed`. There is no third state — every wait outcome is either match-found or timed-out, so the second boolean carries no information beyond the primary result field. The redundancy was visible in the test suite as `assert result.timed_out is (not expected_found)`, asserting the identity directly. Drop the field from both Pydantic models. Drop the assignments in the two return sites in wait.py. Drop the redundant assertions in test_pane_tools.py — each was paired with an equivalent assertion on `found` or `changed`. Replace the now-unused-result lints in the two timeout-warning tests with affirmative `assert result.found is False` / `assert result.changed is False` so the timeout outcome stays covered alongside the warning-emission contract. Update the timed_out references in test docstrings and in the JSON examples on the wait-for-text / wait-for-content-change docs pages. CHANGES gains a Breaking changes bullet. Pre-alpha, no compat shim. Agents that read `result.timed_out` should switch to `not result.found` / `not result.changed`. Verified via the local MCP probe across the swapped clients and the full quality-gate chain: 462 tests pass, ruff and mypy clean, build-docs succeeds, tool descriptions still teach the safe pattern. 
--- CHANGES | 16 +++++++++++ docs/tools/pane/wait-for-content-change.md | 3 +- docs/tools/pane/wait-for-text.md | 3 +- src/libtmux_mcp/models.py | 2 -- src/libtmux_mcp/tools/pane_tools/wait.py | 2 -- tests/test_pane_tools.py | 33 +++++++--------------- 6 files changed, 28 insertions(+), 31 deletions(-) diff --git a/CHANGES b/CHANGES index cc42eb8b..e3ade09b 100644 --- a/CHANGES +++ b/CHANGES @@ -28,6 +28,22 @@ wait_for_text(pattern="OK", content_start=-100) wait_for_text(pattern="OK") ``` +**{class}`~libtmux_mcp.models.WaitForTextResult` and {class}`~libtmux_mcp.models.ContentChangeResult` drop `timed_out`** + +The `timed_out` field was mechanically the boolean negation of the primary result field (`not found` / `not changed`). Callers that read `result.timed_out` should switch to `not result.found` / `not result.changed`. (#47) + +```python +# Before +result = wait_for_text(pattern="OK") +if result.timed_out: + ... + +# After +result = wait_for_text(pattern="OK") +if not result.found: + ... +``` + ### Dependencies **Minimum `libtmux>=0.56.0`** (was `>=0.55.1`). Picks up libtmux 0.56's typed wrappers for the tmux commands the server invokes — the MCP now uses libtmux's public API instead of raw command-line escapes for pane lifecycle, scrollback, and session navigation. 
(#46) diff --git a/docs/tools/pane/wait-for-content-change.md b/docs/tools/pane/wait-for-content-change.md index e5d5887c..a6ed108f 100644 --- a/docs/tools/pane/wait-for-content-change.md +++ b/docs/tools/pane/wait-for-content-change.md @@ -31,8 +31,7 @@ Response: { "changed": true, "pane_id": "%0", - "elapsed_seconds": 1.234, - "timed_out": false + "elapsed_seconds": 1.234 } ``` diff --git a/docs/tools/pane/wait-for-text.md b/docs/tools/pane/wait-for-text.md index 813f998c..f7cd9b68 100644 --- a/docs/tools/pane/wait-for-text.md +++ b/docs/tools/pane/wait-for-text.md @@ -35,8 +35,7 @@ Response: "Server listening on port 8000" ], "pane_id": "%2", - "elapsed_seconds": 0.002, - "timed_out": false + "elapsed_seconds": 0.002 } ``` diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 85e083fd..866bd170 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -237,7 +237,6 @@ class WaitForTextResult(BaseModel): ) pane_id: str = Field(description="Pane ID that was polled") elapsed_seconds: float = Field(description="Time spent waiting in seconds") - timed_out: bool = Field(description="Whether the timeout was reached") class PaneSnapshot(BaseModel): @@ -445,4 +444,3 @@ class ContentChangeResult(BaseModel): changed: bool = Field(description="Whether the content changed before timeout") pane_id: str = Field(description="Pane ID that was polled") elapsed_seconds: float = Field(description="Time spent waiting in seconds") - timed_out: bool = Field(description="Whether the timeout was reached") diff --git a/src/libtmux_mcp/tools/pane_tools/wait.py b/src/libtmux_mcp/tools/pane_tools/wait.py index fe1791a9..924634b0 100644 --- a/src/libtmux_mcp/tools/pane_tools/wait.py +++ b/src/libtmux_mcp/tools/pane_tools/wait.py @@ -534,7 +534,6 @@ async def wait_for_text( matched_lines=matched_lines, pane_id=pane.pane_id, elapsed_seconds=round(elapsed, 3), - timed_out=not found, ) @@ -658,5 +657,4 @@ async def wait_for_content_change( changed=changed, 
pane_id=pane.pane_id, elapsed_seconds=round(elapsed, 3), - timed_out=not changed, ) diff --git a/tests/test_pane_tools.py b/tests/test_pane_tools.py index 42fbf8a9..e7dba3fd 100644 --- a/tests/test_pane_tools.py +++ b/tests/test_pane_tools.py @@ -1250,7 +1250,6 @@ def _stale_settled() -> bool: ) assert isinstance(result, WaitForTextResult) assert result.found is expected_found - assert result.timed_out is (not expected_found) assert result.pane_id == mcp_pane.pane_id assert result.elapsed_seconds >= 0 @@ -1297,7 +1296,6 @@ async def run() -> WaitForTextResult: result = asyncio.run(run()) assert result.found is True - assert result.timed_out is False assert any("WAIT_MARKER_after" in line for line in result.matched_lines) @@ -1345,7 +1343,6 @@ def _staged() -> bool: ) ) assert result.found is False - assert result.timed_out is True def test_wait_for_text_does_not_match_bottom_row_clip( @@ -1401,7 +1398,6 @@ def _bottom_row_ready() -> bool: ) ) assert result.found is False - assert result.timed_out is True def test_wait_for_text_invalid_regex(mcp_server: Server, mcp_pane: Pane) -> None: @@ -1624,7 +1620,6 @@ async def run() -> WaitForTextResult: result = asyncio.run(run()) assert result.found is True - assert result.timed_out is False def test_wait_for_text_survives_resize_grow_with_scrolled_history( @@ -1686,7 +1681,6 @@ async def run() -> WaitForTextResult: # The wait must complete cleanly via timeout — NOT a ToolError. 
result = asyncio.run(run()) assert result.found is False - assert result.timed_out is True def test_wait_for_text_handles_resize_during_wait( @@ -1734,7 +1728,6 @@ async def run() -> WaitForTextResult: result = asyncio.run(run()) assert result.found is False - assert result.timed_out is True def test_wait_for_text_matches_pattern_across_wrap( @@ -1783,7 +1776,6 @@ async def run() -> WaitForTextResult: result = asyncio.run(run()) assert result.found is True - assert result.timed_out is False assert any(marker in line for line in result.matched_lines) @@ -1824,7 +1816,6 @@ async def warning(self, message: str) -> None: ) ) assert result.found is False - assert result.timed_out is True assert len(progress_calls) >= 2 first_progress, first_total, first_msg = progress_calls[0] assert first_progress >= 0.0 @@ -1917,7 +1908,6 @@ async def warning(self, message: str) -> None: ) ) assert result.found is False - assert result.timed_out is True def test_wait_for_text_warns_on_invalid_regex( @@ -1972,9 +1962,9 @@ def test_wait_for_text_warns_on_timeout(mcp_server: Server, mcp_pane: Pane) -> N Sibling guard to the invalid-regex warning. The timeout case is where operators most need a structured signal — the tool returns - ``timed_out=True`` in the result but agents and human log readers - have to dig into the ``WaitForTextResult`` to notice. The warning - surfaces it directly. + ``found=False`` but agents and human log readers have to dig into + the ``WaitForTextResult`` to notice. The warning surfaces it + directly. """ import asyncio @@ -2003,7 +1993,7 @@ async def warning(self, message: str) -> None: ) ) - assert result.timed_out is True + assert result.found is False assert any( level == "warning" and "timeout" in msg.lower() for level, msg in log_calls ), f"expected a timeout warning, got: {log_calls}" @@ -2022,11 +2012,10 @@ def test_wait_for_text_warns_in_history_limit_risk_band( so MCP clients can decide whether to keep waiting, retry, or switch to ``wait_for_channel``. 
- The wait's ``found`` / ``timed_out`` result is intentionally not - asserted — once polling enters the risk band, correctness is - best-effort. The test pins the warning contract (what the tool - guarantees), not the match contract (what tmux's grid model - fundamentally can't). + The wait's ``found`` result is intentionally not asserted — once + polling enters the risk band, correctness is best-effort. The test + pins the warning contract (what the tool guarantees), not the + match contract (what tmux's grid model fundamentally can't). """ import asyncio @@ -2212,7 +2201,7 @@ async def warning(self, message: str) -> None: ctx=t.cast("t.Any", _RecordingContext()), ) ) - assert result.timed_out is True + assert result.changed is False assert any( level == "warning" and "timeout" in msg.lower() for level, msg in log_calls ), f"expected a timeout warning, got: {log_calls}" @@ -2542,7 +2531,6 @@ def _send_later() -> None: thread.join() assert isinstance(result, ContentChangeResult) assert result.changed is True - assert result.timed_out is False assert result.elapsed_seconds > 0 @@ -2555,7 +2543,7 @@ def test_wait_for_content_change_timeout(mcp_server: Server, mcp_pane: Pane) -> machines the shell prompt can take well over 500 ms to fully render (cursor blink, zsh right-prompt, git status async hooks) and would otherwise be observed as pane-content change during the test window, - failing ``timed_out=True`` spuriously under ``--reruns=0``. + failing ``changed=False`` spuriously under ``--reruns=0``.
""" import time @@ -2594,7 +2582,6 @@ def test_wait_for_content_change_timeout(mcp_server: Server, mcp_pane: Pane) -> ) assert isinstance(result, ContentChangeResult) assert result.changed is False - assert result.timed_out is True # --------------------------------------------------------------------------- From f79625cddd12a19262598ea95fedc9e24f8b06f6 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 16 May 2026 12:42:54 -0500 Subject: [PATCH 31/31] docs(CHANGES[wait]): tighten timed_out subheading to name the deliverable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: AGENTS.md "Changelog Conventions" requires each deliverable to have a bold subheading naming it in user vocabulary, with cross-references in the prose body. The previous heading led with two `{class}` rST roles that crowded the user-visible change ("drop `timed_out`") and pushed it to the end of a long line. what: - Rename the bold subheading to "Wait result models drop `timed_out`" — names the deliverable, fits on one line, reads as user vocabulary. - Move the two `{class}` cross-references for WaitForTextResult and ContentChangeResult into the opening sentence of the body where they render inline. - Body wording unchanged otherwise; the migration guidance and PR reference stay intact. --- CHANGES | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index e3ade09b..3a0ee931 100644 --- a/CHANGES +++ b/CHANGES @@ -28,9 +28,9 @@ wait_for_text(pattern="OK", content_start=-100) wait_for_text(pattern="OK") ``` -**{class}`~libtmux_mcp.models.WaitForTextResult` and {class}`~libtmux_mcp.models.ContentChangeResult` drop `timed_out`** +**Wait result models drop `timed_out`** -The `timed_out` field was mechanically the boolean negation of the primary result field (`not found` / `not changed`). Callers that read `result.timed_out` should switch to `not result.found` / `not result.changed`. 
(#47) +{class}`~libtmux_mcp.models.WaitForTextResult` and {class}`~libtmux_mcp.models.ContentChangeResult` drop the `timed_out` field. It was mechanically the boolean negation of the primary result (`not found` / `not changed`) and carried no information beyond that. Callers should switch to `not result.found` / `not result.changed`. (#47) ```python # Before