From afa16e427acbfea880d6dada3bdbf78c0adbe6cf Mon Sep 17 00:00:00 2001 From: Florian DAVID <150798857+fdaviddpt@users.noreply.github.com> Date: Fri, 26 Jun 2026 14:45:22 +0200 Subject: [PATCH 1/2] fix(mcp): treat server timeout as infra, not a diagnostic (#346) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cclsp returns its internal LSP-orchestrator timeout ("orchestrator timeout after 3s") as a normal MCP text result with isError unset, so it flowed through dispatch as a real result and lsp-diag counted it as a phantom +1 diagnostic — reading like the edit caused a regression. Detect infra conditions in the shared _mcp_call_or_message (covers diag/hover/rename) via two signals: the structural MCP isError flag and a configurable per-server infra_patterns list (default orchestrator timeout / timed out / timeout after). Matched results are returned prefixed `op: …` so the adapter's existing op-prefix guard drops them. No count delta. Tests: TestMcpInfraSkip (8 pure + 2 end-to-end). Docs + CHANGELOG. Co-Authored-By: Max --- CHANGELOG.md | 1 + docs/mcp-integration.md | 6 ++++ supertool.py | 56 +++++++++++++++++++++++++----- tests/test_security_lsp.py | 71 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5415471..39eb870 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- **An MCP server's own timeout no longer surfaces as a phantom `+1` diagnostic** — after an edit, `lsp-diag` reported a `0 → 1` count where the single "diag" was actually `orchestrator timeout after 3s`, an infrastructure condition, not a code finding (phpstan had already covered the same file clean). The cause: cclsp swallows its *internal* LSP-orchestrator timeout and returns it as normal MCP text content with the `isError` flag unset — so it flowed through the shared dispatch as a real result and the lsp-diag adapter counted it as an advisory, reading like the edit introduced a regression. Detection now lives in the shared `_mcp_call_or_message` (so every MCP-backed op — `diag`/`hover`/`rename` — benefits, not just lsp-diag) with two signals: the structural MCP `isError` flag (spec-standard, any server) and a configurable per-server `infra_patterns` substring list (default `["orchestrator timeout", "timed out", "timeout after"]`) for servers that report timeouts as plain text. A matched result is returned prefixed `op: …` — the same shape as supertool's own MCP errors — so the adapter's existing `op:`-prefix guard drops it instead of counting it. No count delta, no false regression, and the timeout value itself stays per-server configurable via `mcp..timeout`. Closes [#346](https://github.com/Digital-Process-Tools/claude-supertool/issues/346). - **`git-diff:PATH` no longer reports a missing/untracked path as "No changes."** — a path absent or untracked in the current repo produced an empty diff that printed `No changes.`, indistinguishable from a clean tracked file. Combined with a stale CWD (e.g. a `cd` into another repo) this is a silent false-negative: the op reads as "nothing changed" when it actually looked in the wrong place. Path mode now checks `git ls-files` first — a missing path warns `not found under — wrong CWD?` and exits 1, an untracked on-disk path warns `untracked (not in git)`, and only a genuinely clean tracked file still says `No changes.`. Every mode also stamps a `Repo: ` header so a wrong-repo invocation is visible at a glance (the trigger: `git-diff:.supertool.json` silently returned "No changes" while anchored to the wrong clone). Closes [#336](https://github.com/Digital-Process-Tools/claude-supertool/issues/336). - **`grep` path-not-found error now includes the CWD** — `ERROR: path not found: (cwd: ) — wrong CWD?` instead of the bare path, so the recurring "stale cwd makes a relative path silently miss" trap is diagnosable from the message alone. Part of [#336](https://github.com/Digital-Process-Tools/claude-supertool/issues/336). - **`git-commit` no longer aborts on an already-staged deletion** — listing a `git rm`'d path (gone from disk) alongside present files made the op's `git add` step fail with `pathspec did not match any files`, killing the whole commit. The op now drops paths that are already staged as deletions from the `git add` step (their deletion is already staged and gets committed), so a mixed changeset of modifications + new files + deletions lands in one commit. Genuinely-unknown paths still error as before. Closes [#324](https://github.com/Digital-Process-Tools/claude-supertool/issues/324). diff --git a/docs/mcp-integration.md b/docs/mcp-integration.md index 72e0856..e854d18 100644 --- a/docs/mcp-integration.md +++ b/docs/mcp-integration.md @@ -131,6 +131,7 @@ Edit `.supertool.json`: - `env` — environment for the spawned MCP server - `tools` — maps supertool op names to MCP tool names exposed by the server. Omit any op you don't want to use; that op falls back to the heuristic path (where one exists) - `timeout` — request timeout in seconds (LSP cold-start can be slow; 60s is comfortable) +- `infra_patterns` — list of substrings that mark a tool result as an infrastructure condition (timeout/overload) rather than a real diagnostic. Some servers (cclsp) swallow their own internal timeout and hand it back as normal text content (e.g. `orchestrator timeout after 3s`) with the MCP `isError` flag unset — without this, `diag` would count that text as a phantom `+1` diagnostic that reads like the edit caused a regression (#346). Matched results are returned prefixed `op: …` so adapters drop them. Defaults to `["orchestrator timeout", "timed out", "timeout after"]`; the structural `isError` flag is always honored regardless - `stopOnNewFile` — `true` to SIGTERM this daemon when a mutating op (`edit`/`paste`/etc.) **creates a brand-new file** matching `match`. The warm LSP holds a reflection cache that doesn't index new classes, so it reports phantom errors on a just-created file (#239); stopping it forces the next validator run to cold-start a daemon that sees the file. Cost: that one post-create validate pays the cold-reindex (~30-60s on a large repo). Leave unset for servers that index new files fine. ### 4. Use it @@ -210,6 +211,11 @@ binary, the wiring is the same. server exposes via `tools/list`. Without this, the op falls through to the heuristic path. - `timeout` — request timeout in seconds. + - `infra_patterns` (optional) — substrings that mark a result as an infra + condition (timeout/overload) not a real diagnostic; matched results are + prefixed `op: …` so adapters drop them. Defaults to + `["orchestrator timeout", "timed out", "timeout after"]`. The structural + MCP `isError` flag is always honored regardless (#346). - `idle_timeout` (optional) — daemon shuts itself down after this many seconds idle (default 600). - `stopOnNewFile` (optional) — `true` to SIGTERM this daemon when a mutating op diff --git a/supertool.py b/supertool.py index a60c57b..5346fe4 100755 --- a/supertool.py +++ b/supertool.py @@ -107,7 +107,7 @@ import time from datetime import datetime from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple VERSION = "0.19.0" @@ -9619,9 +9619,47 @@ def op_validate_multi(paths: list, tool_filter: Optional[list] = None, # make sense with a real language server). # --------------------------------------------------------------------------- +# Patterns that mark an MCP text result as an infrastructure condition (timeout, +# overload) rather than a real tool result. Some servers (cclsp) swallow their own +# timeout and return it as normal text content with the `isError` flag unset — +# these patterns catch that case. Overridable per server via +# mcp..infra_patterns in .supertool.json. See #346. +_MCP_INFRA_DEFAULT_PATTERNS = ("orchestrator timeout", "timed out", "timeout after") + + +def _mcp_result_text(result: object) -> str: + """Join the text content items of an MCP tool result into one string.""" + content = result.get("content") if isinstance(result, dict) else None + if isinstance(content, list): + texts = [item.get("text", "") for item in content + if isinstance(item, dict) and item.get("type") == "text"] + return chr(10).join(t for t in texts if t) + return "" + + +def _mcp_result_is_infra(result: object, patterns: Iterable[str]) -> bool: + """True if an MCP tool result is an infra condition, not real content. + + Two signals, in order: + 1. structural — the MCP `isError` flag (spec-standard, any server). + 2. textual — the content matches a configured infra pattern, for servers + that report a timeout/overload as normal text with isError unset. + """ + if not isinstance(result, dict): + return False + if result.get("isError"): + return True + text = _mcp_result_text(result).lower() + return bool(text) and any(p.lower() in text for p in patterns) + + def _mcp_call_or_message(op_name: str, file_path: str, args: dict) -> str: """Shared dispatch for diag/hover/rename. Returns the MCP text result or a diagnostic message if no route / no server / call failed. + + Infra conditions (timeout/overload) are returned prefixed `op_name: ...` — + same shape as our own errors — so adapters (lsp-diag) drop them via their + op_name-guard instead of counting them as findings. See #346. """ if not file_path: return f"{op_name}: missing file path\n" @@ -9638,14 +9676,16 @@ def _mcp_call_or_message(op_name: str, file_path: str, args: dict) -> str: return f"{op_name}: MCP error: {e}\n" if result is None: return f"{op_name}: no result from {mcp_tool}\n" + # Infra condition (timeout/overload) → prefix it so adapters drop it (#346). + infra_patterns = _mcp_specs.get(server_name, {}).get( + "infra_patterns", _MCP_INFRA_DEFAULT_PATTERNS) + if _mcp_result_is_infra(result, infra_patterns): + text = _mcp_result_text(result).strip() or "infra condition" + return f"{op_name}: {text}\n" # Pull text content (most common MCP response shape) - content = result.get("content") if isinstance(result, dict) else None - if isinstance(content, list): - texts = [item.get("text", "") for item in content - if isinstance(item, dict) and item.get("type") == "text"] - text = "\n".join(t for t in texts if t) - if text: - return text.rstrip("\n") + "\n" + text = _mcp_result_text(result) + if text: + return text.rstrip("\n") + "\n" return json.dumps(result, indent=2) + "\n" diff --git a/tests/test_security_lsp.py b/tests/test_security_lsp.py index e922e0a..8ecbd20 100644 --- a/tests/test_security_lsp.py +++ b/tests/test_security_lsp.py @@ -802,3 +802,74 @@ def test_config_injection_contract_documented(self) -> None: """ # This is a documentation test — always passes assert True, "See docstring for threat model." + + +# --------------------------------------------------------------------------- +# 11. Infra timeout/error must not surface as a diagnostic (#346) +# --------------------------------------------------------------------------- + +class TestMcpInfraSkip: + """#346: an MCP timeout/fatal is an infra condition, not a code diagnostic. + + Two detection signals, both in the shared dispatch so every MCP-backed op + (diag/hover/rename) benefits: + - structural: the MCP `isError` flag (spec-standard, any server). + - textual: a configurable per-server `infra_patterns` list, for servers + (cclsp) that swallow their own timeout into normal text content. + """ + + def test_iserror_flag_is_infra(self) -> None: + result = {"isError": True, "content": [{"type": "text", "text": "boom"}]} + assert supertool._mcp_result_is_infra(result, supertool._MCP_INFRA_DEFAULT_PATTERNS) + + def test_orchestrator_timeout_text_is_infra(self) -> None: + result = _make_mcp_text_response("orchestrator timeout after 3s") + assert supertool._mcp_result_is_infra(result, supertool._MCP_INFRA_DEFAULT_PATTERNS) + + def test_timed_out_text_is_infra(self) -> None: + result = _make_mcp_text_response("MCP daemon 'cclsp' read timed out after 3s") + assert supertool._mcp_result_is_infra(result, supertool._MCP_INFRA_DEFAULT_PATTERNS) + + def test_match_is_case_insensitive(self) -> None: + result = _make_mcp_text_response("Orchestrator Timeout after 10s") + assert supertool._mcp_result_is_infra(result, supertool._MCP_INFRA_DEFAULT_PATTERNS) + + def test_real_diagnostic_is_not_infra(self) -> None: + result = _make_mcp_text_response("• [error] undefined variable $foo at line 42, col 13") + assert not supertool._mcp_result_is_infra(result, supertool._MCP_INFRA_DEFAULT_PATTERNS) + + def test_empty_content_is_not_infra(self) -> None: + assert not supertool._mcp_result_is_infra({"content": []}, supertool._MCP_INFRA_DEFAULT_PATTERNS) + + def test_non_dict_is_not_infra(self) -> None: + assert not supertool._mcp_result_is_infra("nope", supertool._MCP_INFRA_DEFAULT_PATTERNS) + + def test_custom_patterns_override_default(self) -> None: + result = _make_mcp_text_response("server overloaded, try later") + assert not supertool._mcp_result_is_infra(result, supertool._MCP_INFRA_DEFAULT_PATTERNS) + assert supertool._mcp_result_is_infra(result, ["overloaded"]) + + def test_dispatch_prefixes_infra_so_adapter_guard_drops_it( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """End-to-end: a server-swallowed timeout returned from diag must come back + prefixed `diag: ...` so the lsp-diag adapter's startswith-guard drops it + rather than counting it as a +1 advisory.""" + php_file = tmp_path / "Foo.php" + php_file.write_text(" None: + php_file = tmp_path / "Foo.php" + php_file.write_text(" Date: Fri, 26 Jun 2026 14:56:43 +0200 Subject: [PATCH 2/2] fix(mcp): tighten infra patterns, add isError e2e test (review #348) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop bare "timed out"/"timeout after" from the default infra_patterns — they match against the whole diag result, so one real diagnostic containing the phrase would drop every diagnostic in that result. Default is now ("orchestrator timeout", "timed out after") — both require word-pairing. The cclsp bug is caught by "orchestrator timeout"; the daemon variant is already handled by the exception path. Also: chr(10) -> "\\n" for file convention, add an end-to-end test for the structural isError path, sync the default list in docs + CHANGELOG. Co-Authored-By: Max --- CHANGELOG.md | 2 +- docs/mcp-integration.md | 4 ++-- supertool.py | 4 ++-- tests/test_security_lsp.py | 13 +++++++++++++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39eb870..f1d1f7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- **An MCP server's own timeout no longer surfaces as a phantom `+1` diagnostic** — after an edit, `lsp-diag` reported a `0 → 1` count where the single "diag" was actually `orchestrator timeout after 3s`, an infrastructure condition, not a code finding (phpstan had already covered the same file clean). The cause: cclsp swallows its *internal* LSP-orchestrator timeout and returns it as normal MCP text content with the `isError` flag unset — so it flowed through the shared dispatch as a real result and the lsp-diag adapter counted it as an advisory, reading like the edit introduced a regression. Detection now lives in the shared `_mcp_call_or_message` (so every MCP-backed op — `diag`/`hover`/`rename` — benefits, not just lsp-diag) with two signals: the structural MCP `isError` flag (spec-standard, any server) and a configurable per-server `infra_patterns` substring list (default `["orchestrator timeout", "timed out", "timeout after"]`) for servers that report timeouts as plain text. A matched result is returned prefixed `op: …` — the same shape as supertool's own MCP errors — so the adapter's existing `op:`-prefix guard drops it instead of counting it. No count delta, no false regression, and the timeout value itself stays per-server configurable via `mcp..timeout`. Closes [#346](https://github.com/Digital-Process-Tools/claude-supertool/issues/346). +- **An MCP server's own timeout no longer surfaces as a phantom `+1` diagnostic** — after an edit, `lsp-diag` reported a `0 → 1` count where the single "diag" was actually `orchestrator timeout after 3s`, an infrastructure condition, not a code finding (phpstan had already covered the same file clean). The cause: cclsp swallows its *internal* LSP-orchestrator timeout and returns it as normal MCP text content with the `isError` flag unset — so it flowed through the shared dispatch as a real result and the lsp-diag adapter counted it as an advisory, reading like the edit introduced a regression. Detection now lives in the shared `_mcp_call_or_message` (so every MCP-backed op — `diag`/`hover`/`rename` — benefits, not just lsp-diag) with two signals: the structural MCP `isError` flag (spec-standard, any server) and a configurable per-server `infra_patterns` substring list (default `["orchestrator timeout", "timed out after"]`) for servers that report timeouts as plain text. A matched result is returned prefixed `op: …` — the same shape as supertool's own MCP errors — so the adapter's existing `op:`-prefix guard drops it instead of counting it. No count delta, no false regression, and the timeout value itself stays per-server configurable via `mcp..timeout`. Closes [#346](https://github.com/Digital-Process-Tools/claude-supertool/issues/346). - **`git-diff:PATH` no longer reports a missing/untracked path as "No changes."** — a path absent or untracked in the current repo produced an empty diff that printed `No changes.`, indistinguishable from a clean tracked file. Combined with a stale CWD (e.g. a `cd` into another repo) this is a silent false-negative: the op reads as "nothing changed" when it actually looked in the wrong place. Path mode now checks `git ls-files` first — a missing path warns `not found under — wrong CWD?` and exits 1, an untracked on-disk path warns `untracked (not in git)`, and only a genuinely clean tracked file still says `No changes.`. Every mode also stamps a `Repo: ` header so a wrong-repo invocation is visible at a glance (the trigger: `git-diff:.supertool.json` silently returned "No changes" while anchored to the wrong clone). Closes [#336](https://github.com/Digital-Process-Tools/claude-supertool/issues/336). - **`grep` path-not-found error now includes the CWD** — `ERROR: path not found: (cwd: ) — wrong CWD?` instead of the bare path, so the recurring "stale cwd makes a relative path silently miss" trap is diagnosable from the message alone. Part of [#336](https://github.com/Digital-Process-Tools/claude-supertool/issues/336). - **`git-commit` no longer aborts on an already-staged deletion** — listing a `git rm`'d path (gone from disk) alongside present files made the op's `git add` step fail with `pathspec did not match any files`, killing the whole commit. The op now drops paths that are already staged as deletions from the `git add` step (their deletion is already staged and gets committed), so a mixed changeset of modifications + new files + deletions lands in one commit. Genuinely-unknown paths still error as before. Closes [#324](https://github.com/Digital-Process-Tools/claude-supertool/issues/324). diff --git a/docs/mcp-integration.md b/docs/mcp-integration.md index e854d18..a5e245d 100644 --- a/docs/mcp-integration.md +++ b/docs/mcp-integration.md @@ -131,7 +131,7 @@ Edit `.supertool.json`: - `env` — environment for the spawned MCP server - `tools` — maps supertool op names to MCP tool names exposed by the server. Omit any op you don't want to use; that op falls back to the heuristic path (where one exists) - `timeout` — request timeout in seconds (LSP cold-start can be slow; 60s is comfortable) -- `infra_patterns` — list of substrings that mark a tool result as an infrastructure condition (timeout/overload) rather than a real diagnostic. Some servers (cclsp) swallow their own internal timeout and hand it back as normal text content (e.g. `orchestrator timeout after 3s`) with the MCP `isError` flag unset — without this, `diag` would count that text as a phantom `+1` diagnostic that reads like the edit caused a regression (#346). Matched results are returned prefixed `op: …` so adapters drop them. Defaults to `["orchestrator timeout", "timed out", "timeout after"]`; the structural `isError` flag is always honored regardless +- `infra_patterns` — list of substrings that mark a tool result as an infrastructure condition (timeout/overload) rather than a real diagnostic. Some servers (cclsp) swallow their own internal timeout and hand it back as normal text content (e.g. `orchestrator timeout after 3s`) with the MCP `isError` flag unset — without this, `diag` would count that text as a phantom `+1` diagnostic that reads like the edit caused a regression (#346). Matched results are returned prefixed `op: …` so adapters drop them. Defaults to `["orchestrator timeout", "timed out after"]`; the structural `isError` flag is always honored regardless - `stopOnNewFile` — `true` to SIGTERM this daemon when a mutating op (`edit`/`paste`/etc.) **creates a brand-new file** matching `match`. The warm LSP holds a reflection cache that doesn't index new classes, so it reports phantom errors on a just-created file (#239); stopping it forces the next validator run to cold-start a daemon that sees the file. Cost: that one post-create validate pays the cold-reindex (~30-60s on a large repo). Leave unset for servers that index new files fine. ### 4. Use it @@ -214,7 +214,7 @@ binary, the wiring is the same. - `infra_patterns` (optional) — substrings that mark a result as an infra condition (timeout/overload) not a real diagnostic; matched results are prefixed `op: …` so adapters drop them. Defaults to - `["orchestrator timeout", "timed out", "timeout after"]`. The structural + `["orchestrator timeout", "timed out after"]`. The structural MCP `isError` flag is always honored regardless (#346). - `idle_timeout` (optional) — daemon shuts itself down after this many seconds idle (default 600). diff --git a/supertool.py b/supertool.py index 5346fe4..bafca19 100755 --- a/supertool.py +++ b/supertool.py @@ -9624,7 +9624,7 @@ def op_validate_multi(paths: list, tool_filter: Optional[list] = None, # timeout and return it as normal text content with the `isError` flag unset — # these patterns catch that case. Overridable per server via # mcp..infra_patterns in .supertool.json. See #346. -_MCP_INFRA_DEFAULT_PATTERNS = ("orchestrator timeout", "timed out", "timeout after") +_MCP_INFRA_DEFAULT_PATTERNS = ("orchestrator timeout", "timed out after") def _mcp_result_text(result: object) -> str: @@ -9633,7 +9633,7 @@ def _mcp_result_text(result: object) -> str: if isinstance(content, list): texts = [item.get("text", "") for item in content if isinstance(item, dict) and item.get("type") == "text"] - return chr(10).join(t for t in texts if t) + return "\n".join(t for t in texts if t) return "" diff --git a/tests/test_security_lsp.py b/tests/test_security_lsp.py index 8ecbd20..53a2c08 100644 --- a/tests/test_security_lsp.py +++ b/tests/test_security_lsp.py @@ -873,3 +873,16 @@ def test_dispatch_passes_real_diagnostic_through_unprefixed( out = supertool.op_diag(str(php_file)) assert not out.startswith("diag:"), f"real diagnostic wrongly prefixed: {out!r}" assert "missing semicolon" in out + + def test_dispatch_prefixes_iserror_result( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """A structural isError result must also be prefixed and dropped, even + when its text matches no infra pattern.""" + php_file = tmp_path / "Foo.php" + php_file.write_text("