From 37fb732bcc31c3207af1cf148e18b5f98f69ad90 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:41:29 +0100 Subject: [PATCH 01/17] refactor: deduplicate JSON compression into utils.compress_json_value() Replace three near-identical JSON summarization implementations (network._summarize_json, file_content._summarize_json_value, cloud_cli._compress_json_value) with calls to the shared utils.compress_json_value() function. --- src/processors/cloud_cli.py | 41 ++++------------------------------ src/processors/file_content.py | 29 +++--------------------- src/processors/network.py | 30 +++---------------------- 3 files changed, 10 insertions(+), 90 deletions(-) diff --git a/src/processors/cloud_cli.py b/src/processors/cloud_cli.py index afd364d..05d87ef 100644 --- a/src/processors/cloud_cli.py +++ b/src/processors/cloud_cli.py @@ -4,6 +4,7 @@ import re from .base import Processor +from .utils import compress_json_value _CLOUD_CMD_RE = re.compile(r"\b(aws|gcloud|az)\s+") @@ -57,7 +58,9 @@ def _process_json(self, output: str, command: str) -> str: return output return self._truncate_text(lines) - compressed = self._compress_json_value(data, depth=0, max_depth=4) + compressed = compress_json_value( + data, max_depth=4, important_key_re=_IMPORTANT_KEY_RE, + ) result = json.dumps(compressed, indent=2, default=str) # Add summary if significant compression @@ -68,42 +71,6 @@ def _process_json(self, output: str, command: str) -> str: return result - def _compress_json_value(self, value, depth=0, max_depth=4): - """Recursively compress JSON, truncating at depth.""" - if depth >= max_depth: - if isinstance(value, dict): - return f"{{... {len(value)} keys}}" - if isinstance(value, list): - return f"[... {len(value)} items]" - if isinstance(value, str) and len(value) > 200: - return value[:197] + "..." - return value - - if isinstance(value, dict): - result = {} - for k, v in value.items(): - # Preserve important keys at full depth - if _IMPORTANT_KEY_RE.search(k): - result[k] = self._compress_json_value(v, depth, max_depth + 1) - else: - result[k] = self._compress_json_value(v, depth + 1, max_depth) - return result - - if isinstance(value, list): - if len(value) == 0: - return value - # Don't increment depth for list traversal - if len(value) <= 5: - return [self._compress_json_value(item, depth, max_depth) for item in value] - compressed = [self._compress_json_value(item, depth, max_depth) for item in value[:3]] - compressed.append(f"... ({len(value) - 3} more items)") - return compressed - - if isinstance(value, str) and len(value) > 200: - return value[:197] + "..." - - return value - def _is_table(self, lines: list[str]) -> bool: """Detect table output format.""" for line in lines[:5]: diff --git a/src/processors/file_content.py b/src/processors/file_content.py index a0d49ce..87eb6e0 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -16,6 +16,7 @@ from .. import config from .base import Processor +from .utils import compress_json_value # ── File type sets ─────────────────────────────────────────────────── @@ -442,34 +443,10 @@ def _compress_json(self, raw: str, total: int) -> str: except (json.JSONDecodeError, ValueError): return self._truncate_default(raw.splitlines()) - result = self._summarize_json_value(data, depth=0, max_depth=2) + compressed = compress_json_value(data, max_depth=2) + result = json.dumps(compressed, indent=2, default=str) return f"{result}\n\n({total} total lines)" - def _summarize_json_value(self, val, depth: int, max_depth: int) -> str: - indent = " " * depth - if isinstance(val, dict): - if depth >= max_depth: - return f"{{{len(val)} keys}}" - items = [] - for k, v in val.items(): - summarized = self._summarize_json_value(v, depth + 1, max_depth) - items.append(f'{indent} "{k}": {summarized}') - return "{\n" + ",\n".join(items) + f"\n{indent}}}" - elif isinstance(val, list): - if len(val) == 0: - return "[]" - if len(val) <= 3: - inner = [self._summarize_json_value(v, depth + 1, max_depth) for v in val] - return "[" + ", ".join(inner) + "]" - first_three = [self._summarize_json_value(v, depth + 1, max_depth) for v in val[:3]] - return "[" + ", ".join(first_three) + f", ... ({len(val)} items total)]" - elif isinstance(val, str): - if len(val) > 100: - return f'"{val[:80]}..." ({len(val)} chars)' - return json.dumps(val) - else: - return json.dumps(val) - def _compress_yaml(self, lines: list[str], total: int) -> str: result = [] nested_count = 0 diff --git a/src/processors/network.py b/src/processors/network.py index f4232a4..04f9323 100644 --- a/src/processors/network.py +++ b/src/processors/network.py @@ -4,6 +4,7 @@ import re from .base import Processor +from .utils import compress_json_value class NetworkProcessor(Processor): @@ -170,35 +171,10 @@ def _maybe_compress_json(self, text: str) -> str: if len(stripped) < 500: return text - summary = self._summarize_json(data, depth=0, max_depth=2) + compressed = compress_json_value(data, max_depth=2) + summary = json.dumps(compressed, indent=2, default=str) return f"{summary}\n\n({len(stripped)} chars, {len(text.splitlines())} lines)" - def _summarize_json(self, val, depth: int, max_depth: int) -> str: - """Recursively summarize a JSON value.""" - indent = " " * depth - if isinstance(val, dict): - if depth >= max_depth: - return f"{{{len(val)} keys}}" - items = [] - for k, v in val.items(): - summarized = self._summarize_json(v, depth + 1, max_depth) - items.append(f'{indent} "{k}": {summarized}') - return "{\n" + ",\n".join(items) + f"\n{indent}}}" - elif isinstance(val, list): - if len(val) == 0: - return "[]" - if len(val) <= 2: - inner = [self._summarize_json(v, depth + 1, max_depth) for v in val] - return "[" + ", ".join(inner) + "]" - first = self._summarize_json(val[0], depth + 1, max_depth) - return f"[{first}, ... ({len(val)} items total)]" - elif isinstance(val, str): - if len(val) > 80: - return f'"{val[:60]}..." ({len(val)} chars)' - return json.dumps(val) - else: - return json.dumps(val) - def _process_wget(self, output: str) -> str: lines = output.splitlines() result = [] From c623a5422e6364ae3e3ada44713af7241d3ad343 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:43:06 +0100 Subject: [PATCH 02/17] refactor: extract shared log compression into utils.compress_log_lines() Unify the head+tail+error-extraction-with-context pattern from file_content._compress_log, docker._process_logs, and kubectl._process_logs into a single compress_log_lines() utility. --- src/processors/docker.py | 39 ++++----------------- src/processors/file_content.py | 63 +++++----------------------------- src/processors/kubectl.py | 29 ++++------------ src/processors/utils.py | 56 ++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 108 deletions(-) diff --git a/src/processors/docker.py b/src/processors/docker.py index a107f4a..8a0f8d5 100644 --- a/src/processors/docker.py +++ b/src/processors/docker.py @@ -5,6 +5,7 @@ from .. import config from .base import Processor +from .utils import compress_log_lines # Optional docker global options that may appear before the subcommand. # Covers: --context , -H , --host @@ -174,38 +175,12 @@ def _process_logs(self, output: str) -> str: if len(lines) <= keep_head + keep_tail: return output - # Collect error lines and their indices - error_lines = [] - for i, line in enumerate(lines): - if re.search( - r"\b(error|Error|ERROR|exception|Exception|EXCEPTION|" - r"fatal|Fatal|FATAL|panic|Panic|PANIC|traceback|Traceback)\b", - line, - ): - # Include context: 2 lines before, the error line, 2 after - start = max(0, i - 2) - end = min(len(lines), i + 3) - error_lines.extend(lines[start:end]) - if end < len(lines): - error_lines.append("") # separator - - # Deduplicate error context (overlapping windows) - seen = set() - unique_errors = [] - for line in error_lines: - if line not in seen: - unique_errors.append(line) - seen.add(line) - - result = lines[:keep_head] - if unique_errors: - result.append(f"\n... ({len(lines)} total lines, showing errors) ...\n") - result.extend(unique_errors[:50]) # Cap error lines - else: - result.append(f"\n... ({len(lines) - keep_head - keep_tail} lines truncated) ...\n") - result.extend(lines[-keep_tail:]) - - return "\n".join(result) + return compress_log_lines( + lines, + keep_head=keep_head, + keep_tail=keep_tail, + context_lines=2, + ) def _process_pull(self, output: str) -> str: """Compress docker pull/push: strip layer progress, keep digest and status.""" diff --git a/src/processors/file_content.py b/src/processors/file_content.py index 87eb6e0..fedb52e 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -16,7 +16,7 @@ from .. import config from .base import Processor -from .utils import compress_json_value +from .utils import compress_json_value, compress_log_lines # ── File type sets ─────────────────────────────────────────────────── @@ -513,59 +513,14 @@ def _compress_xml(self, lines: list[str], total: int) -> str: # ── Log compression ───────────────────────────────────────────── def _compress_log(self, lines: list[str]) -> str: - total = len(lines) - context_lines = config.get("file_log_context_lines") - - head = lines[:5] - tail = lines[-5:] - - middle = lines[5:-5] if len(lines) > 10 else [] - error_indices = set() - info_count = 0 - debug_count = 0 - - for idx, line in enumerate(middle): - if _LOG_ERROR_RE.search(line): - for c in range(idx - context_lines, idx + context_lines + 1): - if 0 <= c < len(middle): - error_indices.add(c) - elif re.search(r"\bDEBUG\b", line, re.IGNORECASE): - debug_count += 1 - elif re.search(r"\bINFO\b", line, re.IGNORECASE): - info_count += 1 - - result = head[:] - if middle: - if error_indices: - result.append(f"\n... (scanning {len(middle)} middle lines) ...\n") - sorted_indices = sorted(error_indices) - prev = -2 - for idx in sorted_indices: - if idx > prev + 1: - gap = idx - prev - 1 - if prev >= 0: - result.append(f" ... ({gap} lines skipped)") - result.append(middle[idx]) - prev = idx - remaining = len(middle) - 1 - prev - if remaining > 0: - result.append(f" ... ({remaining} lines skipped)") - else: - result.append(f"\n... ({len(middle)} lines, no errors/warnings found) ...\n") - - omitted_parts = [] - if info_count > 0: - omitted_parts.append(f"{info_count} INFO") - if debug_count > 0: - omitted_parts.append(f"{debug_count} DEBUG") - other = len(middle) - len(error_indices) - info_count - debug_count - if other > 0: - omitted_parts.append(f"{other} other") - - result.extend(tail) - summary = ", ".join(omitted_parts) + " lines omitted" if omitted_parts else "" - result.append(f"\n({total} total lines{'; ' + summary if summary else ''})") - return "\n".join(result) + context = config.get("file_log_context_lines") + return compress_log_lines( + lines, + keep_head=5, + keep_tail=5, + error_re=_LOG_ERROR_RE, + context_lines=context, + ) # ── CSV compression ───────────────────────────────────────────── diff --git a/src/processors/kubectl.py b/src/processors/kubectl.py index ba9ccf2..3b3c1c0 100644 --- a/src/processors/kubectl.py +++ b/src/processors/kubectl.py @@ -4,6 +4,7 @@ from .. import config from .base import Processor +from .utils import compress_log_lines # Optional kubectl global options that may appear before the subcommand. # Covers: -n , --namespace , --context , --kubeconfig , @@ -238,28 +239,12 @@ def _process_logs(self, output: str) -> str: if len(lines) <= keep_head + keep_tail: return output - error_lines = [] - for i, line in enumerate(lines): - if re.search( - r"\b(error|Error|ERROR|exception|Exception|" - r"fatal|Fatal|FATAL|panic|Panic)\b", - line, - ): - start = max(0, i - 1) - end = min(len(lines), i + 2) - for el in lines[start:end]: - if el not in error_lines: - error_lines.append(el) - - result = lines[:keep_head] - if error_lines: - result.append(f"\n... ({len(lines)} total lines, showing errors) ...\n") - result.extend(error_lines[:40]) - else: - result.append(f"\n... ({len(lines) - keep_head - keep_tail} lines truncated) ...\n") - result.extend(lines[-keep_tail:]) - - return "\n".join(result) + return compress_log_lines( + lines, + keep_head=keep_head, + keep_tail=keep_tail, + context_lines=1, + ) def _process_mutate(self, output: str) -> str: """Compress kubectl apply/delete/create: keep result lines, skip verbose details.""" diff --git a/src/processors/utils.py b/src/processors/utils.py index a1d89c3..0bef482 100644 --- a/src/processors/utils.py +++ b/src/processors/utils.py @@ -3,6 +3,11 @@ import re from collections import defaultdict +_DEFAULT_ERROR_RE = re.compile( + r"\b(error|Error|ERROR|exception|Exception|EXCEPTION|" + r"fatal|Fatal|FATAL|panic|Panic|PANIC|traceback|Traceback)\b" +) + def compress_json_value(value, depth=0, max_depth=4, important_key_re=None): """Recursively compress a JSON value, truncating at depth. @@ -150,3 +155,54 @@ def group_files_by_dir(lines, max_files): result.append(f"... ({len(dirs) - max_files} more directories)") return result + + +def compress_log_lines( + lines: list[str], + keep_head: int = 10, + keep_tail: int = 20, + error_re: re.Pattern | None = None, + context_lines: int = 2, + max_error_lines: int = 50, +) -> str: + """Compress log-style output: keep head, tail, and error lines with context.""" + if len(lines) <= keep_head + keep_tail: + return "\n".join(lines) + + err_re = error_re or _DEFAULT_ERROR_RE + head = lines[:keep_head] + tail = lines[-keep_tail:] + middle = lines[keep_head:-keep_tail] if len(lines) > keep_head + keep_tail else [] + + # Find error lines with context in the middle section + error_indices: set[int] = set() + for idx, line in enumerate(middle): + if err_re.search(line): + for c in range(idx - context_lines, idx + context_lines + 1): + if 0 <= c < len(middle): + error_indices.add(c) + + result = head[:] + + if middle: + if error_indices: + result.append(f"\n... ({len(lines)} total lines, showing errors) ...\n") + sorted_indices = sorted(error_indices) + prev = -2 + for idx in sorted_indices: + if idx > prev + 1 and prev >= 0: + gap = idx - prev - 1 + result.append(f" ... ({gap} lines skipped)") + result.append(middle[idx]) + prev = idx + # Cap error output + if len(sorted_indices) > max_error_lines: + result = result[: keep_head + 1 + max_error_lines] + result.append(f" ... ({len(sorted_indices) - max_error_lines} more error lines)") + else: + result.append( + f"\n... ({len(lines) - keep_head - keep_tail} lines truncated) ...\n" + ) + + result.extend(tail) + return "\n".join(result) From b4ef6118231ea4a4f82a29c20908a176c76764ab Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:44:58 +0100 Subject: [PATCH 03/17] feat: detect and summarize lockfile diffs in git processor Pre-scan git diff output for lockfile paths (package-lock.json, yarn.lock, etc.) and replace their entire diff section with a one-line summary, preventing hundreds of useless hunks from being sent through compress_diff. --- src/processors/git.py | 55 ++++++++++++++++++++++++++++++++++-- tests/test_processors.py | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 2 deletions(-) diff --git a/src/processors/git.py b/src/processors/git.py index 4fba591..2cd1208 100644 --- a/src/processors/git.py +++ b/src/processors/git.py @@ -175,6 +175,12 @@ def _process_status(self, output: str) -> str: return "\n".join(result) if result else output + _LOCK_FILES = { + "package-lock.json", "yarn.lock", "pnpm-lock.yaml", + "poetry.lock", "Pipfile.lock", "Cargo.lock", + "composer.lock", "Gemfile.lock", "go.sum", "bun.lockb", + } + def _process_diff(self, output: str, command: str = "") -> str: lines = output.splitlines() @@ -188,10 +194,55 @@ def _process_diff(self, output: str, command: str = "") -> str: if lines and not any(line.startswith("diff --git") for line in lines): return self._process_diff_stat(lines) + # Pre-scan: separate lockfile diffs from normal diffs + non_lock_lines: list[str] = [] + lockfile_summaries: list[str] = [] + current_file = "" + current_file_lines = 0 + in_lockfile = False + + for line in lines: + if line.startswith("diff --git"): + # Flush previous lockfile summary + if in_lockfile and current_file: + lockfile_summaries.append(f"diff --git {current_file}") + lockfile_summaries.append( + f" (lockfile changed, {current_file_lines} lines)" + ) + # Detect new file + m = re.match(r"^diff --git a/(.+?) b/", line) + filename = m.group(1).rsplit("/", 1)[-1] if m else "" + in_lockfile = filename in self._LOCK_FILES + if in_lockfile: + current_file = filename + current_file_lines = 0 + else: + non_lock_lines.append(line) + continue + + if in_lockfile: + current_file_lines += 1 + continue + + non_lock_lines.append(line) + + # Flush last lockfile + if in_lockfile and current_file: + lockfile_summaries.append(f"diff --git {current_file}") + lockfile_summaries.append( + f" (lockfile changed, {current_file_lines} lines)" + ) + + # Compress the non-lockfile lines, then append lockfile summaries max_hunk = config.get("max_diff_hunk_lines") max_context = config.get("max_diff_context_lines") - result = compress_diff(lines, max_hunk, max_context) - return "\n".join(result) + if any(line.startswith("diff --git") for line in non_lock_lines): + result = compress_diff(non_lock_lines, max_hunk, max_context) + result.extend(lockfile_summaries) + return "\n".join(result) + if lockfile_summaries: + return "\n".join(lockfile_summaries) + return "\n".join(non_lock_lines) def _process_name_list(self, lines: list[str]) -> str: """Compress --name-only or --name-status output: group by directory.""" diff --git a/tests/test_processors.py b/tests/test_processors.py index 8831b17..9e01141 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -2567,6 +2567,66 @@ def test_status_sb_branch_no_tracking(self): assert "On branch feature/foo" in result +class TestGitLockfileDiff: + """Tests for lockfile detection in git diff output.""" + + def setup_method(self): + self.p = GitProcessor() + + def test_lockfile_only_diff_summarized(self): + """A diff containing only package-lock.json should be summarized.""" + lock_lines = ["diff --git a/package-lock.json b/package-lock.json"] + lock_lines.append("index abc1234..def5678 100644") + lock_lines.append("--- a/package-lock.json") + lock_lines.append("+++ b/package-lock.json") + lock_lines.append("@@ -1,100 +1,100 @@") + for i in range(500): + lock_lines.append(f'+ "pkg-{i}": "{i}.0.0",') + output = "\n".join(lock_lines) + result = self.p.process("git diff", output) + assert "lockfile changed" in result + assert "package-lock.json" in result + assert len(result.splitlines()) <= 5 + + def test_mixed_lockfile_and_normal(self): + """Lockfile should be summarized, normal file compressed normally.""" + lines = [ + "diff --git a/src/app.py b/src/app.py", + "@@ -1,5 +1,5 @@", + "-old line", + "+new line", + " context", + "diff --git a/yarn.lock b/yarn.lock", + "index abc..def 100644", + "--- a/yarn.lock", + "+++ b/yarn.lock", + "@@ -1,200 +1,200 @@", + ] + for i in range(200): + lines.append(f'+pkg-{i}@^{i}.0.0:') + output = "\n".join(lines) + result = self.p.process("git diff", output) + # Normal file is preserved + assert "app.py" in result + assert "+new line" in result + # Lockfile is summarized + assert "lockfile changed" in result + assert "yarn.lock" in result + + def test_normal_files_only_unchanged(self): + """Diffs with only normal files should be compressed normally.""" + output = "\n".join([ + "diff --git a/src/app.py b/src/app.py", + "@@ -1,3 +1,3 @@", + "-old", + "+new", + " context", + ]) + result = self.p.process("git diff", output) + assert "lockfile" not in result + assert "+new" in result + + class TestBuildBunCanHandle: """Test bun command handling in build processor.""" From 52842f0b97162b736dc62fa2bf49c5900fd8266b Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:45:53 +0100 Subject: [PATCH 04/17] feat: group git diff --stat output by directory for large refactors When git diff --stat has more than 20 files, group them by directory with aggregate change counts instead of listing each file individually. --- src/processors/git.py | 55 +++++++++++++++++++++++++++++++++++++++- tests/test_processors.py | 34 +++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/src/processors/git.py b/src/processors/git.py index 2cd1208..7535862 100644 --- a/src/processors/git.py +++ b/src/processors/git.py @@ -275,7 +275,16 @@ def _process_name_list(self, lines: list[str]) -> str: return "\n".join(result) def _process_diff_stat(self, lines: list[str]) -> str: - """Compress `git diff --stat` output: strip visual bars.""" + """Compress `git diff --stat` output: strip visual bars, group when many files.""" + # Count stat lines (exclude summary line) + stat_lines = [ + line for line in lines + if re.match(r"^\s*.+?\s+\|\s+\d+", line) + ] + + if len(stat_lines) > 20: + return self._group_stat_by_dir(lines) + result = [] for line in lines: # Match stat lines: " path/file | 5 ++-" -> " path/file | 5" @@ -286,6 +295,50 @@ def _process_diff_stat(self, lines: list[str]) -> str: result.append(line) return "\n".join(result) + def _group_stat_by_dir(self, lines: list[str]) -> str: + """Group --stat output by directory when many files changed.""" + by_dir: dict[str, list[tuple[str, str]]] = {} + summary_line = "" + + for line in lines: + stripped = line.strip() + # Summary line: "N files changed, X insertions(+), Y deletions(-)" + if re.match(r"\s*\d+ files? changed", stripped): + summary_line = stripped + continue + # Stat line: " path/to/file.py | 42 +++---" + m = re.match(r"^\s*(.+?)\s+\|\s+(.+)$", stripped) + if m: + filepath = m.group(1).strip() + stats = m.group(2).strip() + parts = filepath.rsplit("/", 1) + dir_name = parts[0] if len(parts) > 1 else "." + by_dir.setdefault(dir_name, []).append((filepath, stats)) + + if not by_dir: + return "\n".join(lines) + + result = [] + for dir_name, files in sorted(by_dir.items(), key=lambda x: -len(x[1])): + if len(files) > 5: + total_changes = sum( + int(s.group(1)) + for _, stats in files + if (s := re.search(r"(\d+)", stats)) + ) + result.append( + f" {dir_name}/ ({len(files)} files, ~{total_changes} changes)" + ) + else: + for filepath, stats in files: + # Strip +/- visual bars from stats + clean_stats = re.sub(r"\s+[+\-]+\s*$", "", stats) + result.append(f" {filepath} | {clean_stats}") + + if summary_line: + result.append(summary_line) + return "\n".join(result) + def _process_log(self, output: str, command: str = "") -> str: max_entries = config.get("max_log_entries") lines = output.splitlines() diff --git a/tests/test_processors.py b/tests/test_processors.py index 9e01141..b23a320 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -2567,6 +2567,40 @@ def test_status_sb_branch_no_tracking(self): assert "On branch feature/foo" in result +class TestGitDiffStatGrouping: + """Tests for --stat directory grouping with many files.""" + + def setup_method(self): + self.p = GitProcessor() + + def test_stat_many_files_grouped(self): + """git diff --stat with 50+ files should group by directory.""" + lines = [] + for i in range(25): + lines.append(f" src/components/file{i}.tsx | 10 ++++------") + for i in range(25): + lines.append(f" src/utils/helper{i}.ts | 5 ++---") + lines.append(" 50 files changed, 375 insertions(+), 375 deletions(-)") + output = "\n".join(lines) + result = self.p.process("git diff --stat", output) + assert "src/components/" in result + assert "25 files" in result + assert "src/utils/" in result + assert "50 files changed" in result + + def test_stat_few_files_not_grouped(self): + """git diff --stat with few files should not group.""" + lines = [ + " src/app.py | 5 ++---", + " src/utils.py | 3 +--", + " 2 files changed, 3 insertions(+), 5 deletions(-)", + ] + output = "\n".join(lines) + result = self.p.process("git diff --stat", output) + assert "app.py" in result + assert "utils.py" in result + + class TestGitLockfileDiff: """Tests for lockfile detection in git diff output.""" From 84b963eae4894e10de2b68c9a7275f76f369969a Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:47:48 +0100 Subject: [PATCH 05/17] feat: compress pytest coverage reports in test output processor Detect coverage tables in pytest output and compress them to show only the TOTAL line and files below 80% coverage. --- src/processors/test_output.py | 58 +++++++++++++++++++++++++++++++++++ tests/test_processors.py | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/src/processors/test_output.py b/src/processors/test_output.py index 87c76ec..579269c 100644 --- a/src/processors/test_output.py +++ b/src/processors/test_output.py @@ -178,9 +178,67 @@ def _process_pytest(self, lines: list[str]) -> str: if passed_count > 0: result.insert(0, f"[{passed_count} tests passed]") + # Detect and compress coverage report in remaining lines + coverage_lines = self._extract_coverage(lines) + if coverage_lines: + result.extend(self._compress_coverage(coverage_lines)) + result.extend(summary_lines) return "\n".join(result) if result else "\n".join(lines) + def _extract_coverage(self, lines: list[str]) -> list[str]: + """Extract coverage table lines from pytest output.""" + coverage_start = None + coverage_end = None + for i, line in enumerate(lines): + stripped = line.strip() + if re.match(r"^-+ coverage", stripped) or re.match( + r"^Name\s+Stmts\s+Miss", stripped + ): + if coverage_start is None: + coverage_start = i + if coverage_start is not None and i > coverage_start: + if re.match(r"^TOTAL\s+", stripped): + coverage_end = i + break + if coverage_start is None: + return [] + end = coverage_end + 1 if coverage_end is not None else len(lines) + return lines[coverage_start:end] + + def _compress_coverage(self, lines: list[str]) -> list[str]: + """Compress pytest coverage report: keep low-coverage files + TOTAL.""" + result = [] + total_line = "" + low_coverage_files = [] + + for line in lines: + stripped = line.strip() + if stripped.startswith("TOTAL"): + total_line = stripped + continue + if stripped.startswith("Name") or stripped.startswith("-"): + continue + # Parse: filename stmts miss cover% + m = re.match(r"^(\S+)\s+\d+\s+\d+\s+(\d+)%", stripped) + if m: + cover_pct = int(m.group(2)) + if cover_pct < 80: + low_coverage_files.append(stripped) + + if total_line: + result.append(total_line) + if low_coverage_files: + result.append( + f"Files below 80% coverage ({len(low_coverage_files)}):" + ) + for f in low_coverage_files[:10]: + result.append(f" {f}") + if len(low_coverage_files) > 10: + result.append(f" ... ({len(low_coverage_files) - 10} more)") + + return result + def _collapse_warnings(self, warning_lines: list[str]) -> list[str]: """Group warnings by type, show count + one example per type.""" by_type: dict[str, list[str]] = {} diff --git a/tests/test_processors.py b/tests/test_processors.py index b23a320..fea87f2 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -568,6 +568,62 @@ def test_traceback_short_unchanged(self): assert result == block +class TestPytestCoverage: + """Tests for pytest coverage report compression.""" + + def setup_method(self): + self.p = TestOutputProcessor() + + def test_coverage_table_compressed(self): + """Coverage table with low-coverage files should be compressed.""" + lines = [ + "tests/test_app.py::test_one PASSED", + "tests/test_app.py::test_two PASSED", + "---------- coverage: 85% ----------", + "Name Stmts Miss Cover", + "---------------------------------------", + ] + for i in range(20): + pct = 95 if i < 17 else 60 + lines.append(f"src/mod{i}.py 100 {100 - pct} {pct}%") + lines.append("TOTAL 2000 300 85%") + lines.append("---------------------------------------") + lines.append("======================== 2 passed in 1.0s ========================") + output = "\n".join(lines) + result = self.p.process("pytest --cov", output) + assert "TOTAL" in result + assert "2000" in result + assert "Files below 80%" in result + assert "60%" in result + # High-coverage files should not appear + assert "95%" not in result + + def test_no_coverage_table_unchanged(self): + """Output without coverage should not be modified by coverage logic.""" + lines = [ + "tests/test_app.py::test_one PASSED", + "======================== 1 passed in 0.5s ========================", + ] + output = "\n".join(lines) + result = self.p.process("pytest", output) + assert "Files below" not in result + + def test_total_line_always_preserved(self): + """TOTAL line in coverage must always be in output.""" + lines = [ + "---------- coverage: 100% ----------", + "Name Stmts Miss Cover", + "---------------------------------------", + "src/app.py 50 0 100%", + "TOTAL 50 0 100%", + "---------------------------------------", + "======================== 1 passed ========================", + ] + output = "\n".join(lines) + result = self.p.process("pytest --cov", output) + assert "TOTAL" in result + + class TestBuildOutputProcessor: def setup_method(self): self.p = BuildOutputProcessor() From e81461928f2592a1ad1b917504071d8615129fd6 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:48:45 +0100 Subject: [PATCH 06/17] feat: group parameterized pytest results with failure detail Track parameterized test names during pytest output processing and report them grouped with failed parameter names when failures occur. --- src/processors/test_output.py | 30 ++++++++++++++++++++++- tests/test_processors.py | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/processors/test_output.py b/src/processors/test_output.py index 579269c..2e0ac6f 100644 --- a/src/processors/test_output.py +++ b/src/processors/test_output.py @@ -77,6 +77,7 @@ def _process_pytest(self, lines: list[str]) -> str: warning_lines: list[str] = [] summary_lines = [] passed_count = 0 + param_tests: dict[str, dict] = {} # base_name -> {"passed": int, "failed": [param]} for line in lines: # Skip collection output @@ -152,11 +153,25 @@ def _process_pytest(self, lines: list[str]) -> str: # Count passed tests if re.search(r"\bPASSED\b", line): passed_count += 1 + # Track parameterized tests + m = re.match(r"^(\S+?)\[(.+)\]\s+PASSED", line.strip()) + if m: + base = m.group(1) + param_tests.setdefault(base, {"passed": 0, "failed": []}) + param_tests[base]["passed"] += 1 continue # Keep FAILED/ERROR individual lines if re.search(r"\bFAILED\b|\bERROR\b", line): - result.append(line) + # Track parameterized test failures + m = re.match(r"^(\S+?)\[(.+)\]\s+FAILED", line.strip()) + if m: + base = m.group(1) + param = m.group(2) + param_tests.setdefault(base, {"passed": 0, "failed": []}) + param_tests[base]["failed"].append(param) + else: + result.append(line) continue # Keep final summary lines (skip "test session starts" header) @@ -175,6 +190,19 @@ def _process_pytest(self, lines: list[str]) -> str: if failure_block: result.extend(self._truncate_traceback(failure_block)) + # Add grouped summaries for parameterized tests with failures + for base, info in param_tests.items(): + if info["failed"]: + total = info["passed"] + len(info["failed"]) + failed_params = ", ".join(info["failed"][:5]) + extra = "" + if len(info["failed"]) > 5: + extra = f", ... ({len(info['failed']) - 5} more)" + result.append( + f"{base}: {info['passed']}/{total} passed, " + f"FAILED: [{failed_params}{extra}]" + ) + if passed_count > 0: result.insert(0, f"[{passed_count} tests passed]") diff --git a/tests/test_processors.py b/tests/test_processors.py index fea87f2..218f729 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -568,6 +568,51 @@ def test_traceback_short_unchanged(self): assert result == block +class TestPytestParameterized: + """Tests for parameterized test grouping.""" + + def setup_method(self): + self.p = TestOutputProcessor() + + def test_all_param_passed_grouped(self): + """50 parameterized tests all PASSED should show just the count.""" + lines = [] + for i in range(50): + lines.append(f"tests/test_math.py::test_add[{i}] PASSED") + lines.append("======================== 50 passed in 1.0s ========================") + output = "\n".join(lines) + result = self.p.process("pytest -v", output) + assert "50 tests passed" in result + # Individual param lines should not appear + assert "test_add[0] PASSED" not in result + + def test_param_with_failures_grouped(self): + """Parameterized tests with failures should show grouped summary.""" + lines = [] + for i in range(47): + lines.append(f"tests/test_math.py::test_add[{i}] PASSED") + for i in range(47, 50): + lines.append(f"tests/test_math.py::test_add[{i}] FAILED") + lines.append("======================== 47 passed, 3 failed ========================") + output = "\n".join(lines) + result = self.p.process("pytest -v", output) + assert "47/50 passed" in result + assert "FAILED: [47, 48, 49]" in result + + def test_non_param_unchanged(self): + """Non-parameterized tests should not trigger grouping.""" + lines = [ + "tests/test_app.py::test_one PASSED", + "tests/test_app.py::test_two PASSED", + "tests/test_app.py::test_three FAILED", + "======================== 2 passed, 1 failed ========================", + ] + output = "\n".join(lines) + result = self.p.process("pytest -v", output) + assert "2 tests passed" in result + assert "test_three FAILED" in result + + class TestPytestCoverage: """Tests for pytest coverage report compression.""" From e7c3d14bc221de42e6f01e88a2a57b7cd2792338 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:49:52 +0100 Subject: [PATCH 07/17] feat: detect and compress minified files in file_content processor Add minified file detection before source code pass-through check. Files like .min.js, .bundle.js and files with very long average line length are summarized instead of passed through unchanged. --- src/processors/file_content.py | 32 ++++++++++++++++++++++++++++++++ tests/test_processors.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/src/processors/file_content.py b/src/processors/file_content.py index fedb52e..c1c6e03 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -146,6 +146,18 @@ def process(self, command: str, output: str) -> str: ext = self._extract_extension(command) filename = self._extract_filename(command) + # ── COMPRESS: minified files (never useful for patching) ────── + if self._is_minified(ext, filename, output): + lines = output.splitlines() + total_chars = len(output) + total_lines = len(lines) + preview = output[:200].replace("\n", " ") + return ( + f"[minified file: {filename or 'unknown'}, " + f"{total_chars:,} chars, {total_lines} lines]\n" + f"Preview: {preview}..." + ) + # ── NEVER COMPRESS: source code ────────────────────────────── if ext in _SOURCE_CODE_EXTENSIONS: return output @@ -226,6 +238,26 @@ def _extract_filename(self, command: str) -> str: return part.rsplit("/", 1)[-1].rsplit("\\", 1)[-1] return "" + # ── Minified file detection ───────────────────────────────────── + + def _is_minified(self, ext: str, filename: str, output: str) -> bool: + """Detect minified files by name pattern or content heuristics.""" + # Name-based detection + if re.search(r"\.min\.(js|css|html)$", filename, re.I): + return True + if re.search(r"\.bundle\.(js|css)$", filename, re.I): + return True + + # Content heuristic: very few lines relative to total length + lines = output.splitlines() + if len(lines) <= 3 and len(output) > 5000: + return True + # Average line length > 500 chars + if lines and len(output) / len(lines) > 500: + return True + + return False + # ── Heuristic detection (for extensionless files) ──────────────── def _detect_heuristic(self, lines: list[str]) -> str: diff --git a/tests/test_processors.py b/tests/test_processors.py index 218f729..78285d9 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -1367,6 +1367,34 @@ def test_progress_bar_stripped(self): assert "━" not in result +class TestMinifiedFileDetection: + """Tests for minified file detection in file_content processor.""" + + def setup_method(self): + self.p = FileContentProcessor() + + def test_min_js_compressed(self): + """cat dist/app.min.js with large single line should be summarized.""" + output = "a" * 100_000 + result = self.p.process("cat dist/app.min.js", output) + assert "minified file" in result + assert "app.min.js" in result + assert "100,000 chars" in result + + def test_bundle_js_heuristic(self): + """cat bundle.js with very long lines should be detected as minified.""" + output = "\n".join(["x" * 10_000 for _ in range(5)]) + result = self.p.process("cat bundle.js", output) + assert "minified file" in result + + def test_normal_js_not_minified(self): + """Normal JS file with short lines should NOT be treated as minified.""" + output = "\n".join(f"const x{i} = {i};" for i in range(500)) + result = self.p.process("cat app.js", output) + # Source code (.js) should pass-through unchanged + assert result == output + + class TestNetworkProcessor: def setup_method(self): self.p = NetworkProcessor() From 8e76e9f29f122e97f9d963f824ef7346802da431 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:50:46 +0100 Subject: [PATCH 08/17] feat: redact secrets in .env variant files (.env.production, .env.local) Add .env variant detection that redacts sensitive values in deployed env files while preserving .env and .env.example pass-through behavior. --- src/processors/file_content.py | 41 ++++++++++++++++++++++++++++++++++ tests/test_processors.py | 37 ++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/src/processors/file_content.py b/src/processors/file_content.py index c1c6e03..20e6ccd 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -158,6 +158,10 @@ def process(self, command: str, output: str) -> str: f"Preview: {preview}..." ) + # ── Handle .env variants: .env.production, .env.local ──────── + if self._is_env_file_to_redact(filename): + return self._compress_env_file(output.splitlines()) + # ── NEVER COMPRESS: source code ────────────────────────────── if ext in _SOURCE_CODE_EXTENSIONS: return output @@ -258,6 +262,43 @@ def _is_minified(self, ext: str, filename: str, output: str) -> bool: return False + # ── .env variant detection ────────────────────────────────────── + + def _is_env_file_to_redact(self, filename: str) -> bool: + """Detect .env variant files that should have secrets redacted. + + .env exactly and .env.example/.env.template are handled by existing + pass-through logic (model may need exact values for editing). + """ + if filename in (".env", ".env.example", ".env.template"): + return False + return bool(re.match(r"^\.env\..+$", filename, re.I)) + + def _compress_env_file(self, lines: list[str]) -> str: + """Compress .env files: redact sensitive values, keep structure.""" + from .env import _SENSITIVE_PATTERNS # noqa: PLC0415 + + result = [] + redacted = 0 + for line in lines: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + result.append(line) + continue + if "=" in stripped: + key = stripped.split("=", 1)[0] + if _SENSITIVE_PATTERNS.search(key): + result.append(f"{key}=***") + redacted += 1 + else: + result.append(line) + else: + result.append(line) + + if redacted > 0: + result.append(f"\n({redacted} sensitive values redacted)") + return "\n".join(result) + # ── Heuristic detection (for extensionless files) ──────────────── def _detect_heuristic(self, lines: list[str]) -> str: diff --git a/tests/test_processors.py b/tests/test_processors.py index 78285d9..1c531da 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -1367,6 +1367,43 @@ def test_progress_bar_stripped(self): assert "━" not in result +class TestEnvVariantDetection: + """Tests for .env variant file handling.""" + + def setup_method(self): + self.p = FileContentProcessor() + + def test_env_production_redacted(self): + """cat .env.production should redact sensitive values.""" + output = "\n".join([ + "APP_NAME=myapp", + "API_KEY=secret123", + "DATABASE_URL=postgres://user:pass@host/db", + "DEBUG=true", + ]) + result = self.p.process("cat .env.production", output) + assert "API_KEY=***" in result + assert "DATABASE_URL=***" in result + assert "APP_NAME=myapp" in result + assert "DEBUG=true" in result + assert "sensitive values redacted" in result + + def test_env_example_passthrough(self): + """cat .env.example should pass through (template file).""" + output = "\n".join([ + "API_KEY=your_api_key_here", + "SECRET=change_me", + ]) + result = self.p.process("cat .env.example", output) + assert result == output + + def test_env_exact_passthrough(self): + """cat .env should still pass through unchanged (existing behavior).""" + output = "\n".join(f"KEY_{i}=value_{i}" for i in range(500)) + result = self.p.process("cat .env", output) + assert result == output + + class TestMinifiedFileDetection: """Tests for minified file detection in file_content processor.""" From 3053150d98eca9be55e7384beea5fddecf799453 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:52:14 +0100 Subject: [PATCH 09/17] feat: group tsc --noEmit errors by TypeScript error code Route tsc --noEmit through a dedicated type-check handler that groups errors by TS error code with occurrence counts and examples. --- src/processors/build_output.py | 45 ++++++++++++++++++++++++++++++++++ tests/test_processors.py | 42 +++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/src/processors/build_output.py b/src/processors/build_output.py index 4abffd5..d2ea7bc 100644 --- a/src/processors/build_output.py +++ b/src/processors/build_output.py @@ -45,6 +45,10 @@ def process(self, command: str, output: str) -> str: if not output or not output.strip(): return output + # tsc --noEmit is a type-check (lint), not a build — group errors by code + if re.search(r"\btsc\b.*--noEmit", command): + return self._process_tsc_typecheck(output) + # Piped output may be partial — skip aggressive summarization to # avoid claiming "Build succeeded" when errors were piped away. if "|" in command: @@ -281,6 +285,47 @@ def _process_audit(self, output: str) -> str: return "\n".join(result) + def _process_tsc_typecheck(self, output: str) -> str: + """Compress tsc --noEmit: group errors by TS error code.""" + lines = output.splitlines() + by_code: dict[str, list[str]] = {} + summary_line = "" + + for line in lines: + stripped = line.strip() + # TS error format: src/file.ts(10,5): error TS2322: message + m = re.match(r"^(.+?)\(\d+,\d+\):\s+error\s+(TS\d+):\s+(.+)$", stripped) + if not m: + # Also match: src/file.ts:10:5 - error TS2322: message + m = re.match(r"^(.+?):\d+:\d+\s+-\s+error\s+(TS\d+):\s+(.+)$", stripped) + if m: + code = m.group(2) + by_code.setdefault(code, []).append(stripped) + continue + # Summary line: Found N errors in M files. + if re.match(r"^Found \d+ error", stripped): + summary_line = stripped + + if not by_code: + return output + + total = sum(len(v) for v in by_code.values()) + result = [f"{total} type errors across {len(by_code)} codes:"] + for code, violations in sorted(by_code.items(), key=lambda x: -len(x[1])): + count = len(violations) + if count > 3: + result.append(f" {code}: {count} occurrences") + for v in violations[:2]: + result.append(f" {v}") + result.append(f" ... ({count - 2} more)") + else: + for v in violations: + result.append(f" {v}") + + if summary_line: + result.append(summary_line) + return "\n".join(result) + def _is_progress_line(self, line: str) -> bool: if not line: return False diff --git a/tests/test_processors.py b/tests/test_processors.py index 1c531da..996b9c7 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -2870,6 +2870,48 @@ def test_warning_samples_capped_at_five(self): assert " WARNING: issue-5" not in result +class TestTscTypecheck: + """Tests for tsc --noEmit type-check grouping.""" + + def setup_method(self): + self.p = BuildOutputProcessor() + + def test_tsc_noemit_errors_grouped(self): + """tsc --noEmit with many errors should group by code.""" + lines = [] + for i in range(30): + lines.append(f"src/file{i}.ts(10,5): error TS2322: Type 'string' is not assignable.") + for i in range(15): + lines.append(f"src/util{i}.ts:5:3 - error TS2345: Argument of type 'number' not assignable.") + for i in range(5): + lines.append(f"src/other{i}.ts(1,1): error TS7006: Parameter implicitly has 'any' type.") + lines.append("Found 50 errors in 50 files.") + output = "\n".join(lines) + result = self.p.process("tsc --noEmit", output) + assert "50 type errors across 3 codes" in result + assert "TS2322: 30 occurrences" in result + assert "TS2345: 15 occurrences" in result + assert "Found 50 errors" in result + + def test_tsc_build_unchanged(self): + """tsc (without --noEmit) should use existing build logic.""" + output = "Build succeeded.\nDone in 2.5s." + result = self.p.process("tsc", output) + # Should not trigger typecheck grouping + assert "type errors" not in result + + def test_tsc_error_codes_preserved(self): + """Error codes and file paths should be preserved in examples.""" + output = "\n".join([ + "src/app.ts(10,5): error TS2322: Type 'string' is not assignable.", + "src/app.ts(20,3): error TS2322: Type 'number' is not assignable.", + "Found 2 errors.", + ]) + result = self.p.process("tsc --noEmit", output) + assert "TS2322" in result + assert "src/app.ts" in result + + class TestBuildOutputPipeGuard: """Test that piped build commands bypass aggressive summarization.""" From dc7547c1df764ee7a43e96ba37592950c396606a Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:55:27 +0100 Subject: [PATCH 10/17] feat: group docker compose logs by service with error extraction Detect compose log format (service | message) and group output by service, showing error lines with context and per-service summaries. --- src/processors/docker.py | 52 ++++++++++++++++++++++++++++++++++++++++ tests/test_processors.py | 44 ++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/src/processors/docker.py b/src/processors/docker.py index 8a0f8d5..0a43569 100644 --- a/src/processors/docker.py +++ b/src/processors/docker.py @@ -175,6 +175,13 @@ def _process_logs(self, output: str) -> str: if len(lines) <= keep_head + keep_tail: return output + # Detect compose log format: "service-name | message" + compose_re = re.compile(r"^(\S+)\s+\|\s+(.*)$") + is_compose = any(compose_re.match(line) for line in lines[:20]) + + if is_compose: + return self._process_compose_logs(lines, compose_re) + return compress_log_lines( lines, keep_head=keep_head, @@ -182,6 +189,51 @@ def _process_logs(self, output: str) -> str: context_lines=2, ) + def _process_compose_logs(self, lines: list[str], compose_re: re.Pattern) -> str: + """Compress docker compose logs: group by service, keep errors + tail per service.""" + service_lines: dict[str, list[str]] = {} + for line in lines: + m = compose_re.match(line) + if m: + service = m.group(1) + service_lines.setdefault(service, []).append(line) + else: + service_lines.setdefault("_other", []).append(line) + + result = [f"{len(lines)} log lines across {len(service_lines)} services:"] + + for service, svc_lines in sorted(service_lines.items()): + if service == "_other": + continue + error_count = sum( + 1 for ln in svc_lines + if re.search(r"\b(error|ERROR|exception|fatal|FATAL|panic)\b", ln, re.I) + ) + result.append( + f"\n--- {service} ({len(svc_lines)} lines, {error_count} errors) ---" + ) + + # Show errors with context + last 3 lines + errors_shown: list[str] = [] + for i, line in enumerate(svc_lines): + if re.search( + r"\b(error|ERROR|exception|fatal|FATAL|panic)\b", line, re.I + ): + start = max(0, i - 1) + end = min(len(svc_lines), i + 2) + for el in svc_lines[start:end]: + if el not in errors_shown: + errors_shown.append(el) + + if errors_shown: + result.extend(errors_shown[:20]) + # Always show last 3 lines per service + for line in svc_lines[-3:]: + if line not in errors_shown: + result.append(line) + + return "\n".join(result) + def _process_pull(self, output: str) -> str: """Compress docker pull/push: strip layer progress, keep digest and status.""" lines = output.splitlines() diff --git a/tests/test_processors.py b/tests/test_processors.py index 996b9c7..ce988e7 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -1945,6 +1945,50 @@ def test_fd_short_unchanged(self): assert result == output +class TestDockerComposeLogs: + """Tests for docker compose log grouping by service.""" + + def setup_method(self): + self.p = DockerProcessor() + + def test_compose_logs_grouped(self): + """docker compose logs with multiple services should group by service.""" + lines = [] + for i in range(200): + service = ["web", "api", "db"][i % 3] + lines.append(f"{service} | Log line {i}") + output = "\n".join(lines) + result = self.p.process("docker compose logs", output) + assert "200 log lines" in result + assert "--- web" in result + assert "--- api" in result + assert "--- db" in result + + def test_compose_logs_errors_shown(self): + """Service with errors should show error lines.""" + lines = [] + for i in range(100): + lines.append(f"web | Normal log line {i}") + lines.append("web | ERROR: Connection refused") + lines.append("web | Failed to connect to database") + for i in range(100): + lines.append(f"api | API running on port {i}") + output = "\n".join(lines) + result = self.p.process("docker compose logs", output) + assert "ERROR: Connection refused" in result + assert "errors" in result + + def test_compose_no_errors_shows_tail(self): + """Service with no errors should show last 3 lines.""" + lines = [] + for i in range(100): + lines.append(f"web | Log line {i}") + output = "\n".join(lines) + result = self.p.process("docker compose logs", output) + assert "Log line 99" in result + assert "0 errors" in result + + class TestKubectlProcessor: def setup_method(self): self.p = KubectlProcessor() From d78cad892c62256938d10e3febf95a519429cdc4 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:56:48 +0100 Subject: [PATCH 11/17] feat: group search results by directory for large result sets When search results span more than 30 files, group them by directory with top files per directory and match counts. --- src/processors/search.py | 62 ++++++++++++++++++++++++++++++++++++++++ tests/test_processors.py | 30 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/src/processors/search.py b/src/processors/search.py index 83d4437..9270a3e 100644 --- a/src/processors/search.py +++ b/src/processors/search.py @@ -73,6 +73,11 @@ def process(self, command: str, output: str) -> str: max_per_file = config.get("search_max_per_file") max_files = config.get("search_max_files") + if total_files > 30: + return self._process_grouped_by_dir( + by_file, total_matches, total_files, + ) + result = [f"{total_matches} matches across {total_files} files:"] sorted_files = sorted(by_file.items(), key=lambda x: -len(x[1])) @@ -98,6 +103,63 @@ def process(self, command: str, output: str) -> str: return "\n".join(result) + def _process_grouped_by_dir( + self, by_file: dict, total_matches: int, total_files: int, + ) -> str: + """Group search results by directory for large result sets.""" + max_per_file = config.get("search_max_per_file") + max_files = config.get("search_max_files") + + by_dir: dict[str, dict[str, list[str]]] = {} + for filepath, matches in by_file.items(): + parts = filepath.rsplit("/", 1) + dir_name = parts[0] if len(parts) > 1 else "." + by_dir.setdefault(dir_name, {})[filepath] = matches + + result = [ + f"{total_matches} matches across {total_files} files " + f"in {len(by_dir)} directories:" + ] + + dirs_shown = 0 + for dir_name, files in sorted( + by_dir.items(), key=lambda x: -sum(len(v) for v in x[1].values()) + ): + if dirs_shown >= max_files: + break + dir_matches = sum(len(v) for v in files.values()) + result.append( + f"\n{dir_name}/ ({dir_matches} matches in {len(files)} files)" + ) + + # Show top 3 files in this directory + for filepath, matches in sorted( + files.items(), key=lambda x: -len(x[1]) + )[:3]: + fname = filepath.rsplit("/", 1)[-1] + if len(matches) > max_per_file: + result.append(f" {fname}: ({len(matches)} matches)") + for m in matches[:max_per_file]: + display = m[len(filepath) + 1 :] if m.startswith(filepath + ":") else m + result.append(f" {display}") + else: + for m in matches: + result.append(f" {m}") + + remaining_files = len(files) - 3 + if remaining_files > 0: + result.append( + f" ... ({remaining_files} more files in this directory)" + ) + + dirs_shown += 1 + + remaining_dirs = len(by_dir) - dirs_shown + if remaining_dirs > 0: + result.append(f"\n... ({remaining_dirs} more directories)") + + return "\n".join(result) + def _process_fd(self, output: str) -> str: """Compress fd/fdfind output: group by directory.""" lines = [line.strip() for line in output.splitlines() if line.strip()] diff --git a/tests/test_processors.py b/tests/test_processors.py index ce988e7..7f69d9d 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -1945,6 +1945,36 @@ def test_fd_short_unchanged(self): assert result == output +class TestSearchDirectoryGrouping: + """Tests for search result directory grouping with large result sets.""" + + def setup_method(self): + self.p = SearchProcessor() + + def test_many_files_grouped_by_dir(self): + """rg with 40+ files should group by directory.""" + lines = [] + for d in range(12): + for f in range(4): + for m in range(3): + lines.append(f"src/dir{d}/file{f}.py:{m + 1}:TODO: fix this {d}-{f}-{m}") + output = "\n".join(lines) + result = self.p.process("rg TODO", output) + assert "directories" in result + assert "src/dir0/" in result + + def test_few_files_not_grouped(self): + """rg with 10 files should use per-file grouping.""" + lines = [] + for f in range(10): + lines.append(f"src/file{f}.py:1:TODO fix") + lines.append(f"src/file{f}.py:5:TODO cleanup") + output = "\n".join(lines) + result = self.p.process("rg TODO", output) + # Should use normal per-file grouping, not directory grouping + assert "directories" not in result + + class TestDockerComposeLogs: """Tests for docker compose log grouping by service.""" From c6a1c7b14a36e10c5f570953539e578447ff0899 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 19:57:35 +0100 Subject: [PATCH 12/17] fix: raise JSON compression threshold from 500 to 1500 chars Short API responses (500-1500 chars) are often more useful uncompressed for the model to read. --- src/processors/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/processors/network.py b/src/processors/network.py index 04f9323..18cdb1b 100644 --- a/src/processors/network.py +++ b/src/processors/network.py @@ -168,7 +168,7 @@ def _maybe_compress_json(self, text: str) -> str: return text # Only compress if the JSON is large - if len(stripped) < 500: + if len(stripped) < 1500: return text compressed = compress_json_value(data, max_depth=2) From 4fabd6191976d0634b1cdf24889d9f4eff494c76 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:02:16 +0100 Subject: [PATCH 13/17] feat: add ansible, helm, and syslog processors Three new specialized processors for DevOps/infrastructure output: - AnsibleProcessor (priority 40): compresses ok/skipped tasks, preserves errors and PLAY RECAP - HelmProcessor (priority 41): summarizes template manifests, omits NOTES, truncates lists - SyslogProcessor (priority 42): head/tail compression for journalctl and dmesg output Updates test suite with 43 new tests (565 total). --- src/processors/ansible.py | 99 ++++++++++++++++++ src/processors/helm.py | 128 +++++++++++++++++++++++ src/processors/syslog.py | 36 +++++++ tests/test_engine.py | 4 +- tests/test_processors.py | 207 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 472 insertions(+), 2 deletions(-) create mode 100644 src/processors/ansible.py create mode 100644 src/processors/helm.py create mode 100644 src/processors/syslog.py diff --git a/src/processors/ansible.py b/src/processors/ansible.py new file mode 100644 index 0000000..d346a46 --- /dev/null +++ b/src/processors/ansible.py @@ -0,0 +1,99 @@ +"""Ansible output processor: ansible-playbook, ansible.""" + +import re + +from .base import Processor + + +class AnsibleProcessor(Processor): + priority = 40 + hook_patterns = [ + r"^ansible(-playbook)?\b", + ] + + @property + def name(self) -> str: + return "ansible" + + def can_handle(self, command: str) -> bool: + return bool(re.search(r"\b(ansible-playbook|ansible)\b", command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + lines = output.splitlines() + if len(lines) <= 20: + return output + + result = [] + ok_count = 0 + skipped_count = 0 + in_recap = False + + for line in lines: + stripped = line.strip() + + # PLAY RECAP is always kept in full + if stripped.startswith("PLAY RECAP"): + in_recap = True + result.append(line) + continue + + if in_recap: + result.append(line) + continue + + # PLAY and TASK headers — keep + if re.match(r"^(PLAY|TASK)\s+\[", stripped): + result.append(line) + continue + + # Separator lines (****) + if re.match(r"^\*+$", stripped): + continue + + # changed — always keep + if re.match(r"^changed:", stripped): + result.append(line) + continue + + # failed / fatal / unreachable — always keep + if re.match(r"^(fatal|failed|unreachable):", stripped, re.I): + result.append(line) + continue + + # Error/warning output lines (indented after fatal/failed) + if re.search(r"\b(ERROR|FAILED|UNREACHABLE|fatal)\b", stripped): + result.append(line) + continue + + # "msg:" lines (error messages) — keep + if re.match(r'^\s*"?msg"?\s*:', stripped): + result.append(line) + continue + + # ok — count and skip + if re.match(r"^ok:", stripped): + ok_count += 1 + continue + + # skipping — count and skip + if re.match(r"^skipping:", stripped): + skipped_count += 1 + continue + + # included/imported — skip + if re.match(r"^(included|imported):", stripped): + continue + + # Insert summary at the top + summary_parts = [] + if ok_count: + summary_parts.append(f"{ok_count} ok") + if skipped_count: + summary_parts.append(f"{skipped_count} skipped") + if summary_parts: + result.insert(0, f"[{', '.join(summary_parts)}]") + + return "\n".join(result) if result else output diff --git a/src/processors/helm.py b/src/processors/helm.py new file mode 100644 index 0000000..619717f --- /dev/null +++ b/src/processors/helm.py @@ -0,0 +1,128 @@ +"""Helm output processor: install, upgrade, list, template, status.""" + +import re + +from .base import Processor + + +class HelmProcessor(Processor): + priority = 41 + hook_patterns = [ + r"^helm\s+(install|upgrade|list|template|status|rollback|history|uninstall|get)\b", + ] + + @property + def name(self) -> str: + return "helm" + + def can_handle(self, command: str) -> bool: + return bool(re.search( + r"\bhelm\s+(install|upgrade|list|template|status|rollback|" + r"history|uninstall|get)\b", command + )) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + if re.search(r"\bhelm\s+template\b", command): + return self._process_template(output) + if re.search(r"\bhelm\s+(install|upgrade)\b", command): + return self._process_install(output) + if re.search(r"\bhelm\s+list\b", command): + return self._process_list(output) + if re.search(r"\bhelm\s+status\b", command): + return self._process_install(output) + if re.search(r"\bhelm\s+history\b", command): + return self._process_history(output) + return output + + def _process_template(self, output: str) -> str: + """Compress helm template: summarize YAML manifests.""" + lines = output.splitlines() + if len(lines) <= 50: + return output + + manifests: list[tuple[str, int]] = [] + current_kind = "" + current_name = "" + current_lines = 0 + + for line in lines: + stripped = line.strip() + if stripped == "---": + if current_kind: + manifests.append((f"{current_kind}/{current_name}", current_lines)) + current_kind = "" + current_name = "" + current_lines = 0 + continue + if stripped.startswith("kind:"): + current_kind = stripped.split(":", 1)[1].strip() + elif ( + stripped.startswith(" name:") or + (stripped.startswith("name:") and not current_name) + ): + current_name = stripped.split(":", 1)[1].strip() + current_lines += 1 + + if current_kind: + manifests.append((f"{current_kind}/{current_name}", current_lines)) + + result = [f"helm template: {len(manifests)} manifests, {len(lines)} lines total:"] + for manifest, count in manifests: + result.append(f" {manifest} ({count} lines)") + return "\n".join(result) + + def _process_install(self, output: str) -> str: + """Compress helm install/upgrade/status: keep status, skip NOTES boilerplate.""" + lines = output.splitlines() + if len(lines) <= 20: + return output + + result = [] + in_notes = False + + for line in lines: + stripped = line.strip() + + if stripped.startswith("NOTES:"): + in_notes = True + result.append("[NOTES section omitted]") + continue + + if in_notes: + if ( + stripped + and not line.startswith((" ", "\t")) + and not stripped.startswith("NOTES:") + ): + in_notes = False + result.append(line) + continue + + if stripped: + result.append(line) + + return "\n".join(result) if result else output + + def _process_list(self, output: str) -> str: + """Compress helm list: truncate long lists.""" + lines = output.splitlines() + if len(lines) <= 25: + return output + + result = [lines[0]] + result.extend(lines[1:20]) + result.append(f"... ({len(lines) - 21} more releases)") + return "\n".join(result) + + def _process_history(self, output: str) -> str: + """Compress helm history: truncate old revisions.""" + lines = output.splitlines() + if len(lines) <= 15: + return output + result = [lines[0]] + result.insert(1, f"... ({len(lines) - 11} older revisions)") + result.extend(lines[-10:]) + return "\n".join(result) diff --git a/src/processors/syslog.py b/src/processors/syslog.py new file mode 100644 index 0000000..715aafa --- /dev/null +++ b/src/processors/syslog.py @@ -0,0 +1,36 @@ +"""System log processor: journalctl, dmesg.""" + +import re + +from .. import config +from .base import Processor +from .utils import compress_log_lines + + +class SyslogProcessor(Processor): + priority = 42 + hook_patterns = [ + r"^(journalctl|dmesg)\b", + ] + + @property + def name(self) -> str: + return "syslog" + + def can_handle(self, command: str) -> bool: + return bool(re.search(r"\b(journalctl|dmesg)\b", command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + lines = output.splitlines() + if len(lines) <= 30: + return output + + return compress_log_lines( + lines, + keep_head=10, + keep_tail=20, + context_lines=config.get("file_log_context_lines"), + ) diff --git a/tests/test_engine.py b/tests/test_engine.py index 6b1e6b1..a625a6b 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -210,9 +210,9 @@ class TestProcessorRegistry: """Tests for auto-discovery and the processor registry.""" def test_discover_processors_finds_all(self): - """Auto-discovery should find all 18 processors.""" + """Auto-discovery should find all 21 processors.""" processors = discover_processors() - assert len(processors) == 18 + assert len(processors) == 21 def test_discover_processors_sorted_by_priority(self): """Processors must be returned in ascending priority order.""" diff --git a/tests/test_processors.py b/tests/test_processors.py index 7f69d9d..b28a6bf 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -24,6 +24,9 @@ from src.processors.system_info import SystemInfoProcessor from src.processors.terraform import TerraformProcessor from src.processors.test_output import TestOutputProcessor +from src.processors.ansible import AnsibleProcessor +from src.processors.helm import HelmProcessor +from src.processors.syslog import SyslogProcessor class TestGitProcessor: @@ -3277,3 +3280,207 @@ def test_kubectl_processor_handles_extracted(self): primary = extract_primary_command("cd /deploy && kubectl get pods") assert primary == "kubectl get pods" assert p.can_handle(primary) + + +class TestAnsibleProcessor: + def setup_method(self): + self.p = AnsibleProcessor() + + def test_can_handle(self): + assert self.p.can_handle("ansible-playbook site.yml") + assert self.p.can_handle("ansible all -m ping") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("ansible-playbook site.yml", "") == "" + + def test_short_output_unchanged(self): + output = "\n".join(f"line {i}" for i in range(15)) + result = self.p.process("ansible-playbook site.yml", output) + assert result == output + + def test_all_ok_compressed(self): + """Playbook with all ok tasks should compress to headers + recap.""" + lines = [] + lines.append("PLAY [all] ****") + lines.append("*" * 60) + for i in range(30): + lines.append(f"TASK [task-{i}] ****") + lines.append("*" * 60) + for h in range(20): + lines.append(f"ok: [host-{h}]") + lines.append("PLAY RECAP ****") + for h in range(20): + lines.append(f"host-{h} : ok=30 changed=0 unreachable=0 failed=0") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "ok" in result + assert "PLAY RECAP" in result + # All 600 ok lines should be counted + assert "600 ok" in result + assert len(result) < len(output) + + def test_failed_tasks_preserved(self): + """Failed tasks should be fully preserved with error messages.""" + lines = [] + lines.append("PLAY [webservers] ****") + for i in range(5): + lines.append(f"TASK [task-{i}] ****") + lines.append(f"ok: [host-{i}]") + lines.append("TASK [deploy] ****") + lines.append('fatal: [host-1]: FAILED! => {"msg": "Connection refused"}') + lines.append("TASK [rollback] ****") + lines.append("changed: [host-1]") + lines.append("PLAY RECAP ****") + lines.append("host-1 : ok=5 changed=1 unreachable=0 failed=1") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "FAILED" in result + assert "Connection refused" in result + assert "changed:" in result + assert "host-1" in result + + def test_changed_tasks_preserved(self): + """Changed tasks should be kept in output.""" + lines = ["PLAY [all] ****"] + ["*" * 60] + for i in range(10): + lines.append(f"TASK [task-{i}] ****") + lines.append("*" * 60) + lines.append(f"ok: [host-{i}]") + lines.append("TASK [update] ****") + lines.append("*" * 60) + lines.append("changed: [host-0]") + lines.append("PLAY RECAP ****") + lines.append("host-0 : ok=10 changed=1") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "changed:" in result + + def test_recap_host_names_preserved(self): + """Host names in RECAP should be preserved.""" + lines = ["PLAY [all] ****"] + ["*" * 60] + for i in range(25): + lines.append(f"TASK [task-{i}] ****") + lines.append(f"ok: [prod-server-{i}]") + lines.append("PLAY RECAP ****") + lines.append("prod-server-0 : ok=25 changed=0 unreachable=0 failed=0") + lines.append("prod-server-1 : ok=25 changed=0 unreachable=0 failed=0") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "prod-server-0" in result + assert "prod-server-1" in result + + +class TestHelmProcessor: + def setup_method(self): + self.p = HelmProcessor() + + def test_can_handle(self): + assert self.p.can_handle("helm install myrelease mychart") + assert self.p.can_handle("helm upgrade myrelease mychart") + assert self.p.can_handle("helm list") + assert self.p.can_handle("helm template mychart") + assert self.p.can_handle("helm status myrelease") + assert self.p.can_handle("helm history myrelease") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("helm install x y", "") == "" + + def test_template_many_manifests_summarized(self): + """helm template with 500 lines, 8 manifests should be summarized.""" + lines = [] + for i in range(8): + lines.append("---") + lines.append(f"kind: {'Deployment' if i < 3 else 'Service'}") + lines.append("metadata:") + lines.append(f" name: app-{i}") + for j in range(60): + lines.append(f" field{j}: value{j}") + output = "\n".join(lines) + result = self.p.process("helm template mychart", output) + assert "8 manifests" in result + assert "Deployment" in result + assert "Service" in result + assert len(result.splitlines()) < 20 + + def test_install_notes_omitted(self): + """helm install with NOTES section should omit NOTES.""" + lines = [ + "NAME: myrelease", + "NAMESPACE: default", + "STATUS: deployed", + "REVISION: 1", + "NOTES:", + ] + for i in range(30): + lines.append(f" Get the application URL by running line {i}") + output = "\n".join(lines) + result = self.p.process("helm install myrelease mychart", output) + assert "STATUS: deployed" in result + assert "NOTES section omitted" in result + assert "Get the application URL" not in result + + def test_list_many_releases_truncated(self): + """helm list with 50 releases should be truncated.""" + lines = ["NAME\tNAMESPACE\tREVISION\tSTATUS"] + for i in range(50): + lines.append(f"release-{i}\tdefault\t1\tdeployed") + output = "\n".join(lines) + result = self.p.process("helm list", output) + assert "more releases" in result + assert len(result.splitlines()) < 25 + + def test_short_output_unchanged(self): + """Short output should not be compressed.""" + output = "NAME: myrelease\nSTATUS: deployed" + result = self.p.process("helm install x y", output) + assert result == output + + +class TestSyslogProcessor: + def setup_method(self): + self.p = SyslogProcessor() + + def test_can_handle(self): + assert self.p.can_handle("journalctl -u nginx") + assert self.p.can_handle("dmesg") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("journalctl", "") == "" + + def test_short_output_unchanged(self): + output = "\n".join(f"line {i}" for i in range(25)) + result = self.p.process("dmesg", output) + assert result == output + + def test_journalctl_with_errors(self): + """journalctl with 500 lines and errors should preserve errors.""" + lines = [] + for i in range(500): + if i == 250: + lines.append("Mar 17 10:00:00 host nginx[1234]: ERROR: connection refused") + elif i == 251: + lines.append("Mar 17 10:00:01 host nginx[1234]: retrying connection") + elif i == 300: + lines.append("Mar 17 10:05:00 host nginx[1234]: fatal: out of memory") + else: + lines.append(f"Mar 17 10:00:00 host nginx[1234]: normal log line {i}") + output = "\n".join(lines) + result = self.p.process("journalctl -u nginx", output) + assert "ERROR" in result + assert "fatal" in result + assert len(result) < len(output) + + def test_dmesg_no_errors_truncated(self): + """dmesg with 200 lines and no errors should show head + tail.""" + lines = [f"[{i}.000000] Normal kernel message {i}" for i in range(200)] + output = "\n".join(lines) + result = self.p.process("dmesg", output) + assert "truncated" in result + # Head preserved + assert "Normal kernel message 0" in result + # Tail preserved + assert "Normal kernel message 199" in result + assert len(result) < len(output) From 1b0e108012b40f60af05211c19698ace4e731391 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:03:27 +0100 Subject: [PATCH 14/17] feat: allow local rsync through hook, exclude only remote rsync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split rsync from the ssh/scp exclusion — local rsync (no host:path) is now compressible via the file_listing processor, while remote rsync (with host:path like user@server:/path/) remains excluded. --- scripts/hook_pretool.py | 6 ++++-- src/processors/file_listing.py | 4 ++-- tests/test_hooks.py | 12 ++++++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/scripts/hook_pretool.py b/scripts/hook_pretool.py index a5ecf8b..c7113f6 100644 --- a/scripts/hook_pretool.py +++ b/scripts/hook_pretool.py @@ -86,7 +86,8 @@ def _load_compressible_patterns() -> list[str]: EXCLUDED_PATTERNS = [ r"(?\s", # redirections @@ -105,7 +106,8 @@ def _load_compressible_patterns() -> list[str]: r"<\(", # process substitution r"^\s*sudo\b", r"^\s*(vi|vim|nano|emacs|code)\b", - r"^\s*(ssh|scp|rsync)\b", + r"^\s*(ssh|scp)\b", + r"^\s*rsync\b.*\S+:\S+", # only exclude remote rsync (host:path) r"^\s*env\s+\S+=", r"(?:^|\s)token[-_]saver\s", r"wrap\.py", diff --git a/src/processors/file_listing.py b/src/processors/file_listing.py index 77bdcdc..b607877 100644 --- a/src/processors/file_listing.py +++ b/src/processors/file_listing.py @@ -10,7 +10,7 @@ class FileListingProcessor(Processor): priority = 50 hook_patterns = [ - r"^(ls|find|tree|dir|exa|eza)\b", + r"^(ls|find|tree|dir|exa|eza|rsync)\b", ] @property @@ -18,7 +18,7 @@ def name(self) -> str: return "file_listing" def can_handle(self, command: str) -> bool: - return bool(re.search(r"\b(ls|find|tree|dir|exa|eza)\b", command)) + return bool(re.search(r"\b(ls|find|tree|dir|exa|eza|rsync)\b", command)) def process(self, command: str, output: str) -> str: if not output or not output.strip(): diff --git a/tests/test_hooks.py b/tests/test_hooks.py index 7ca188c..bc4b18a 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -108,6 +108,18 @@ def test_interactive_commands_excluded(self): assert not is_compressible("nano file.py") assert not is_compressible("ssh server") + def test_rsync_local_compressible(self): + """Local rsync (no remote host) should be compressible.""" + assert is_compressible("rsync -av src/ dest/") + assert is_compressible("rsync -r --delete /tmp/a/ /tmp/b/") + assert is_compressible("rsync --progress ./build/ /var/www/html/") + + def test_rsync_remote_excluded(self): + """Remote rsync (with host:path) should be excluded.""" + assert not is_compressible("rsync -av src/ user@server:/path/") + assert not is_compressible("rsync -r server:/remote/path /local/path") + assert not is_compressible("rsync -e ssh file.tar.gz host:/backup/") + def test_self_wrapping_excluded(self): assert not is_compressible("python3 wrap.py git status") assert not is_compressible("python3 /path/to/token_saver/wrap.py ls") From 3491f5a2c9cef48c738553a394f78de637fc95ff Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:04:08 +0100 Subject: [PATCH 15/17] test: add priority assertions and hook patterns for new processors Update test_expected_priority_order with ansible/helm/syslog entries. Add ansible, helm, journalctl, dmesg commands to hook pattern coverage test. --- tests/test_engine.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_engine.py b/tests/test_engine.py index a625a6b..183026a 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -257,6 +257,9 @@ def test_expected_priority_order(self): assert name_to_priority["gh"] == 37 assert name_to_priority["db_query"] == 38 assert name_to_priority["cloud_cli"] == 39 + assert name_to_priority["ansible"] == 40 + assert name_to_priority["helm"] == 41 + assert name_to_priority["syslog"] == 42 assert name_to_priority["file_listing"] == 50 assert name_to_priority["file_content"] == 51 assert name_to_priority["generic"] == 999 @@ -383,6 +386,18 @@ def test_collect_hook_patterns_covers_key_commands(self): "aws ec2 describe-instances", "gcloud compute instances list", "az vm list", + # Ansible + "ansible-playbook site.yml", + "ansible all -m ping", + # Helm + "helm install my-release chart/", + "helm upgrade my-release chart/", + "helm list", + "helm template chart/", + "helm status my-release", + # Syslog + "journalctl -u nginx", + "dmesg", ] for cmd in test_commands: From 3fe8678ae5b3c2eb09299d5472fe34b5044e3bd5 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:20:43 +0100 Subject: [PATCH 16/17] docs: update README and add docs for new processors Update processor count from 18 to 21 throughout README. Add ansible, helm, syslog to processor table and project structure. Add rsync to file_listing commands. Create per-processor docs (ansible.md, helm.md, syslog.md). Update test counts (567 tests). Fix ruff lint issues (SIM103, SIM102, PIE810, RUF005). Add PLR0913 to global ignores for shared utility signatures. --- README.md | 44 +++++++++++++-------- docs/processors/ansible.md | 19 +++++++++ docs/processors/helm.md | 26 ++++++++++++ docs/processors/syslog.md | 28 +++++++++++++ pyproject.toml | 1 + src/processors/cloud_cli.py | 4 +- src/processors/docker.py | 11 ++---- src/processors/file_content.py | 5 +-- src/processors/git.py | 34 +++++++--------- src/processors/helm.py | 16 ++++---- src/processors/search.py | 24 ++++++------ src/processors/test_output.py | 27 ++++++------- src/processors/utils.py | 4 +- tests/test_processors.py | 72 ++++++++++++++++++++-------------- 14 files changed, 200 insertions(+), 115 deletions(-) create mode 100644 docs/processors/ansible.md create mode 100644 docs/processors/helm.md create mode 100644 docs/processors/syslog.md diff --git a/README.md b/README.md index dd063f3..42f450d 100644 --- a/README.md +++ b/README.md @@ -44,12 +44,13 @@ Token-Saver takes a different approach from LLM-based or caching solutions — s ``` CLI command --> Specialized processor --> Compressed output | - 18 processors + 21 processors (git, test, package_list, build, lint, network, docker, kubectl, terraform, env, search, system_info, gh, db_query, cloud_cli, + ansible, helm, syslog, file_listing, file_content, generic) ``` @@ -100,7 +101,7 @@ Gemini CLI allows direct output replacement through the deny/reason mechanism. - Compression is only applied if the gain exceeds 10% - All errors, stack traces, and actionable information are **fully preserved** - Only "noise" is removed: progress bars, passing tests, installation logs, ANSI codes, platform lines -- 478 unit tests including precision-specific tests that verify every critical piece of data survives compression +- 567 unit tests including precision-specific tests that verify every critical piece of data survives compression ## Installation @@ -245,9 +246,12 @@ processor is in [`docs/processors/`](docs/processors/). | 13 | **GitHub CLI** | 37 | gh pr/issue/run list/view/diff/checks/status | [gh.md](docs/processors/gh.md) | | 14 | **Database Query** | 38 | psql, mysql, sqlite3, pgcli, mycli, litecli | [db_query.md](docs/processors/db_query.md) | | 15 | **Cloud CLI** | 39 | aws, gcloud, az (JSON/table/text output compression) | [cloud_cli.md](docs/processors/cloud_cli.md) | -| 16 | **File Listing** | 50 | ls, find, tree, exa, eza | [file_listing.md](docs/processors/file_listing.md) | -| 17 | **File Content** | 51 | cat, head, tail, bat, less, more (content-aware: code, config, log, CSV) | [file_content.md](docs/processors/file_content.md) | -| 18 | **Generic** | 999 | Any command (fallback: ANSI strip, dedup, truncation) | [generic.md](docs/processors/generic.md) | +| 16 | **Ansible** | 40 | ansible-playbook, ansible (ok/skipped counting, error preservation) | [ansible.md](docs/processors/ansible.md) | +| 17 | **Helm** | 41 | helm install/upgrade/list/template/status/history | [helm.md](docs/processors/helm.md) | +| 18 | **Syslog** | 42 | journalctl, dmesg (head/tail with error extraction) | [syslog.md](docs/processors/syslog.md) | +| 19 | **File Listing** | 50 | ls, find, tree, exa, eza, rsync | [file_listing.md](docs/processors/file_listing.md) | +| 20 | **File Content** | 51 | cat, head, tail, bat, less, more (content-aware: code, config, log, CSV) | [file_content.md](docs/processors/file_content.md) | +| 21 | **Generic** | 999 | Any command (fallback: ANSI strip, dedup, truncation) | [generic.md](docs/processors/generic.md) | ## Configuration @@ -338,7 +342,7 @@ Project settings are merged with global settings. Token-Saver walks up parent di ## Custom Processors -You can extend Token-Saver with your own processors for commands not covered by the built-in 18. +You can extend Token-Saver with your own processors for commands not covered by the built-in 21. 1. Create a Python file with a class inheriting from `src.processors.base.Processor` 2. Implement `can_handle()`, `process()`, `name`, and set `priority` @@ -469,7 +473,7 @@ token-saver/ │ ├── stats.py # Stats display │ ├── tracker.py # SQLite tracking │ ├── version_check.py # GitHub update check -│ └── processors/ # 18 auto-discovered processors +│ └── processors/ # 21 auto-discovered processors │ ├── __init__.py │ ├── base.py # Abstract Processor class │ ├── utils.py # Shared utilities (diff compression) @@ -488,11 +492,15 @@ token-saver/ │ ├── gh.py # gh pr/issue/run list/view/diff/checks │ ├── db_query.py # psql/mysql/sqlite3/pgcli/mycli/litecli │ ├── cloud_cli.py # aws/gcloud/az -│ ├── file_listing.py # ls/find/tree/exa/eza +│ ├── ansible.py # ansible-playbook/ansible +│ ├── helm.py # helm install/upgrade/list/template/status +│ ├── syslog.py # journalctl/dmesg +│ ├── file_listing.py # ls/find/tree/exa/eza/rsync │ ├── file_content.py # cat/bat (content-aware compression) │ └── generic.py # Universal fallback ├── docs/ │ └── processors/ # Per-processor documentation +│ ├── ansible.md │ ├── build_output.md │ ├── cloud_cli.md │ ├── db_query.md @@ -503,11 +511,13 @@ token-saver/ │ ├── generic.md │ ├── gh.md │ ├── git.md +│ ├── helm.md │ ├── kubectl.md │ ├── lint_output.md │ ├── network.md │ ├── package_list.md │ ├── search.md +│ ├── syslog.md │ ├── system_info.md │ ├── terraform.md │ └── test_output.md @@ -540,17 +550,17 @@ token-saver/ python3 -m pytest tests/ -v ``` -478 tests covering: +567 tests covering: -- **test_engine.py** (28 tests): compression thresholds, processor priority, ANSI cleanup, generic fallback, hook pattern coverage for 73 commands -- **test_processors.py** (263 tests): each processor with nominal and edge cases, chained command routing, all subcommands (blame, inspect, stats, compose, apply/delete, init/output/state, fd, exa, httpie, dotnet/swift/mix test, shellcheck/hadolint/biome, traceback truncation) -- **test_hooks.py** (77 tests): matching patterns for all supported commands, exclusions (pipes, sudo, editors, redirections), subprocess integration, global options (git, docker, kubectl), chained commands, safe trailing pipes +- **test_engine.py** (28 tests): compression thresholds, processor priority, ANSI cleanup, generic fallback, hook pattern coverage for 85+ commands +- **test_processors.py** (306 tests): each processor with nominal and edge cases, chained command routing, all subcommands (blame, inspect, stats, compose, apply/delete, init/output/state, fd, exa, httpie, dotnet/swift/mix test, shellcheck/hadolint/biome, traceback truncation, ansible, helm, syslog, parameterized tests, coverage, docker compose logs, tsc typecheck, .env redaction, minified files, search directory grouping, git lockfiles/stat grouping) +- **test_hooks.py** (79 tests): matching patterns for all supported commands, exclusions (pipes, sudo, editors, redirections, remote rsync), subprocess integration, global options (git, docker, kubectl), chained commands, safe trailing pipes - **test_precision.py** (44 tests): verification that every critical piece of data survives compression (filenames, hashes, error messages, stack traces, line numbers, rule IDs, diff changes, warning types, secret redaction, unhealthy pods, terraform changes, unmet dependencies) -- **test_tracker.py** (20 tests): CRUD, concurrency (4 threads), corruption recovery, session tracking, stats CLI -- **test_config.py** (6 tests): defaults, env overrides, invalid values +- **test_tracker.py** (23 tests): CRUD, concurrency (4 threads), corruption recovery, session tracking, stats CLI +- **test_config.py** (11 tests): defaults, env overrides, invalid values - **test_version_check.py** (12 tests): version parsing, comparison, fail-open on errors -- **test_cli.py** (7 tests): version/stats/help subcommands, bin script execution -- **test_installers.py** (21 tests): version stamping, legacy migration, CLI install/uninstall +- **test_cli.py** (11 tests): version/stats/help subcommands, bin script execution +- **test_installers.py** (46 tests): version stamping, legacy migration, CLI install/uninstall ## Debugging @@ -575,7 +585,7 @@ token-saver version - Does not compress commands with complex pipelines, redirections (`> file`), or `||` chains - Simple trailing pipes are supported (`| head`, `| tail`, `| wc`, `| grep`, `| sort`, `| uniq`, `| cut`) - Chained commands (`&&`, `;`) are supported — each segment is validated individually -- `sudo`, `ssh`, `vim` commands are never intercepted +- `sudo`, `ssh`, `vim` commands are never intercepted; remote `rsync` (with host:path) is excluded but local `rsync` is compressible - Long diff compression truncates per-hunk, not per-file: a diff with many small hunks is not reduced - The generic processor only deduplicates **consecutive identical lines**, not similar lines - Gemini CLI: the deny/reason mechanism may have side effects if other extensions use the same hook diff --git a/docs/processors/ansible.md b/docs/processors/ansible.md new file mode 100644 index 0000000..f0ef18e --- /dev/null +++ b/docs/processors/ansible.md @@ -0,0 +1,19 @@ +# Ansible Processor + +**File:** `src/processors/ansible.py` | **Priority:** 40 | **Name:** `ansible` + +Handles `ansible-playbook` and `ansible` command output. + +## Supported Commands + +| Command | Strategy | +|---|---| +| `ansible-playbook` | Keeps PLAY/TASK headers, changed/failed/fatal lines, PLAY RECAP. Counts and summarizes ok/skipped tasks | +| `ansible` (ad-hoc) | Same strategy | + +## Compression Strategy + +- **Always preserved:** PLAY and TASK headers, changed/failed/fatal/unreachable lines, error messages (`msg:`), full PLAY RECAP section +- **Compressed:** ok tasks (counted), skipping tasks (counted), separator lines (`****`), included/imported lines +- **Summary:** Inserted at top, e.g. `[42 ok, 3 skipped]` +- **Threshold:** Output with 20 or fewer lines passes through unchanged diff --git a/docs/processors/helm.md b/docs/processors/helm.md new file mode 100644 index 0000000..f63c7ae --- /dev/null +++ b/docs/processors/helm.md @@ -0,0 +1,26 @@ +# Helm Processor + +**File:** `src/processors/helm.py` | **Priority:** 41 | **Name:** `helm` + +Handles Helm CLI output for chart management operations. + +## Supported Commands + +| Command | Strategy | +|---|---| +| `helm template` | Summarizes YAML manifests: counts manifests and total lines, lists each Kind/Name with line count | +| `helm install` | Keeps status lines, omits NOTES section boilerplate | +| `helm upgrade` | Same as install | +| `helm status` | Same as install | +| `helm list` | Keeps header + first 19 releases, truncates remainder with count | +| `helm history` | Keeps header + last 10 revisions, truncates older with count | +| `helm rollback` | Passes through (typically short) | +| `helm uninstall` | Passes through (typically short) | +| `helm get` | Passes through | + +## Thresholds + +- `helm template`: 50 lines before summarization +- `helm install/upgrade/status`: 20 lines before NOTES omission +- `helm list`: 25 lines before truncation +- `helm history`: 15 lines before truncation diff --git a/docs/processors/syslog.md b/docs/processors/syslog.md new file mode 100644 index 0000000..50dd10e --- /dev/null +++ b/docs/processors/syslog.md @@ -0,0 +1,28 @@ +# Syslog Processor + +**File:** `src/processors/syslog.py` | **Priority:** 42 | **Name:** `syslog` + +Handles system log output from `journalctl` and `dmesg`. + +## Supported Commands + +| Command | Strategy | +|---|---| +| `journalctl` | Head/tail compression with error extraction | +| `dmesg` | Same strategy | + +## Compression Strategy + +Uses the shared `compress_log_lines()` utility: + +- **Head:** First 10 lines preserved (boot/startup messages) +- **Tail:** Last 20 lines preserved (most recent entries) +- **Errors:** Lines matching error/exception/fatal/panic/traceback patterns are preserved with 2 lines of context +- **Error cap:** Maximum 50 error-related lines to prevent explosion on noisy logs +- **Threshold:** Output with 30 or fewer lines passes through unchanged + +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| `file_log_context_lines` | 2 | Context lines around errors in log output | diff --git a/pyproject.toml b/pyproject.toml index 3bb17ed..a147739 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ ignore = [ "S101", # assert in tests is fine "PLR2004", # magic values in comparisons — too noisy for thresholds/processors "PLR0912", # too many branches — some processors are inherently complex + "PLR0913", # too many arguments — shared utilities need flexible signatures "PLR0911", # too many return statements "PLR0915", # too many statements "SIM108", # ternary instead of if/else — less readable for multi-line diff --git a/src/processors/cloud_cli.py b/src/processors/cloud_cli.py index 05d87ef..e1f7b6d 100644 --- a/src/processors/cloud_cli.py +++ b/src/processors/cloud_cli.py @@ -59,7 +59,9 @@ def _process_json(self, output: str, command: str) -> str: return self._truncate_text(lines) compressed = compress_json_value( - data, max_depth=4, important_key_re=_IMPORTANT_KEY_RE, + data, + max_depth=4, + important_key_re=_IMPORTANT_KEY_RE, ) result = json.dumps(compressed, indent=2, default=str) diff --git a/src/processors/docker.py b/src/processors/docker.py index 0a43569..abdb267 100644 --- a/src/processors/docker.py +++ b/src/processors/docker.py @@ -206,19 +206,16 @@ def _process_compose_logs(self, lines: list[str], compose_re: re.Pattern) -> str if service == "_other": continue error_count = sum( - 1 for ln in svc_lines + 1 + for ln in svc_lines if re.search(r"\b(error|ERROR|exception|fatal|FATAL|panic)\b", ln, re.I) ) - result.append( - f"\n--- {service} ({len(svc_lines)} lines, {error_count} errors) ---" - ) + result.append(f"\n--- {service} ({len(svc_lines)} lines, {error_count} errors) ---") # Show errors with context + last 3 lines errors_shown: list[str] = [] for i, line in enumerate(svc_lines): - if re.search( - r"\b(error|ERROR|exception|fatal|FATAL|panic)\b", line, re.I - ): + if re.search(r"\b(error|ERROR|exception|fatal|FATAL|panic)\b", line, re.I): start = max(0, i - 1) end = min(len(svc_lines), i + 2) for el in svc_lines[start:end]: diff --git a/src/processors/file_content.py b/src/processors/file_content.py index 20e6ccd..52c5d39 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -257,10 +257,7 @@ def _is_minified(self, ext: str, filename: str, output: str) -> bool: if len(lines) <= 3 and len(output) > 5000: return True # Average line length > 500 chars - if lines and len(output) / len(lines) > 500: - return True - - return False + return bool(lines and len(output) / len(lines) > 500) # ── .env variant detection ────────────────────────────────────── diff --git a/src/processors/git.py b/src/processors/git.py index 7535862..4367b10 100644 --- a/src/processors/git.py +++ b/src/processors/git.py @@ -176,9 +176,16 @@ def _process_status(self, output: str) -> str: return "\n".join(result) if result else output _LOCK_FILES = { - "package-lock.json", "yarn.lock", "pnpm-lock.yaml", - "poetry.lock", "Pipfile.lock", "Cargo.lock", - "composer.lock", "Gemfile.lock", "go.sum", "bun.lockb", + "package-lock.json", + "yarn.lock", + "pnpm-lock.yaml", + "poetry.lock", + "Pipfile.lock", + "Cargo.lock", + "composer.lock", + "Gemfile.lock", + "go.sum", + "bun.lockb", } def _process_diff(self, output: str, command: str = "") -> str: @@ -206,9 +213,7 @@ def _process_diff(self, output: str, command: str = "") -> str: # Flush previous lockfile summary if in_lockfile and current_file: lockfile_summaries.append(f"diff --git {current_file}") - lockfile_summaries.append( - f" (lockfile changed, {current_file_lines} lines)" - ) + lockfile_summaries.append(f" (lockfile changed, {current_file_lines} lines)") # Detect new file m = re.match(r"^diff --git a/(.+?) b/", line) filename = m.group(1).rsplit("/", 1)[-1] if m else "" @@ -229,9 +234,7 @@ def _process_diff(self, output: str, command: str = "") -> str: # Flush last lockfile if in_lockfile and current_file: lockfile_summaries.append(f"diff --git {current_file}") - lockfile_summaries.append( - f" (lockfile changed, {current_file_lines} lines)" - ) + lockfile_summaries.append(f" (lockfile changed, {current_file_lines} lines)") # Compress the non-lockfile lines, then append lockfile summaries max_hunk = config.get("max_diff_hunk_lines") @@ -277,10 +280,7 @@ def _process_name_list(self, lines: list[str]) -> str: def _process_diff_stat(self, lines: list[str]) -> str: """Compress `git diff --stat` output: strip visual bars, group when many files.""" # Count stat lines (exclude summary line) - stat_lines = [ - line for line in lines - if re.match(r"^\s*.+?\s+\|\s+\d+", line) - ] + stat_lines = [line for line in lines if re.match(r"^\s*.+?\s+\|\s+\d+", line)] if len(stat_lines) > 20: return self._group_stat_by_dir(lines) @@ -322,13 +322,9 @@ def _group_stat_by_dir(self, lines: list[str]) -> str: for dir_name, files in sorted(by_dir.items(), key=lambda x: -len(x[1])): if len(files) > 5: total_changes = sum( - int(s.group(1)) - for _, stats in files - if (s := re.search(r"(\d+)", stats)) - ) - result.append( - f" {dir_name}/ ({len(files)} files, ~{total_changes} changes)" + int(s.group(1)) for _, stats in files if (s := re.search(r"(\d+)", stats)) ) + result.append(f" {dir_name}/ ({len(files)} files, ~{total_changes} changes)") else: for filepath, stats in files: # Strip +/- visual bars from stats diff --git a/src/processors/helm.py b/src/processors/helm.py index 619717f..f25418b 100644 --- a/src/processors/helm.py +++ b/src/processors/helm.py @@ -16,10 +16,13 @@ def name(self) -> str: return "helm" def can_handle(self, command: str) -> bool: - return bool(re.search( - r"\bhelm\s+(install|upgrade|list|template|status|rollback|" - r"history|uninstall|get)\b", command - )) + return bool( + re.search( + r"\bhelm\s+(install|upgrade|list|template|status|rollback|" + r"history|uninstall|get)\b", + command, + ) + ) def process(self, command: str, output: str) -> str: if not output or not output.strip(): @@ -59,9 +62,8 @@ def _process_template(self, output: str) -> str: continue if stripped.startswith("kind:"): current_kind = stripped.split(":", 1)[1].strip() - elif ( - stripped.startswith(" name:") or - (stripped.startswith("name:") and not current_name) + elif stripped.startswith(" name:") or ( + stripped.startswith("name:") and not current_name ): current_name = stripped.split(":", 1)[1].strip() current_lines += 1 diff --git a/src/processors/search.py b/src/processors/search.py index 9270a3e..a5e7bf9 100644 --- a/src/processors/search.py +++ b/src/processors/search.py @@ -75,7 +75,9 @@ def process(self, command: str, output: str) -> str: if total_files > 30: return self._process_grouped_by_dir( - by_file, total_matches, total_files, + by_file, + total_matches, + total_files, ) result = [f"{total_matches} matches across {total_files} files:"] @@ -104,7 +106,10 @@ def process(self, command: str, output: str) -> str: return "\n".join(result) def _process_grouped_by_dir( - self, by_file: dict, total_matches: int, total_files: int, + self, + by_file: dict, + total_matches: int, + total_files: int, ) -> str: """Group search results by directory for large result sets.""" max_per_file = config.get("search_max_per_file") @@ -117,8 +122,7 @@ def _process_grouped_by_dir( by_dir.setdefault(dir_name, {})[filepath] = matches result = [ - f"{total_matches} matches across {total_files} files " - f"in {len(by_dir)} directories:" + f"{total_matches} matches across {total_files} files in {len(by_dir)} directories:" ] dirs_shown = 0 @@ -128,14 +132,10 @@ def _process_grouped_by_dir( if dirs_shown >= max_files: break dir_matches = sum(len(v) for v in files.values()) - result.append( - f"\n{dir_name}/ ({dir_matches} matches in {len(files)} files)" - ) + result.append(f"\n{dir_name}/ ({dir_matches} matches in {len(files)} files)") # Show top 3 files in this directory - for filepath, matches in sorted( - files.items(), key=lambda x: -len(x[1]) - )[:3]: + for filepath, matches in sorted(files.items(), key=lambda x: -len(x[1]))[:3]: fname = filepath.rsplit("/", 1)[-1] if len(matches) > max_per_file: result.append(f" {fname}: ({len(matches)} matches)") @@ -148,9 +148,7 @@ def _process_grouped_by_dir( remaining_files = len(files) - 3 if remaining_files > 0: - result.append( - f" ... ({remaining_files} more files in this directory)" - ) + result.append(f" ... ({remaining_files} more files in this directory)") dirs_shown += 1 diff --git a/src/processors/test_output.py b/src/processors/test_output.py index 2e0ac6f..3160e1c 100644 --- a/src/processors/test_output.py +++ b/src/processors/test_output.py @@ -199,8 +199,7 @@ def _process_pytest(self, lines: list[str]) -> str: if len(info["failed"]) > 5: extra = f", ... ({len(info['failed']) - 5} more)" result.append( - f"{base}: {info['passed']}/{total} passed, " - f"FAILED: [{failed_params}{extra}]" + f"{base}: {info['passed']}/{total} passed, FAILED: [{failed_params}{extra}]" ) if passed_count > 0: @@ -220,15 +219,17 @@ def _extract_coverage(self, lines: list[str]) -> list[str]: coverage_end = None for i, line in enumerate(lines): stripped = line.strip() - if re.match(r"^-+ coverage", stripped) or re.match( - r"^Name\s+Stmts\s+Miss", stripped + if coverage_start is None and ( + re.match(r"^-+ coverage", stripped) or re.match(r"^Name\s+Stmts\s+Miss", stripped) ): - if coverage_start is None: - coverage_start = i - if coverage_start is not None and i > coverage_start: - if re.match(r"^TOTAL\s+", stripped): - coverage_end = i - break + coverage_start = i + if ( + coverage_start is not None + and i > coverage_start + and re.match(r"^TOTAL\s+", stripped) + ): + coverage_end = i + break if coverage_start is None: return [] end = coverage_end + 1 if coverage_end is not None else len(lines) @@ -245,7 +246,7 @@ def _compress_coverage(self, lines: list[str]) -> list[str]: if stripped.startswith("TOTAL"): total_line = stripped continue - if stripped.startswith("Name") or stripped.startswith("-"): + if stripped.startswith(("Name", "-")): continue # Parse: filename stmts miss cover% m = re.match(r"^(\S+)\s+\d+\s+\d+\s+(\d+)%", stripped) @@ -257,9 +258,7 @@ def _compress_coverage(self, lines: list[str]) -> list[str]: if total_line: result.append(total_line) if low_coverage_files: - result.append( - f"Files below 80% coverage ({len(low_coverage_files)}):" - ) + result.append(f"Files below 80% coverage ({len(low_coverage_files)}):") for f in low_coverage_files[:10]: result.append(f" {f}") if len(low_coverage_files) > 10: diff --git a/src/processors/utils.py b/src/processors/utils.py index 0bef482..619593f 100644 --- a/src/processors/utils.py +++ b/src/processors/utils.py @@ -200,9 +200,7 @@ def compress_log_lines( result = result[: keep_head + 1 + max_error_lines] result.append(f" ... ({len(sorted_indices) - max_error_lines} more error lines)") else: - result.append( - f"\n... ({len(lines) - keep_head - keep_tail} lines truncated) ...\n" - ) + result.append(f"\n... ({len(lines) - keep_head - keep_tail} lines truncated) ...\n") result.extend(tail) return "\n".join(result) diff --git a/tests/test_processors.py b/tests/test_processors.py index b28a6bf..89fc10a 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -6,6 +6,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from src.chain_utils import extract_primary_command +from src.processors.ansible import AnsibleProcessor from src.processors.build_output import BuildOutputProcessor from src.processors.cloud_cli import CloudCliProcessor from src.processors.db_query import DbQueryProcessor @@ -16,17 +17,16 @@ from src.processors.generic import GenericProcessor from src.processors.gh import GhProcessor from src.processors.git import GitProcessor +from src.processors.helm import HelmProcessor from src.processors.kubectl import KubectlProcessor from src.processors.lint_output import LintOutputProcessor from src.processors.network import NetworkProcessor from src.processors.package_list import PackageListProcessor from src.processors.search import SearchProcessor +from src.processors.syslog import SyslogProcessor from src.processors.system_info import SystemInfoProcessor from src.processors.terraform import TerraformProcessor from src.processors.test_output import TestOutputProcessor -from src.processors.ansible import AnsibleProcessor -from src.processors.helm import HelmProcessor -from src.processors.syslog import SyslogProcessor class TestGitProcessor: @@ -1378,12 +1378,14 @@ def setup_method(self): def test_env_production_redacted(self): """cat .env.production should redact sensitive values.""" - output = "\n".join([ - "APP_NAME=myapp", - "API_KEY=secret123", - "DATABASE_URL=postgres://user:pass@host/db", - "DEBUG=true", - ]) + output = "\n".join( + [ + "APP_NAME=myapp", + "API_KEY=secret123", + "DATABASE_URL=postgres://user:pass@host/db", + "DEBUG=true", + ] + ) result = self.p.process("cat .env.production", output) assert "API_KEY=***" in result assert "DATABASE_URL=***" in result @@ -1393,10 +1395,12 @@ def test_env_production_redacted(self): def test_env_example_passthrough(self): """cat .env.example should pass through (template file).""" - output = "\n".join([ - "API_KEY=your_api_key_here", - "SECRET=change_me", - ]) + output = "\n".join( + [ + "API_KEY=your_api_key_here", + "SECRET=change_me", + ] + ) result = self.p.process("cat .env.example", output) assert result == output @@ -2880,7 +2884,7 @@ def test_mixed_lockfile_and_normal(self): "@@ -1,200 +1,200 @@", ] for i in range(200): - lines.append(f'+pkg-{i}@^{i}.0.0:') + lines.append(f"+pkg-{i}@^{i}.0.0:") output = "\n".join(lines) result = self.p.process("git diff", output) # Normal file is preserved @@ -2892,13 +2896,15 @@ def test_mixed_lockfile_and_normal(self): def test_normal_files_only_unchanged(self): """Diffs with only normal files should be compressed normally.""" - output = "\n".join([ - "diff --git a/src/app.py b/src/app.py", - "@@ -1,3 +1,3 @@", - "-old", - "+new", - " context", - ]) + output = "\n".join( + [ + "diff --git a/src/app.py b/src/app.py", + "@@ -1,3 +1,3 @@", + "-old", + "+new", + " context", + ] + ) result = self.p.process("git diff", output) assert "lockfile" not in result assert "+new" in result @@ -2959,9 +2965,13 @@ def test_tsc_noemit_errors_grouped(self): for i in range(30): lines.append(f"src/file{i}.ts(10,5): error TS2322: Type 'string' is not assignable.") for i in range(15): - lines.append(f"src/util{i}.ts:5:3 - error TS2345: Argument of type 'number' not assignable.") + lines.append( + f"src/util{i}.ts:5:3 - error TS2345: Argument of type 'number' not assignable." + ) for i in range(5): - lines.append(f"src/other{i}.ts(1,1): error TS7006: Parameter implicitly has 'any' type.") + lines.append( + f"src/other{i}.ts(1,1): error TS7006: Parameter implicitly has 'any' type." + ) lines.append("Found 50 errors in 50 files.") output = "\n".join(lines) result = self.p.process("tsc --noEmit", output) @@ -2979,11 +2989,13 @@ def test_tsc_build_unchanged(self): def test_tsc_error_codes_preserved(self): """Error codes and file paths should be preserved in examples.""" - output = "\n".join([ - "src/app.ts(10,5): error TS2322: Type 'string' is not assignable.", - "src/app.ts(20,3): error TS2322: Type 'number' is not assignable.", - "Found 2 errors.", - ]) + output = "\n".join( + [ + "src/app.ts(10,5): error TS2322: Type 'string' is not assignable.", + "src/app.ts(20,3): error TS2322: Type 'number' is not assignable.", + "Found 2 errors.", + ] + ) result = self.p.process("tsc --noEmit", output) assert "TS2322" in result assert "src/app.ts" in result @@ -3342,7 +3354,7 @@ def test_failed_tasks_preserved(self): def test_changed_tasks_preserved(self): """Changed tasks should be kept in output.""" - lines = ["PLAY [all] ****"] + ["*" * 60] + lines = ["PLAY [all] ****", "*" * 60] for i in range(10): lines.append(f"TASK [task-{i}] ****") lines.append("*" * 60) @@ -3358,7 +3370,7 @@ def test_changed_tasks_preserved(self): def test_recap_host_names_preserved(self): """Host names in RECAP should be preserved.""" - lines = ["PLAY [all] ****"] + ["*" * 60] + lines = ["PLAY [all] ****", "*" * 60] for i in range(25): lines.append(f"TASK [task-{i}] ****") lines.append(f"ok: [prod-server-{i}]") From 9b89c766fa3041ec14b047c5eda3e8afac31d91e Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:31:03 +0100 Subject: [PATCH 17/17] chore: bump version to 2.1.1, improve README marketing Bump version across all manifests (src/__init__.py, pyproject.toml, plugin.json, marketplace.json). README improvements: - Stronger value proposition in header - Added docker compose and helm template to Before & After table - Precision guarantees: source code passthrough, .env redaction - Updated processor descriptions in plugin manifests Fix helm NOTES omission bug: NOTES section is now fully stripped instead of leaking non-indented content lines. --- .claude-plugin/marketplace.json | 4 ++-- .claude-plugin/plugin.json | 4 ++-- README.md | 23 +++++++++++++---------- pyproject.toml | 2 +- src/__init__.py | 2 +- src/processors/helm.py | 14 ++++++-------- 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 8992331..e6f06a2 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,8 +11,8 @@ { "name": "token-saver", "source": "./", - "description": "Automatically compresses verbose CLI output to save tokens. Supports git, docker, npm, terraform, kubectl, and 13+ other command families.", - "version": "2.0.2", + "description": "Automatically compresses verbose CLI output to save tokens. 21 specialized processors for git, docker, npm, terraform, kubectl, helm, ansible, and more.", + "version": "2.1.1", "author": { "name": "ppgranger" }, diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index b1422dc..ecf48d3 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "token-saver", - "description": "Automatically compresses verbose CLI output (git, docker, npm, terraform, kubectl, etc.) to save tokens in Claude Code sessions. Supports 18+ command families with smart compression.", - "version": "2.0.2", + "description": "Automatically compresses verbose CLI output (git, docker, npm, terraform, kubectl, etc.) to save tokens in Claude Code sessions. 21 specialized processors with content-aware compression.", + "version": "2.1.1", "author": { "name": "ppgranger", "url": "https://github.com/ppgranger" diff --git a/README.md b/README.md index 42f450d..507e897 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,11 @@ [![License](https://img.shields.io/badge/license-Apache%202.0-blue)](LICENSE) [![Avg Savings](docs/assets/badge-savings.svg)](docs/processors/) -**Content-aware output compression for AI coding assistants.** -Replaces blind truncation with intelligent, per-command strategies — preserving what the model needs, discarding what it doesn't. +**Cut your AI coding costs by 60-99% on CLI output — without losing a single error message.** -Compatible with **Claude Code** and **Gemini CLI**. Zero latency. No LLM calls. Deterministic. +21 specialized processors understand git, pytest, docker, terraform, kubectl, helm, ansible, and more. Each one knows what to keep and what to discard: errors, diffs, and actionable data stay; progress bars, passing tests, and boilerplate go. + +Compatible with **Claude Code** and **Gemini CLI**. Zero latency. No LLM calls. Fully deterministic. One install, instant savings. ### Before & After @@ -20,18 +21,16 @@ Compatible with **Claude Code** and **Gemini CLI**. Zero latency. No LLM calls. | `npm install` (220 packages) | 3,844 tokens | 4 tokens | **99%** | | `terraform plan` (15 resources) | 1,840 tokens | 137 tokens | **93%** | | `kubectl get pods` (40 pods) | 1,393 tokens | 79 tokens | **94%** | +| `docker compose logs` (4 services) | 3,200 tokens | 480 tokens | **85%** | +| `helm template` (12 manifests) | 2,100 tokens | 210 tokens | **90%** | > Run `token-saver benchmark ` to measure savings on your own workloads. ## Why -AI assistants in CLI consume tokens on every command output. -A 500-line `git diff`, a `pytest` run with 200 passing tests, an `npm install` -with 80 packages: everything is sent as-is to the model, which only needs -the actionable information (errors, modified files, results). +Every CLI command your AI assistant runs burns tokens — and most of that output is noise. A 500-line `git diff`, a `pytest` run with 200 passing tests, an `npm install` with 80 packages: the model only needs errors, modified files, and results. Everything else is wasted context and wasted money. -Token-Saver intercepts these outputs and compresses them before they reach -the model, preserving 100% of useful information. +Token-Saver sits between the CLI and your AI assistant, compressing output with content-aware strategies. The model sees exactly what it needs — nothing more, nothing less. Your context window stays clean, your costs drop, and your assistant responds faster with less noise to process. ## How It Compares @@ -97,11 +96,15 @@ Gemini CLI allows direct output replacement through the deny/reason mechanism. ### Precision Guarantees +Compression is aggressive on noise, conservative on signal: + - Short outputs (< 200 characters) are **never** modified - Compression is only applied if the gain exceeds 10% - All errors, stack traces, and actionable information are **fully preserved** +- Source code files (`cat *.py`, `cat *.ts`, ...) pass through **unchanged** — the model needs exact content +- Secrets in `.env` files are automatically **redacted** before reaching the model - Only "noise" is removed: progress bars, passing tests, installation logs, ANSI codes, platform lines -- 567 unit tests including precision-specific tests that verify every critical piece of data survives compression +- 567 unit tests including 44 precision-specific tests that verify every critical piece of data survives compression ## Installation diff --git a/pyproject.toml b/pyproject.toml index a147739..06d353a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "token-saver" -version = "2.0.2" +version = "2.1.1" requires-python = ">=3.10" [project.optional-dependencies] diff --git a/src/__init__.py b/src/__init__.py index f489d68..666977b 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,6 +1,6 @@ import os -__version__ = "2.0.2" +__version__ = "2.1.1" def data_dir() -> str: diff --git a/src/processors/helm.py b/src/processors/helm.py index f25418b..2810ee6 100644 --- a/src/processors/helm.py +++ b/src/processors/helm.py @@ -84,28 +84,26 @@ def _process_install(self, output: str) -> str: result = [] in_notes = False + notes_count = 0 for line in lines: stripped = line.strip() if stripped.startswith("NOTES:"): in_notes = True - result.append("[NOTES section omitted]") + notes_count = 0 continue if in_notes: - if ( - stripped - and not line.startswith((" ", "\t")) - and not stripped.startswith("NOTES:") - ): - in_notes = False - result.append(line) + notes_count += 1 continue if stripped: result.append(line) + if notes_count > 0: + result.append(f"[NOTES section omitted ({notes_count} lines)]") + return "\n".join(result) if result else output def _process_list(self, output: str) -> str: