diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 8992331..e6f06a2 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,8 +11,8 @@ { "name": "token-saver", "source": "./", - "description": "Automatically compresses verbose CLI output to save tokens. Supports git, docker, npm, terraform, kubectl, and 13+ other command families.", - "version": "2.0.2", + "description": "Automatically compresses verbose CLI output to save tokens. 21 specialized processors for git, docker, npm, terraform, kubectl, helm, ansible, and more.", + "version": "2.1.1", "author": { "name": "ppgranger" }, diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index b1422dc..ecf48d3 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "token-saver", - "description": "Automatically compresses verbose CLI output (git, docker, npm, terraform, kubectl, etc.) to save tokens in Claude Code sessions. Supports 18+ command families with smart compression.", - "version": "2.0.2", + "description": "Automatically compresses verbose CLI output (git, docker, npm, terraform, kubectl, etc.) to save tokens in Claude Code sessions. 21 specialized processors with content-aware compression.", + "version": "2.1.1", "author": { "name": "ppgranger", "url": "https://github.com/ppgranger" diff --git a/README.md b/README.md index dd063f3..507e897 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,11 @@ [![License](https://img.shields.io/badge/license-Apache%202.0-blue)](LICENSE) [![Avg Savings](docs/assets/badge-savings.svg)](docs/processors/) -**Content-aware output compression for AI coding assistants.** -Replaces blind truncation with intelligent, per-command strategies — preserving what the model needs, discarding what it doesn't. +**Cut your AI coding costs by 60-99% on CLI output — without losing a single error message.** -Compatible with **Claude Code** and **Gemini CLI**. 
Zero latency. No LLM calls. Deterministic. +21 specialized processors understand git, pytest, docker, terraform, kubectl, helm, ansible, and more. Each one knows what to keep and what to discard: errors, diffs, and actionable data stay; progress bars, passing tests, and boilerplate go. + +Compatible with **Claude Code** and **Gemini CLI**. Zero latency. No LLM calls. Fully deterministic. One install, instant savings. ### Before & After @@ -20,18 +21,16 @@ Compatible with **Claude Code** and **Gemini CLI**. Zero latency. No LLM calls. | `npm install` (220 packages) | 3,844 tokens | 4 tokens | **99%** | | `terraform plan` (15 resources) | 1,840 tokens | 137 tokens | **93%** | | `kubectl get pods` (40 pods) | 1,393 tokens | 79 tokens | **94%** | +| `docker compose logs` (4 services) | 3,200 tokens | 480 tokens | **85%** | +| `helm template` (12 manifests) | 2,100 tokens | 210 tokens | **90%** | > Run `token-saver benchmark ` to measure savings on your own workloads. ## Why -AI assistants in CLI consume tokens on every command output. -A 500-line `git diff`, a `pytest` run with 200 passing tests, an `npm install` -with 80 packages: everything is sent as-is to the model, which only needs -the actionable information (errors, modified files, results). +Every CLI command your AI assistant runs burns tokens — and most of that output is noise. A 500-line `git diff`, a `pytest` run with 200 passing tests, an `npm install` with 80 packages: the model only needs errors, modified files, and results. Everything else is wasted context and wasted money. -Token-Saver intercepts these outputs and compresses them before they reach -the model, preserving 100% of useful information. +Token-Saver sits between the CLI and your AI assistant, compressing output with content-aware strategies. The model sees exactly what it needs — nothing more, nothing less. Your context window stays clean, your costs drop, and your assistant responds faster with less noise to process. 
## How It Compares @@ -44,12 +43,13 @@ Token-Saver takes a different approach from LLM-based or caching solutions — s ``` CLI command --> Specialized processor --> Compressed output | - 18 processors + 21 processors (git, test, package_list, build, lint, network, docker, kubectl, terraform, env, search, system_info, gh, db_query, cloud_cli, + ansible, helm, syslog, file_listing, file_content, generic) ``` @@ -96,11 +96,15 @@ Gemini CLI allows direct output replacement through the deny/reason mechanism. ### Precision Guarantees +Compression is aggressive on noise, conservative on signal: + - Short outputs (< 200 characters) are **never** modified - Compression is only applied if the gain exceeds 10% - All errors, stack traces, and actionable information are **fully preserved** +- Source code files (`cat *.py`, `cat *.ts`, ...) pass through **unchanged** — the model needs exact content +- Secrets in `.env` files are automatically **redacted** before reaching the model - Only "noise" is removed: progress bars, passing tests, installation logs, ANSI codes, platform lines -- 478 unit tests including precision-specific tests that verify every critical piece of data survives compression +- 560 unit tests including 44 precision-specific tests that verify every critical piece of data survives compression ## Installation @@ -245,9 +249,12 @@ processor is in [`docs/processors/`](docs/processors/). 
| 13 | **GitHub CLI** | 37 | gh pr/issue/run list/view/diff/checks/status | [gh.md](docs/processors/gh.md) | | 14 | **Database Query** | 38 | psql, mysql, sqlite3, pgcli, mycli, litecli | [db_query.md](docs/processors/db_query.md) | | 15 | **Cloud CLI** | 39 | aws, gcloud, az (JSON/table/text output compression) | [cloud_cli.md](docs/processors/cloud_cli.md) | -| 16 | **File Listing** | 50 | ls, find, tree, exa, eza | [file_listing.md](docs/processors/file_listing.md) | -| 17 | **File Content** | 51 | cat, head, tail, bat, less, more (content-aware: code, config, log, CSV) | [file_content.md](docs/processors/file_content.md) | -| 18 | **Generic** | 999 | Any command (fallback: ANSI strip, dedup, truncation) | [generic.md](docs/processors/generic.md) | +| 16 | **Ansible** | 40 | ansible-playbook, ansible (ok/skipped counting, error preservation) | [ansible.md](docs/processors/ansible.md) | +| 17 | **Helm** | 41 | helm install/upgrade/list/template/status/history | [helm.md](docs/processors/helm.md) | +| 18 | **Syslog** | 42 | journalctl, dmesg (head/tail with error extraction) | [syslog.md](docs/processors/syslog.md) | +| 19 | **File Listing** | 50 | ls, find, tree, exa, eza, rsync | [file_listing.md](docs/processors/file_listing.md) | +| 20 | **File Content** | 51 | cat, head, tail, bat, less, more (content-aware: code, config, log, CSV) | [file_content.md](docs/processors/file_content.md) | +| 21 | **Generic** | 999 | Any command (fallback: ANSI strip, dedup, truncation) | [generic.md](docs/processors/generic.md) | ## Configuration @@ -338,7 +345,7 @@ Project settings are merged with global settings. Token-Saver walks up parent di ## Custom Processors -You can extend Token-Saver with your own processors for commands not covered by the built-in 18. +You can extend Token-Saver with your own processors for commands not covered by the built-in 21. 1. Create a Python file with a class inheriting from `src.processors.base.Processor` 2. 
Implement `can_handle()`, `process()`, `name`, and set `priority` @@ -469,7 +476,7 @@ token-saver/ │ ├── stats.py # Stats display │ ├── tracker.py # SQLite tracking │ ├── version_check.py # GitHub update check -│ └── processors/ # 18 auto-discovered processors +│ └── processors/ # 21 auto-discovered processors │ ├── __init__.py │ ├── base.py # Abstract Processor class │ ├── utils.py # Shared utilities (diff compression) @@ -488,11 +495,15 @@ token-saver/ │ ├── gh.py # gh pr/issue/run list/view/diff/checks │ ├── db_query.py # psql/mysql/sqlite3/pgcli/mycli/litecli │ ├── cloud_cli.py # aws/gcloud/az -│ ├── file_listing.py # ls/find/tree/exa/eza +│ ├── ansible.py # ansible-playbook/ansible +│ ├── helm.py # helm install/upgrade/list/template/status +│ ├── syslog.py # journalctl/dmesg +│ ├── file_listing.py # ls/find/tree/exa/eza/rsync │ ├── file_content.py # cat/bat (content-aware compression) │ └── generic.py # Universal fallback ├── docs/ │ └── processors/ # Per-processor documentation +│ ├── ansible.md │ ├── build_output.md │ ├── cloud_cli.md │ ├── db_query.md @@ -503,11 +514,13 @@ token-saver/ │ ├── generic.md │ ├── gh.md │ ├── git.md +│ ├── helm.md │ ├── kubectl.md │ ├── lint_output.md │ ├── network.md │ ├── package_list.md │ ├── search.md +│ ├── syslog.md │ ├── system_info.md │ ├── terraform.md │ └── test_output.md @@ -540,17 +553,17 @@ token-saver/ python3 -m pytest tests/ -v ``` -478 tests covering: +560 tests covering: -- **test_engine.py** (28 tests): compression thresholds, processor priority, ANSI cleanup, generic fallback, hook pattern coverage for 73 commands -- **test_processors.py** (263 tests): each processor with nominal and edge cases, chained command routing, all subcommands (blame, inspect, stats, compose, apply/delete, init/output/state, fd, exa, httpie, dotnet/swift/mix test, shellcheck/hadolint/biome, traceback truncation) -- **test_hooks.py** (77 tests): matching patterns for all supported commands, exclusions (pipes, sudo, editors, 
redirections), subprocess integration, global options (git, docker, kubectl), chained commands, safe trailing pipes +- **test_engine.py** (28 tests): compression thresholds, processor priority, ANSI cleanup, generic fallback, hook pattern coverage for 85+ commands +- **test_processors.py** (306 tests): each processor with nominal and edge cases, chained command routing, all subcommands (blame, inspect, stats, compose, apply/delete, init/output/state, fd, exa, httpie, dotnet/swift/mix test, shellcheck/hadolint/biome, traceback truncation, ansible, helm, syslog, parameterized tests, coverage, docker compose logs, tsc typecheck, .env redaction, minified files, search directory grouping, git lockfiles/stat grouping) +- **test_hooks.py** (79 tests): matching patterns for all supported commands, exclusions (pipes, sudo, editors, redirections, remote rsync), subprocess integration, global options (git, docker, kubectl), chained commands, safe trailing pipes - **test_precision.py** (44 tests): verification that every critical piece of data survives compression (filenames, hashes, error messages, stack traces, line numbers, rule IDs, diff changes, warning types, secret redaction, unhealthy pods, terraform changes, unmet dependencies) -- **test_tracker.py** (20 tests): CRUD, concurrency (4 threads), corruption recovery, session tracking, stats CLI -- **test_config.py** (6 tests): defaults, env overrides, invalid values +- **test_tracker.py** (23 tests): CRUD, concurrency (4 threads), corruption recovery, session tracking, stats CLI +- **test_config.py** (11 tests): defaults, env overrides, invalid values - **test_version_check.py** (12 tests): version parsing, comparison, fail-open on errors -- **test_cli.py** (7 tests): version/stats/help subcommands, bin script execution -- **test_installers.py** (21 tests): version stamping, legacy migration, CLI install/uninstall +- **test_cli.py** (11 tests): version/stats/help subcommands, bin script execution +- **test_installers.py** 
(46 tests): version stamping, legacy migration, CLI install/uninstall ## Debugging @@ -575,7 +588,7 @@ token-saver version - Does not compress commands with complex pipelines, redirections (`> file`), or `||` chains - Simple trailing pipes are supported (`| head`, `| tail`, `| wc`, `| grep`, `| sort`, `| uniq`, `| cut`) - Chained commands (`&&`, `;`) are supported — each segment is validated individually -- `sudo`, `ssh`, `vim` commands are never intercepted +- `sudo`, `ssh`, `vim` commands are never intercepted; remote `rsync` (with host:path) is excluded but local `rsync` is compressible - Long diff compression truncates per-hunk, not per-file: a diff with many small hunks is not reduced - The generic processor only deduplicates **consecutive identical lines**, not similar lines - Gemini CLI: the deny/reason mechanism may have side effects if other extensions use the same hook diff --git a/docs/processors/ansible.md b/docs/processors/ansible.md new file mode 100644 index 0000000..f0ef18e --- /dev/null +++ b/docs/processors/ansible.md @@ -0,0 +1,19 @@ +# Ansible Processor + +**File:** `src/processors/ansible.py` | **Priority:** 40 | **Name:** `ansible` + +Handles `ansible-playbook` and `ansible` command output. + +## Supported Commands + +| Command | Strategy | +|---|---| +| `ansible-playbook` | Keeps PLAY/TASK headers, changed/failed/fatal lines, PLAY RECAP. Counts and summarizes ok/skipped tasks | +| `ansible` (ad-hoc) | Same strategy | + +## Compression Strategy + +- **Always preserved:** PLAY and TASK headers, changed/failed/fatal/unreachable lines, error messages (`msg:`), full PLAY RECAP section +- **Compressed:** ok tasks (counted), skipping tasks (counted), separator lines (`****`), included/imported lines +- **Summary:** Inserted at top, e.g. 
`[42 ok, 3 skipped]` +- **Threshold:** Output with 20 or fewer lines passes through unchanged diff --git a/docs/processors/helm.md b/docs/processors/helm.md new file mode 100644 index 0000000..f63c7ae --- /dev/null +++ b/docs/processors/helm.md @@ -0,0 +1,26 @@ +# Helm Processor + +**File:** `src/processors/helm.py` | **Priority:** 41 | **Name:** `helm` + +Handles Helm CLI output for chart management operations. + +## Supported Commands + +| Command | Strategy | +|---|---| +| `helm template` | Summarizes YAML manifests: counts manifests and total lines, lists each Kind/Name with line count | +| `helm install` | Keeps status lines, omits NOTES section boilerplate | +| `helm upgrade` | Same as install | +| `helm status` | Same as install | +| `helm list` | Keeps header + first 19 releases, truncates remainder with count | +| `helm history` | Keeps header + last 10 revisions, truncates older with count | +| `helm rollback` | Passes through (typically short) | +| `helm uninstall` | Passes through (typically short) | +| `helm get` | Passes through | + +## Thresholds + +- `helm template`: 50 lines before summarization +- `helm install/upgrade/status`: 20 lines before NOTES omission +- `helm list`: 25 lines before truncation +- `helm history`: 15 lines before truncation diff --git a/docs/processors/syslog.md b/docs/processors/syslog.md new file mode 100644 index 0000000..50dd10e --- /dev/null +++ b/docs/processors/syslog.md @@ -0,0 +1,28 @@ +# Syslog Processor + +**File:** `src/processors/syslog.py` | **Priority:** 42 | **Name:** `syslog` + +Handles system log output from `journalctl` and `dmesg`. 
+ +## Supported Commands + +| Command | Strategy | +|---|---| +| `journalctl` | Head/tail compression with error extraction | +| `dmesg` | Same strategy | + +## Compression Strategy + +Uses the shared `compress_log_lines()` utility: + +- **Head:** First 10 lines preserved (boot/startup messages) +- **Tail:** Last 20 lines preserved (most recent entries) +- **Errors:** Lines matching error/exception/fatal/panic/traceback patterns are preserved with 2 lines of context +- **Error cap:** Maximum 50 error-related lines to prevent explosion on noisy logs +- **Threshold:** Output with 30 or fewer lines passes through unchanged + +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| `file_log_context_lines` | 2 | Context lines around errors in log output | diff --git a/pyproject.toml b/pyproject.toml index 3bb17ed..06d353a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "token-saver" -version = "2.0.2" +version = "2.1.1" requires-python = ">=3.10" [project.optional-dependencies] @@ -44,6 +44,7 @@ ignore = [ "S101", # assert in tests is fine "PLR2004", # magic values in comparisons — too noisy for thresholds/processors "PLR0912", # too many branches — some processors are inherently complex + "PLR0913", # too many arguments — shared utilities need flexible signatures "PLR0911", # too many return statements "PLR0915", # too many statements "SIM108", # ternary instead of if/else — less readable for multi-line diff --git a/scripts/hook_pretool.py b/scripts/hook_pretool.py index a5ecf8b..c7113f6 100644 --- a/scripts/hook_pretool.py +++ b/scripts/hook_pretool.py @@ -86,7 +86,8 @@ def _load_compressible_patterns() -> list[str]: EXCLUDED_PATTERNS = [ r"(?\s", # redirections @@ -105,7 +106,8 @@ def _load_compressible_patterns() -> list[str]: r"<\(", # process substitution r"^\s*sudo\b", r"^\s*(vi|vim|nano|emacs|code)\b", - r"^\s*(ssh|scp|rsync)\b", + r"^\s*(ssh|scp)\b", + r"^\s*rsync\b.*\S+:\S+", # only exclude remote rsync 
(host:path) r"^\s*env\s+\S+=", r"(?:^|\s)token[-_]saver\s", r"wrap\.py", diff --git a/src/__init__.py b/src/__init__.py index f489d68..666977b 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,6 +1,6 @@ import os -__version__ = "2.0.2" +__version__ = "2.1.1" def data_dir() -> str: diff --git a/src/processors/ansible.py b/src/processors/ansible.py new file mode 100644 index 0000000..d346a46 --- /dev/null +++ b/src/processors/ansible.py @@ -0,0 +1,99 @@ +"""Ansible output processor: ansible-playbook, ansible.""" + +import re + +from .base import Processor + + +class AnsibleProcessor(Processor): + priority = 40 + hook_patterns = [ + r"^ansible(-playbook)?\b", + ] + + @property + def name(self) -> str: + return "ansible" + + def can_handle(self, command: str) -> bool: + return bool(re.search(r"\b(ansible-playbook|ansible)\b", command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + lines = output.splitlines() + if len(lines) <= 20: + return output + + result = [] + ok_count = 0 + skipped_count = 0 + in_recap = False + + for line in lines: + stripped = line.strip() + + # PLAY RECAP is always kept in full + if stripped.startswith("PLAY RECAP"): + in_recap = True + result.append(line) + continue + + if in_recap: + result.append(line) + continue + + # PLAY and TASK headers — keep + if re.match(r"^(PLAY|TASK)\s+\[", stripped): + result.append(line) + continue + + # Separator lines (****) + if re.match(r"^\*+$", stripped): + continue + + # changed — always keep + if re.match(r"^changed:", stripped): + result.append(line) + continue + + # failed / fatal / unreachable — always keep + if re.match(r"^(fatal|failed|unreachable):", stripped, re.I): + result.append(line) + continue + + # Error/warning output lines (indented after fatal/failed) + if re.search(r"\b(ERROR|FAILED|UNREACHABLE|fatal)\b", stripped): + result.append(line) + continue + + # "msg:" lines (error messages) — keep + if 
re.match(r'^\s*"?msg"?\s*:', stripped): + result.append(line) + continue + + # ok — count and skip + if re.match(r"^ok:", stripped): + ok_count += 1 + continue + + # skipping — count and skip + if re.match(r"^skipping:", stripped): + skipped_count += 1 + continue + + # included/imported — skip + if re.match(r"^(included|imported):", stripped): + continue + + # Insert summary at the top + summary_parts = [] + if ok_count: + summary_parts.append(f"{ok_count} ok") + if skipped_count: + summary_parts.append(f"{skipped_count} skipped") + if summary_parts: + result.insert(0, f"[{', '.join(summary_parts)}]") + + return "\n".join(result) if result else output diff --git a/src/processors/build_output.py b/src/processors/build_output.py index 4abffd5..d2ea7bc 100644 --- a/src/processors/build_output.py +++ b/src/processors/build_output.py @@ -45,6 +45,10 @@ def process(self, command: str, output: str) -> str: if not output or not output.strip(): return output + # tsc --noEmit is a type-check (lint), not a build — group errors by code + if re.search(r"\btsc\b.*--noEmit", command): + return self._process_tsc_typecheck(output) + # Piped output may be partial — skip aggressive summarization to # avoid claiming "Build succeeded" when errors were piped away. 
if "|" in command: @@ -281,6 +285,47 @@ def _process_audit(self, output: str) -> str: return "\n".join(result) + def _process_tsc_typecheck(self, output: str) -> str: + """Compress tsc --noEmit: group errors by TS error code.""" + lines = output.splitlines() + by_code: dict[str, list[str]] = {} + summary_line = "" + + for line in lines: + stripped = line.strip() + # TS error format: src/file.ts(10,5): error TS2322: message + m = re.match(r"^(.+?)\(\d+,\d+\):\s+error\s+(TS\d+):\s+(.+)$", stripped) + if not m: + # Also match: src/file.ts:10:5 - error TS2322: message + m = re.match(r"^(.+?):\d+:\d+\s+-\s+error\s+(TS\d+):\s+(.+)$", stripped) + if m: + code = m.group(2) + by_code.setdefault(code, []).append(stripped) + continue + # Summary line: Found N errors in M files. + if re.match(r"^Found \d+ error", stripped): + summary_line = stripped + + if not by_code: + return output + + total = sum(len(v) for v in by_code.values()) + result = [f"{total} type errors across {len(by_code)} codes:"] + for code, violations in sorted(by_code.items(), key=lambda x: -len(x[1])): + count = len(violations) + if count > 3: + result.append(f" {code}: {count} occurrences") + for v in violations[:2]: + result.append(f" {v}") + result.append(f" ... 
({count - 2} more)") + else: + for v in violations: + result.append(f" {v}") + + if summary_line: + result.append(summary_line) + return "\n".join(result) + def _is_progress_line(self, line: str) -> bool: if not line: return False diff --git a/src/processors/cloud_cli.py b/src/processors/cloud_cli.py index afd364d..e1f7b6d 100644 --- a/src/processors/cloud_cli.py +++ b/src/processors/cloud_cli.py @@ -4,6 +4,7 @@ import re from .base import Processor +from .utils import compress_json_value _CLOUD_CMD_RE = re.compile(r"\b(aws|gcloud|az)\s+") @@ -57,7 +58,11 @@ def _process_json(self, output: str, command: str) -> str: return output return self._truncate_text(lines) - compressed = self._compress_json_value(data, depth=0, max_depth=4) + compressed = compress_json_value( + data, + max_depth=4, + important_key_re=_IMPORTANT_KEY_RE, + ) result = json.dumps(compressed, indent=2, default=str) # Add summary if significant compression @@ -68,42 +73,6 @@ def _process_json(self, output: str, command: str) -> str: return result - def _compress_json_value(self, value, depth=0, max_depth=4): - """Recursively compress JSON, truncating at depth.""" - if depth >= max_depth: - if isinstance(value, dict): - return f"{{... {len(value)} keys}}" - if isinstance(value, list): - return f"[... {len(value)} items]" - if isinstance(value, str) and len(value) > 200: - return value[:197] + "..." 
- return value - - if isinstance(value, dict): - result = {} - for k, v in value.items(): - # Preserve important keys at full depth - if _IMPORTANT_KEY_RE.search(k): - result[k] = self._compress_json_value(v, depth, max_depth + 1) - else: - result[k] = self._compress_json_value(v, depth + 1, max_depth) - return result - - if isinstance(value, list): - if len(value) == 0: - return value - # Don't increment depth for list traversal - if len(value) <= 5: - return [self._compress_json_value(item, depth, max_depth) for item in value] - compressed = [self._compress_json_value(item, depth, max_depth) for item in value[:3]] - compressed.append(f"... ({len(value) - 3} more items)") - return compressed - - if isinstance(value, str) and len(value) > 200: - return value[:197] + "..." - - return value - def _is_table(self, lines: list[str]) -> bool: """Detect table output format.""" for line in lines[:5]: diff --git a/src/processors/docker.py b/src/processors/docker.py index a107f4a..abdb267 100644 --- a/src/processors/docker.py +++ b/src/processors/docker.py @@ -5,6 +5,7 @@ from .. import config from .base import Processor +from .utils import compress_log_lines # Optional docker global options that may appear before the subcommand. 
# Covers: --context , -H , --host @@ -174,36 +175,59 @@ def _process_logs(self, output: str) -> str: if len(lines) <= keep_head + keep_tail: return output - # Collect error lines and their indices - error_lines = [] - for i, line in enumerate(lines): - if re.search( - r"\b(error|Error|ERROR|exception|Exception|EXCEPTION|" - r"fatal|Fatal|FATAL|panic|Panic|PANIC|traceback|Traceback)\b", - line, - ): - # Include context: 2 lines before, the error line, 2 after - start = max(0, i - 2) - end = min(len(lines), i + 3) - error_lines.extend(lines[start:end]) - if end < len(lines): - error_lines.append("") # separator - - # Deduplicate error context (overlapping windows) - seen = set() - unique_errors = [] - for line in error_lines: - if line not in seen: - unique_errors.append(line) - seen.add(line) - - result = lines[:keep_head] - if unique_errors: - result.append(f"\n... ({len(lines)} total lines, showing errors) ...\n") - result.extend(unique_errors[:50]) # Cap error lines - else: - result.append(f"\n... 
({len(lines) - keep_head - keep_tail} lines truncated) ...\n") - result.extend(lines[-keep_tail:]) + # Detect compose log format: "service-name | message" + compose_re = re.compile(r"^(\S+)\s+\|\s+(.*)$") + is_compose = any(compose_re.match(line) for line in lines[:20]) + + if is_compose: + return self._process_compose_logs(lines, compose_re) + + return compress_log_lines( + lines, + keep_head=keep_head, + keep_tail=keep_tail, + context_lines=2, + ) + + def _process_compose_logs(self, lines: list[str], compose_re: re.Pattern) -> str: + """Compress docker compose logs: group by service, keep errors + tail per service.""" + service_lines: dict[str, list[str]] = {} + for line in lines: + m = compose_re.match(line) + if m: + service = m.group(1) + service_lines.setdefault(service, []).append(line) + else: + service_lines.setdefault("_other", []).append(line) + + result = [f"{len(lines)} log lines across {len(service_lines)} services:"] + + for service, svc_lines in sorted(service_lines.items()): + if service == "_other": + continue + error_count = sum( + 1 + for ln in svc_lines + if re.search(r"\b(error|ERROR|exception|fatal|FATAL|panic)\b", ln, re.I) + ) + result.append(f"\n--- {service} ({len(svc_lines)} lines, {error_count} errors) ---") + + # Show errors with context + last 3 lines + errors_shown: list[str] = [] + for i, line in enumerate(svc_lines): + if re.search(r"\b(error|ERROR|exception|fatal|FATAL|panic)\b", line, re.I): + start = max(0, i - 1) + end = min(len(svc_lines), i + 2) + for el in svc_lines[start:end]: + if el not in errors_shown: + errors_shown.append(el) + + if errors_shown: + result.extend(errors_shown[:20]) + # Always show last 3 lines per service + for line in svc_lines[-3:]: + if line not in errors_shown: + result.append(line) return "\n".join(result) diff --git a/src/processors/file_content.py b/src/processors/file_content.py index a0d49ce..52c5d39 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -16,6 +16,7 
@@ from .. import config from .base import Processor +from .utils import compress_json_value, compress_log_lines # ── File type sets ─────────────────────────────────────────────────── @@ -145,6 +146,22 @@ def process(self, command: str, output: str) -> str: ext = self._extract_extension(command) filename = self._extract_filename(command) + # ── COMPRESS: minified files (never useful for patching) ────── + if self._is_minified(ext, filename, output): + lines = output.splitlines() + total_chars = len(output) + total_lines = len(lines) + preview = output[:200].replace("\n", " ") + return ( + f"[minified file: {filename or 'unknown'}, " + f"{total_chars:,} chars, {total_lines} lines]\n" + f"Preview: {preview}..." + ) + + # ── Handle .env variants: .env.production, .env.local ──────── + if self._is_env_file_to_redact(filename): + return self._compress_env_file(output.splitlines()) + # ── NEVER COMPRESS: source code ────────────────────────────── if ext in _SOURCE_CODE_EXTENSIONS: return output @@ -225,6 +242,60 @@ def _extract_filename(self, command: str) -> str: return part.rsplit("/", 1)[-1].rsplit("\\", 1)[-1] return "" + # ── Minified file detection ───────────────────────────────────── + + def _is_minified(self, ext: str, filename: str, output: str) -> bool: + """Detect minified files by name pattern or content heuristics.""" + # Name-based detection + if re.search(r"\.min\.(js|css|html)$", filename, re.I): + return True + if re.search(r"\.bundle\.(js|css)$", filename, re.I): + return True + + # Content heuristic: very few lines relative to total length + lines = output.splitlines() + if len(lines) <= 3 and len(output) > 5000: + return True + # Average line length > 500 chars + return bool(lines and len(output) / len(lines) > 500) + + # ── .env variant detection ────────────────────────────────────── + + def _is_env_file_to_redact(self, filename: str) -> bool: + """Detect .env variant files that should have secrets redacted. 
+ + .env exactly and .env.example/.env.template are handled by existing + pass-through logic (model may need exact values for editing). + """ + if filename in (".env", ".env.example", ".env.template"): + return False + return bool(re.match(r"^\.env\..+$", filename, re.I)) + + def _compress_env_file(self, lines: list[str]) -> str: + """Compress .env files: redact sensitive values, keep structure.""" + from .env import _SENSITIVE_PATTERNS # noqa: PLC0415 + + result = [] + redacted = 0 + for line in lines: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + result.append(line) + continue + if "=" in stripped: + key = stripped.split("=", 1)[0] + if _SENSITIVE_PATTERNS.search(key): + result.append(f"{key}=***") + redacted += 1 + else: + result.append(line) + else: + result.append(line) + + if redacted > 0: + result.append(f"\n({redacted} sensitive values redacted)") + return "\n".join(result) + # ── Heuristic detection (for extensionless files) ──────────────── def _detect_heuristic(self, lines: list[str]) -> str: @@ -442,34 +513,10 @@ def _compress_json(self, raw: str, total: int) -> str: except (json.JSONDecodeError, ValueError): return self._truncate_default(raw.splitlines()) - result = self._summarize_json_value(data, depth=0, max_depth=2) + compressed = compress_json_value(data, max_depth=2) + result = json.dumps(compressed, indent=2, default=str) return f"{result}\n\n({total} total lines)" - def _summarize_json_value(self, val, depth: int, max_depth: int) -> str: - indent = " " * depth - if isinstance(val, dict): - if depth >= max_depth: - return f"{{{len(val)} keys}}" - items = [] - for k, v in val.items(): - summarized = self._summarize_json_value(v, depth + 1, max_depth) - items.append(f'{indent} "{k}": {summarized}') - return "{\n" + ",\n".join(items) + f"\n{indent}}}" - elif isinstance(val, list): - if len(val) == 0: - return "[]" - if len(val) <= 3: - inner = [self._summarize_json_value(v, depth + 1, max_depth) for v in val] - return 
"[" + ", ".join(inner) + "]" - first_three = [self._summarize_json_value(v, depth + 1, max_depth) for v in val[:3]] - return "[" + ", ".join(first_three) + f", ... ({len(val)} items total)]" - elif isinstance(val, str): - if len(val) > 100: - return f'"{val[:80]}..." ({len(val)} chars)' - return json.dumps(val) - else: - return json.dumps(val) - def _compress_yaml(self, lines: list[str], total: int) -> str: result = [] nested_count = 0 @@ -536,59 +583,14 @@ def _compress_xml(self, lines: list[str], total: int) -> str: # ── Log compression ───────────────────────────────────────────── def _compress_log(self, lines: list[str]) -> str: - total = len(lines) - context_lines = config.get("file_log_context_lines") - - head = lines[:5] - tail = lines[-5:] - - middle = lines[5:-5] if len(lines) > 10 else [] - error_indices = set() - info_count = 0 - debug_count = 0 - - for idx, line in enumerate(middle): - if _LOG_ERROR_RE.search(line): - for c in range(idx - context_lines, idx + context_lines + 1): - if 0 <= c < len(middle): - error_indices.add(c) - elif re.search(r"\bDEBUG\b", line, re.IGNORECASE): - debug_count += 1 - elif re.search(r"\bINFO\b", line, re.IGNORECASE): - info_count += 1 - - result = head[:] - if middle: - if error_indices: - result.append(f"\n... (scanning {len(middle)} middle lines) ...\n") - sorted_indices = sorted(error_indices) - prev = -2 - for idx in sorted_indices: - if idx > prev + 1: - gap = idx - prev - 1 - if prev >= 0: - result.append(f" ... ({gap} lines skipped)") - result.append(middle[idx]) - prev = idx - remaining = len(middle) - 1 - prev - if remaining > 0: - result.append(f" ... ({remaining} lines skipped)") - else: - result.append(f"\n... 
({len(middle)} lines, no errors/warnings found) ...\n") - - omitted_parts = [] - if info_count > 0: - omitted_parts.append(f"{info_count} INFO") - if debug_count > 0: - omitted_parts.append(f"{debug_count} DEBUG") - other = len(middle) - len(error_indices) - info_count - debug_count - if other > 0: - omitted_parts.append(f"{other} other") - - result.extend(tail) - summary = ", ".join(omitted_parts) + " lines omitted" if omitted_parts else "" - result.append(f"\n({total} total lines{'; ' + summary if summary else ''})") - return "\n".join(result) + context = config.get("file_log_context_lines") + return compress_log_lines( + lines, + keep_head=5, + keep_tail=5, + error_re=_LOG_ERROR_RE, + context_lines=context, + ) # ── CSV compression ───────────────────────────────────────────── diff --git a/src/processors/file_listing.py b/src/processors/file_listing.py index 77bdcdc..b607877 100644 --- a/src/processors/file_listing.py +++ b/src/processors/file_listing.py @@ -10,7 +10,7 @@ class FileListingProcessor(Processor): priority = 50 hook_patterns = [ - r"^(ls|find|tree|dir|exa|eza)\b", + r"^(ls|find|tree|dir|exa|eza|rsync)\b", ] @property @@ -18,7 +18,7 @@ def name(self) -> str: return "file_listing" def can_handle(self, command: str) -> bool: - return bool(re.search(r"\b(ls|find|tree|dir|exa|eza)\b", command)) + return bool(re.search(r"\b(ls|find|tree|dir|exa|eza|rsync)\b", command)) def process(self, command: str, output: str) -> str: if not output or not output.strip(): diff --git a/src/processors/git.py b/src/processors/git.py index 4fba591..4367b10 100644 --- a/src/processors/git.py +++ b/src/processors/git.py @@ -175,6 +175,19 @@ def _process_status(self, output: str) -> str: return "\n".join(result) if result else output + _LOCK_FILES = { + "package-lock.json", + "yarn.lock", + "pnpm-lock.yaml", + "poetry.lock", + "Pipfile.lock", + "Cargo.lock", + "composer.lock", + "Gemfile.lock", + "go.sum", + "bun.lockb", + } + def _process_diff(self, output: str, command: str 
= "") -> str: lines = output.splitlines() @@ -188,10 +201,51 @@ def _process_diff(self, output: str, command: str = "") -> str: if lines and not any(line.startswith("diff --git") for line in lines): return self._process_diff_stat(lines) + # Pre-scan: separate lockfile diffs from normal diffs + non_lock_lines: list[str] = [] + lockfile_summaries: list[str] = [] + current_file = "" + current_file_lines = 0 + in_lockfile = False + + for line in lines: + if line.startswith("diff --git"): + # Flush previous lockfile summary + if in_lockfile and current_file: + lockfile_summaries.append(f"diff --git {current_file}") + lockfile_summaries.append(f" (lockfile changed, {current_file_lines} lines)") + # Detect new file + m = re.match(r"^diff --git a/(.+?) b/", line) + filename = m.group(1).rsplit("/", 1)[-1] if m else "" + in_lockfile = filename in self._LOCK_FILES + if in_lockfile: + current_file = filename + current_file_lines = 0 + else: + non_lock_lines.append(line) + continue + + if in_lockfile: + current_file_lines += 1 + continue + + non_lock_lines.append(line) + + # Flush last lockfile + if in_lockfile and current_file: + lockfile_summaries.append(f"diff --git {current_file}") + lockfile_summaries.append(f" (lockfile changed, {current_file_lines} lines)") + + # Compress the non-lockfile lines, then append lockfile summaries max_hunk = config.get("max_diff_hunk_lines") max_context = config.get("max_diff_context_lines") - result = compress_diff(lines, max_hunk, max_context) - return "\n".join(result) + if any(line.startswith("diff --git") for line in non_lock_lines): + result = compress_diff(non_lock_lines, max_hunk, max_context) + result.extend(lockfile_summaries) + return "\n".join(result) + if lockfile_summaries: + return "\n".join(lockfile_summaries) + return "\n".join(non_lock_lines) def _process_name_list(self, lines: list[str]) -> str: """Compress --name-only or --name-status output: group by directory.""" @@ -224,7 +278,13 @@ def _process_name_list(self, lines: 
list[str]) -> str: return "\n".join(result) def _process_diff_stat(self, lines: list[str]) -> str: - """Compress `git diff --stat` output: strip visual bars.""" + """Compress `git diff --stat` output: strip visual bars, group when many files.""" + # Count stat lines (exclude summary line) + stat_lines = [line for line in lines if re.match(r"^\s*.+?\s+\|\s+\d+", line)] + + if len(stat_lines) > 20: + return self._group_stat_by_dir(lines) + result = [] for line in lines: # Match stat lines: " path/file | 5 ++-" -> " path/file | 5" @@ -235,6 +295,46 @@ def _process_diff_stat(self, lines: list[str]) -> str: result.append(line) return "\n".join(result) + def _group_stat_by_dir(self, lines: list[str]) -> str: + """Group --stat output by directory when many files changed.""" + by_dir: dict[str, list[tuple[str, str]]] = {} + summary_line = "" + + for line in lines: + stripped = line.strip() + # Summary line: "N files changed, X insertions(+), Y deletions(-)" + if re.match(r"\s*\d+ files? changed", stripped): + summary_line = stripped + continue + # Stat line: " path/to/file.py | 42 +++---" + m = re.match(r"^\s*(.+?)\s+\|\s+(.+)$", stripped) + if m: + filepath = m.group(1).strip() + stats = m.group(2).strip() + parts = filepath.rsplit("/", 1) + dir_name = parts[0] if len(parts) > 1 else "." 
+ by_dir.setdefault(dir_name, []).append((filepath, stats)) + + if not by_dir: + return "\n".join(lines) + + result = [] + for dir_name, files in sorted(by_dir.items(), key=lambda x: -len(x[1])): + if len(files) > 5: + total_changes = sum( + int(s.group(1)) for _, stats in files if (s := re.search(r"(\d+)", stats)) + ) + result.append(f" {dir_name}/ ({len(files)} files, ~{total_changes} changes)") + else: + for filepath, stats in files: + # Strip +/- visual bars from stats + clean_stats = re.sub(r"\s+[+\-]+\s*$", "", stats) + result.append(f" {filepath} | {clean_stats}") + + if summary_line: + result.append(summary_line) + return "\n".join(result) + def _process_log(self, output: str, command: str = "") -> str: max_entries = config.get("max_log_entries") lines = output.splitlines() diff --git a/src/processors/helm.py b/src/processors/helm.py new file mode 100644 index 0000000..2810ee6 --- /dev/null +++ b/src/processors/helm.py @@ -0,0 +1,128 @@ +"""Helm output processor: install, upgrade, list, template, status.""" + +import re + +from .base import Processor + + +class HelmProcessor(Processor): + priority = 41 + hook_patterns = [ + r"^helm\s+(install|upgrade|list|template|status|rollback|history|uninstall|get)\b", + ] + + @property + def name(self) -> str: + return "helm" + + def can_handle(self, command: str) -> bool: + return bool( + re.search( + r"\bhelm\s+(install|upgrade|list|template|status|rollback|" + r"history|uninstall|get)\b", + command, + ) + ) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + if re.search(r"\bhelm\s+template\b", command): + return self._process_template(output) + if re.search(r"\bhelm\s+(install|upgrade)\b", command): + return self._process_install(output) + if re.search(r"\bhelm\s+list\b", command): + return self._process_list(output) + if re.search(r"\bhelm\s+status\b", command): + return self._process_install(output) + if re.search(r"\bhelm\s+history\b", command): + 
return self._process_history(output) + return output + + def _process_template(self, output: str) -> str: + """Compress helm template: summarize YAML manifests.""" + lines = output.splitlines() + if len(lines) <= 50: + return output + + manifests: list[tuple[str, int]] = [] + current_kind = "" + current_name = "" + current_lines = 0 + + for line in lines: + stripped = line.strip() + if stripped == "---": + if current_kind: + manifests.append((f"{current_kind}/{current_name}", current_lines)) + current_kind = "" + current_name = "" + current_lines = 0 + continue + if stripped.startswith("kind:"): + current_kind = stripped.split(":", 1)[1].strip() + elif stripped.startswith(" name:") or ( + stripped.startswith("name:") and not current_name + ): + current_name = stripped.split(":", 1)[1].strip() + current_lines += 1 + + if current_kind: + manifests.append((f"{current_kind}/{current_name}", current_lines)) + + result = [f"helm template: {len(manifests)} manifests, {len(lines)} lines total:"] + for manifest, count in manifests: + result.append(f" {manifest} ({count} lines)") + return "\n".join(result) + + def _process_install(self, output: str) -> str: + """Compress helm install/upgrade/status: keep status, skip NOTES boilerplate.""" + lines = output.splitlines() + if len(lines) <= 20: + return output + + result = [] + in_notes = False + notes_count = 0 + + for line in lines: + stripped = line.strip() + + if stripped.startswith("NOTES:"): + in_notes = True + notes_count = 0 + continue + + if in_notes: + notes_count += 1 + continue + + if stripped: + result.append(line) + + if notes_count > 0: + result.append(f"[NOTES section omitted ({notes_count} lines)]") + + return "\n".join(result) if result else output + + def _process_list(self, output: str) -> str: + """Compress helm list: truncate long lists.""" + lines = output.splitlines() + if len(lines) <= 25: + return output + + result = [lines[0]] + result.extend(lines[1:20]) + result.append(f"... 
({len(lines) - 21} more releases)") + return "\n".join(result) + + def _process_history(self, output: str) -> str: + """Compress helm history: truncate old revisions.""" + lines = output.splitlines() + if len(lines) <= 15: + return output + result = [lines[0]] + result.insert(1, f"... ({len(lines) - 11} older revisions)") + result.extend(lines[-10:]) + return "\n".join(result) diff --git a/src/processors/kubectl.py b/src/processors/kubectl.py index ba9ccf2..3b3c1c0 100644 --- a/src/processors/kubectl.py +++ b/src/processors/kubectl.py @@ -4,6 +4,7 @@ from .. import config from .base import Processor +from .utils import compress_log_lines # Optional kubectl global options that may appear before the subcommand. # Covers: -n , --namespace , --context , --kubeconfig , @@ -238,28 +239,12 @@ def _process_logs(self, output: str) -> str: if len(lines) <= keep_head + keep_tail: return output - error_lines = [] - for i, line in enumerate(lines): - if re.search( - r"\b(error|Error|ERROR|exception|Exception|" - r"fatal|Fatal|FATAL|panic|Panic)\b", - line, - ): - start = max(0, i - 1) - end = min(len(lines), i + 2) - for el in lines[start:end]: - if el not in error_lines: - error_lines.append(el) - - result = lines[:keep_head] - if error_lines: - result.append(f"\n... ({len(lines)} total lines, showing errors) ...\n") - result.extend(error_lines[:40]) - else: - result.append(f"\n... 
({len(lines) - keep_head - keep_tail} lines truncated) ...\n") - result.extend(lines[-keep_tail:]) - - return "\n".join(result) + return compress_log_lines( + lines, + keep_head=keep_head, + keep_tail=keep_tail, + context_lines=1, + ) def _process_mutate(self, output: str) -> str: """Compress kubectl apply/delete/create: keep result lines, skip verbose details.""" diff --git a/src/processors/network.py b/src/processors/network.py index f4232a4..18cdb1b 100644 --- a/src/processors/network.py +++ b/src/processors/network.py @@ -4,6 +4,7 @@ import re from .base import Processor +from .utils import compress_json_value class NetworkProcessor(Processor): @@ -167,38 +168,13 @@ def _maybe_compress_json(self, text: str) -> str: return text # Only compress if the JSON is large - if len(stripped) < 500: + if len(stripped) < 1500: return text - summary = self._summarize_json(data, depth=0, max_depth=2) + compressed = compress_json_value(data, max_depth=2) + summary = json.dumps(compressed, indent=2, default=str) return f"{summary}\n\n({len(stripped)} chars, {len(text.splitlines())} lines)" - def _summarize_json(self, val, depth: int, max_depth: int) -> str: - """Recursively summarize a JSON value.""" - indent = " " * depth - if isinstance(val, dict): - if depth >= max_depth: - return f"{{{len(val)} keys}}" - items = [] - for k, v in val.items(): - summarized = self._summarize_json(v, depth + 1, max_depth) - items.append(f'{indent} "{k}": {summarized}') - return "{\n" + ",\n".join(items) + f"\n{indent}}}" - elif isinstance(val, list): - if len(val) == 0: - return "[]" - if len(val) <= 2: - inner = [self._summarize_json(v, depth + 1, max_depth) for v in val] - return "[" + ", ".join(inner) + "]" - first = self._summarize_json(val[0], depth + 1, max_depth) - return f"[{first}, ... ({len(val)} items total)]" - elif isinstance(val, str): - if len(val) > 80: - return f'"{val[:60]}..." 
({len(val)} chars)' - return json.dumps(val) - else: - return json.dumps(val) - def _process_wget(self, output: str) -> str: lines = output.splitlines() result = [] diff --git a/src/processors/search.py b/src/processors/search.py index 83d4437..a5e7bf9 100644 --- a/src/processors/search.py +++ b/src/processors/search.py @@ -73,6 +73,13 @@ def process(self, command: str, output: str) -> str: max_per_file = config.get("search_max_per_file") max_files = config.get("search_max_files") + if total_files > 30: + return self._process_grouped_by_dir( + by_file, + total_matches, + total_files, + ) + result = [f"{total_matches} matches across {total_files} files:"] sorted_files = sorted(by_file.items(), key=lambda x: -len(x[1])) @@ -98,6 +105,59 @@ def process(self, command: str, output: str) -> str: return "\n".join(result) + def _process_grouped_by_dir( + self, + by_file: dict, + total_matches: int, + total_files: int, + ) -> str: + """Group search results by directory for large result sets.""" + max_per_file = config.get("search_max_per_file") + max_files = config.get("search_max_files") + + by_dir: dict[str, dict[str, list[str]]] = {} + for filepath, matches in by_file.items(): + parts = filepath.rsplit("/", 1) + dir_name = parts[0] if len(parts) > 1 else "." 
+ by_dir.setdefault(dir_name, {})[filepath] = matches + + result = [ + f"{total_matches} matches across {total_files} files in {len(by_dir)} directories:" + ] + + dirs_shown = 0 + for dir_name, files in sorted( + by_dir.items(), key=lambda x: -sum(len(v) for v in x[1].values()) + ): + if dirs_shown >= max_files: + break + dir_matches = sum(len(v) for v in files.values()) + result.append(f"\n{dir_name}/ ({dir_matches} matches in {len(files)} files)") + + # Show top 3 files in this directory + for filepath, matches in sorted(files.items(), key=lambda x: -len(x[1]))[:3]: + fname = filepath.rsplit("/", 1)[-1] + if len(matches) > max_per_file: + result.append(f" {fname}: ({len(matches)} matches)") + for m in matches[:max_per_file]: + display = m[len(filepath) + 1 :] if m.startswith(filepath + ":") else m + result.append(f" {display}") + else: + for m in matches: + result.append(f" {m}") + + remaining_files = len(files) - 3 + if remaining_files > 0: + result.append(f" ... ({remaining_files} more files in this directory)") + + dirs_shown += 1 + + remaining_dirs = len(by_dir) - dirs_shown + if remaining_dirs > 0: + result.append(f"\n... ({remaining_dirs} more directories)") + + return "\n".join(result) + def _process_fd(self, output: str) -> str: """Compress fd/fdfind output: group by directory.""" lines = [line.strip() for line in output.splitlines() if line.strip()] diff --git a/src/processors/syslog.py b/src/processors/syslog.py new file mode 100644 index 0000000..715aafa --- /dev/null +++ b/src/processors/syslog.py @@ -0,0 +1,36 @@ +"""System log processor: journalctl, dmesg.""" + +import re + +from .. 
import config +from .base import Processor +from .utils import compress_log_lines + + +class SyslogProcessor(Processor): + priority = 42 + hook_patterns = [ + r"^(journalctl|dmesg)\b", + ] + + @property + def name(self) -> str: + return "syslog" + + def can_handle(self, command: str) -> bool: + return bool(re.search(r"\b(journalctl|dmesg)\b", command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + lines = output.splitlines() + if len(lines) <= 30: + return output + + return compress_log_lines( + lines, + keep_head=10, + keep_tail=20, + context_lines=config.get("file_log_context_lines"), + ) diff --git a/src/processors/test_output.py b/src/processors/test_output.py index 87c76ec..3160e1c 100644 --- a/src/processors/test_output.py +++ b/src/processors/test_output.py @@ -77,6 +77,7 @@ def _process_pytest(self, lines: list[str]) -> str: warning_lines: list[str] = [] summary_lines = [] passed_count = 0 + param_tests: dict[str, dict] = {} # base_name -> {"passed": int, "failed": [param]} for line in lines: # Skip collection output @@ -152,11 +153,25 @@ def _process_pytest(self, lines: list[str]) -> str: # Count passed tests if re.search(r"\bPASSED\b", line): passed_count += 1 + # Track parameterized tests + m = re.match(r"^(\S+?)\[(.+)\]\s+PASSED", line.strip()) + if m: + base = m.group(1) + param_tests.setdefault(base, {"passed": 0, "failed": []}) + param_tests[base]["passed"] += 1 continue # Keep FAILED/ERROR individual lines if re.search(r"\bFAILED\b|\bERROR\b", line): - result.append(line) + # Track parameterized test failures + m = re.match(r"^(\S+?)\[(.+)\]\s+FAILED", line.strip()) + if m: + base = m.group(1) + param = m.group(2) + param_tests.setdefault(base, {"passed": 0, "failed": []}) + param_tests[base]["failed"].append(param) + else: + result.append(line) continue # Keep final summary lines (skip "test session starts" header) @@ -175,12 +190,82 @@ def _process_pytest(self, lines: list[str]) 
-> str: if failure_block: result.extend(self._truncate_traceback(failure_block)) + # Add grouped summaries for parameterized tests with failures + for base, info in param_tests.items(): + if info["failed"]: + total = info["passed"] + len(info["failed"]) + failed_params = ", ".join(info["failed"][:5]) + extra = "" + if len(info["failed"]) > 5: + extra = f", ... ({len(info['failed']) - 5} more)" + result.append( + f"{base}: {info['passed']}/{total} passed, FAILED: [{failed_params}{extra}]" + ) + if passed_count > 0: result.insert(0, f"[{passed_count} tests passed]") + # Detect and compress coverage report in remaining lines + coverage_lines = self._extract_coverage(lines) + if coverage_lines: + result.extend(self._compress_coverage(coverage_lines)) + result.extend(summary_lines) return "\n".join(result) if result else "\n".join(lines) + def _extract_coverage(self, lines: list[str]) -> list[str]: + """Extract coverage table lines from pytest output.""" + coverage_start = None + coverage_end = None + for i, line in enumerate(lines): + stripped = line.strip() + if coverage_start is None and ( + re.match(r"^-+ coverage", stripped) or re.match(r"^Name\s+Stmts\s+Miss", stripped) + ): + coverage_start = i + if ( + coverage_start is not None + and i > coverage_start + and re.match(r"^TOTAL\s+", stripped) + ): + coverage_end = i + break + if coverage_start is None: + return [] + end = coverage_end + 1 if coverage_end is not None else len(lines) + return lines[coverage_start:end] + + def _compress_coverage(self, lines: list[str]) -> list[str]: + """Compress pytest coverage report: keep low-coverage files + TOTAL.""" + result = [] + total_line = "" + low_coverage_files = [] + + for line in lines: + stripped = line.strip() + if stripped.startswith("TOTAL"): + total_line = stripped + continue + if stripped.startswith(("Name", "-")): + continue + # Parse: filename stmts miss cover% + m = re.match(r"^(\S+)\s+\d+\s+\d+\s+(\d+)%", stripped) + if m: + cover_pct = int(m.group(2)) + if 
cover_pct < 80: + low_coverage_files.append(stripped) + + if total_line: + result.append(total_line) + if low_coverage_files: + result.append(f"Files below 80% coverage ({len(low_coverage_files)}):") + for f in low_coverage_files[:10]: + result.append(f" {f}") + if len(low_coverage_files) > 10: + result.append(f" ... ({len(low_coverage_files) - 10} more)") + + return result + def _collapse_warnings(self, warning_lines: list[str]) -> list[str]: """Group warnings by type, show count + one example per type.""" by_type: dict[str, list[str]] = {} diff --git a/src/processors/utils.py b/src/processors/utils.py index a1d89c3..619593f 100644 --- a/src/processors/utils.py +++ b/src/processors/utils.py @@ -3,6 +3,11 @@ import re from collections import defaultdict +_DEFAULT_ERROR_RE = re.compile( + r"\b(error|Error|ERROR|exception|Exception|EXCEPTION|" + r"fatal|Fatal|FATAL|panic|Panic|PANIC|traceback|Traceback)\b" +) + def compress_json_value(value, depth=0, max_depth=4, important_key_re=None): """Recursively compress a JSON value, truncating at depth. @@ -150,3 +155,52 @@ def group_files_by_dir(lines, max_files): result.append(f"... 
({len(dirs) - max_files} more directories)") return result + + +def compress_log_lines( + lines: list[str], + keep_head: int = 10, + keep_tail: int = 20, + error_re: re.Pattern | None = None, + context_lines: int = 2, + max_error_lines: int = 50, +) -> str: + """Compress log-style output: keep head, tail, and error lines with context.""" + if len(lines) <= keep_head + keep_tail: + return "\n".join(lines) + + err_re = error_re or _DEFAULT_ERROR_RE + head = lines[:keep_head] + tail = lines[-keep_tail:] + middle = lines[keep_head:-keep_tail] if len(lines) > keep_head + keep_tail else [] + + # Find error lines with context in the middle section + error_indices: set[int] = set() + for idx, line in enumerate(middle): + if err_re.search(line): + for c in range(idx - context_lines, idx + context_lines + 1): + if 0 <= c < len(middle): + error_indices.add(c) + + result = head[:] + + if middle: + if error_indices: + result.append(f"\n... ({len(lines)} total lines, showing errors) ...\n") + sorted_indices = sorted(error_indices) + prev = -2 + for idx in sorted_indices: + if idx > prev + 1 and prev >= 0: + gap = idx - prev - 1 + result.append(f" ... ({gap} lines skipped)") + result.append(middle[idx]) + prev = idx + # Cap error output + if len(sorted_indices) > max_error_lines: + result = result[: keep_head + 1 + max_error_lines] + result.append(f" ... ({len(sorted_indices) - max_error_lines} more error lines)") + else: + result.append(f"\n... 
({len(lines) - keep_head - keep_tail} lines truncated) ...\n") + + result.extend(tail) + return "\n".join(result) diff --git a/tests/test_engine.py b/tests/test_engine.py index 6b1e6b1..183026a 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -210,9 +210,9 @@ class TestProcessorRegistry: """Tests for auto-discovery and the processor registry.""" def test_discover_processors_finds_all(self): - """Auto-discovery should find all 18 processors.""" + """Auto-discovery should find all 21 processors.""" processors = discover_processors() - assert len(processors) == 18 + assert len(processors) == 21 def test_discover_processors_sorted_by_priority(self): """Processors must be returned in ascending priority order.""" @@ -257,6 +257,9 @@ def test_expected_priority_order(self): assert name_to_priority["gh"] == 37 assert name_to_priority["db_query"] == 38 assert name_to_priority["cloud_cli"] == 39 + assert name_to_priority["ansible"] == 40 + assert name_to_priority["helm"] == 41 + assert name_to_priority["syslog"] == 42 assert name_to_priority["file_listing"] == 50 assert name_to_priority["file_content"] == 51 assert name_to_priority["generic"] == 999 @@ -383,6 +386,18 @@ def test_collect_hook_patterns_covers_key_commands(self): "aws ec2 describe-instances", "gcloud compute instances list", "az vm list", + # Ansible + "ansible-playbook site.yml", + "ansible all -m ping", + # Helm + "helm install my-release chart/", + "helm upgrade my-release chart/", + "helm list", + "helm template chart/", + "helm status my-release", + # Syslog + "journalctl -u nginx", + "dmesg", ] for cmd in test_commands: diff --git a/tests/test_hooks.py b/tests/test_hooks.py index 7ca188c..bc4b18a 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -108,6 +108,18 @@ def test_interactive_commands_excluded(self): assert not is_compressible("nano file.py") assert not is_compressible("ssh server") + def test_rsync_local_compressible(self): + """Local rsync (no remote host) should be 
compressible.""" + assert is_compressible("rsync -av src/ dest/") + assert is_compressible("rsync -r --delete /tmp/a/ /tmp/b/") + assert is_compressible("rsync --progress ./build/ /var/www/html/") + + def test_rsync_remote_excluded(self): + """Remote rsync (with host:path) should be excluded.""" + assert not is_compressible("rsync -av src/ user@server:/path/") + assert not is_compressible("rsync -r server:/remote/path /local/path") + assert not is_compressible("rsync -e ssh file.tar.gz host:/backup/") + def test_self_wrapping_excluded(self): assert not is_compressible("python3 wrap.py git status") assert not is_compressible("python3 /path/to/token_saver/wrap.py ls") diff --git a/tests/test_processors.py b/tests/test_processors.py index 8831b17..89fc10a 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -6,6 +6,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from src.chain_utils import extract_primary_command +from src.processors.ansible import AnsibleProcessor from src.processors.build_output import BuildOutputProcessor from src.processors.cloud_cli import CloudCliProcessor from src.processors.db_query import DbQueryProcessor @@ -16,11 +17,13 @@ from src.processors.generic import GenericProcessor from src.processors.gh import GhProcessor from src.processors.git import GitProcessor +from src.processors.helm import HelmProcessor from src.processors.kubectl import KubectlProcessor from src.processors.lint_output import LintOutputProcessor from src.processors.network import NetworkProcessor from src.processors.package_list import PackageListProcessor from src.processors.search import SearchProcessor +from src.processors.syslog import SyslogProcessor from src.processors.system_info import SystemInfoProcessor from src.processors.terraform import TerraformProcessor from src.processors.test_output import TestOutputProcessor @@ -568,6 +571,107 @@ def test_traceback_short_unchanged(self): assert result == block +class 
TestPytestParameterized: + """Tests for parameterized test grouping.""" + + def setup_method(self): + self.p = TestOutputProcessor() + + def test_all_param_passed_grouped(self): + """50 parameterized tests all PASSED should show just the count.""" + lines = [] + for i in range(50): + lines.append(f"tests/test_math.py::test_add[{i}] PASSED") + lines.append("======================== 50 passed in 1.0s ========================") + output = "\n".join(lines) + result = self.p.process("pytest -v", output) + assert "50 tests passed" in result + # Individual param lines should not appear + assert "test_add[0] PASSED" not in result + + def test_param_with_failures_grouped(self): + """Parameterized tests with failures should show grouped summary.""" + lines = [] + for i in range(47): + lines.append(f"tests/test_math.py::test_add[{i}] PASSED") + for i in range(47, 50): + lines.append(f"tests/test_math.py::test_add[{i}] FAILED") + lines.append("======================== 47 passed, 3 failed ========================") + output = "\n".join(lines) + result = self.p.process("pytest -v", output) + assert "47/50 passed" in result + assert "FAILED: [47, 48, 49]" in result + + def test_non_param_unchanged(self): + """Non-parameterized tests should not trigger grouping.""" + lines = [ + "tests/test_app.py::test_one PASSED", + "tests/test_app.py::test_two PASSED", + "tests/test_app.py::test_three FAILED", + "======================== 2 passed, 1 failed ========================", + ] + output = "\n".join(lines) + result = self.p.process("pytest -v", output) + assert "2 tests passed" in result + assert "test_three FAILED" in result + + +class TestPytestCoverage: + """Tests for pytest coverage report compression.""" + + def setup_method(self): + self.p = TestOutputProcessor() + + def test_coverage_table_compressed(self): + """Coverage table with low-coverage files should be compressed.""" + lines = [ + "tests/test_app.py::test_one PASSED", + "tests/test_app.py::test_two PASSED", + "---------- 
coverage: 85% ----------", + "Name Stmts Miss Cover", + "---------------------------------------", + ] + for i in range(20): + pct = 95 if i < 17 else 60 + lines.append(f"src/mod{i}.py 100 {100 - pct} {pct}%") + lines.append("TOTAL 2000 300 85%") + lines.append("---------------------------------------") + lines.append("======================== 2 passed in 1.0s ========================") + output = "\n".join(lines) + result = self.p.process("pytest --cov", output) + assert "TOTAL" in result + assert "2000" in result + assert "Files below 80%" in result + assert "60%" in result + # High-coverage files should not appear + assert "95%" not in result + + def test_no_coverage_table_unchanged(self): + """Output without coverage should not be modified by coverage logic.""" + lines = [ + "tests/test_app.py::test_one PASSED", + "======================== 1 passed in 0.5s ========================", + ] + output = "\n".join(lines) + result = self.p.process("pytest", output) + assert "Files below" not in result + + def test_total_line_always_preserved(self): + """TOTAL line in coverage must always be in output.""" + lines = [ + "---------- coverage: 100% ----------", + "Name Stmts Miss Cover", + "---------------------------------------", + "src/app.py 50 0 100%", + "TOTAL 50 0 100%", + "---------------------------------------", + "======================== 1 passed ========================", + ] + output = "\n".join(lines) + result = self.p.process("pytest --cov", output) + assert "TOTAL" in result + + class TestBuildOutputProcessor: def setup_method(self): self.p = BuildOutputProcessor() @@ -1266,6 +1370,75 @@ def test_progress_bar_stripped(self): assert "━" not in result +class TestEnvVariantDetection: + """Tests for .env variant file handling.""" + + def setup_method(self): + self.p = FileContentProcessor() + + def test_env_production_redacted(self): + """cat .env.production should redact sensitive values.""" + output = "\n".join( + [ + "APP_NAME=myapp", + "API_KEY=secret123", 
+        # Source code (.js) should pass through unchanged
test_fd_short_unchanged(self): assert result == output +class TestSearchDirectoryGrouping: + """Tests for search result directory grouping with large result sets.""" + + def setup_method(self): + self.p = SearchProcessor() + + def test_many_files_grouped_by_dir(self): + """rg with 40+ files should group by directory.""" + lines = [] + for d in range(12): + for f in range(4): + for m in range(3): + lines.append(f"src/dir{d}/file{f}.py:{m + 1}:TODO: fix this {d}-{f}-{m}") + output = "\n".join(lines) + result = self.p.process("rg TODO", output) + assert "directories" in result + assert "src/dir0/" in result + + def test_few_files_not_grouped(self): + """rg with 10 files should use per-file grouping.""" + lines = [] + for f in range(10): + lines.append(f"src/file{f}.py:1:TODO fix") + lines.append(f"src/file{f}.py:5:TODO cleanup") + output = "\n".join(lines) + result = self.p.process("rg TODO", output) + # Should use normal per-file grouping, not directory grouping + assert "directories" not in result + + +class TestDockerComposeLogs: + """Tests for docker compose log grouping by service.""" + + def setup_method(self): + self.p = DockerProcessor() + + def test_compose_logs_grouped(self): + """docker compose logs with multiple services should group by service.""" + lines = [] + for i in range(200): + service = ["web", "api", "db"][i % 3] + lines.append(f"{service} | Log line {i}") + output = "\n".join(lines) + result = self.p.process("docker compose logs", output) + assert "200 log lines" in result + assert "--- web" in result + assert "--- api" in result + assert "--- db" in result + + def test_compose_logs_errors_shown(self): + """Service with errors should show error lines.""" + lines = [] + for i in range(100): + lines.append(f"web | Normal log line {i}") + lines.append("web | ERROR: Connection refused") + lines.append("web | Failed to connect to database") + for i in range(100): + lines.append(f"api | API running on port {i}") + output = "\n".join(lines) + result = 
self.p.process("docker compose logs", output) + assert "ERROR: Connection refused" in result + assert "errors" in result + + def test_compose_no_errors_shows_tail(self): + """Service with no errors should show last 3 lines.""" + lines = [] + for i in range(100): + lines.append(f"web | Log line {i}") + output = "\n".join(lines) + result = self.p.process("docker compose logs", output) + assert "Log line 99" in result + assert "0 errors" in result + + class TestKubectlProcessor: def setup_method(self): self.p = KubectlProcessor() @@ -2567,6 +2814,102 @@ def test_status_sb_branch_no_tracking(self): assert "On branch feature/foo" in result +class TestGitDiffStatGrouping: + """Tests for --stat directory grouping with many files.""" + + def setup_method(self): + self.p = GitProcessor() + + def test_stat_many_files_grouped(self): + """git diff --stat with 50+ files should group by directory.""" + lines = [] + for i in range(25): + lines.append(f" src/components/file{i}.tsx | 10 ++++------") + for i in range(25): + lines.append(f" src/utils/helper{i}.ts | 5 ++---") + lines.append(" 50 files changed, 375 insertions(+), 375 deletions(-)") + output = "\n".join(lines) + result = self.p.process("git diff --stat", output) + assert "src/components/" in result + assert "25 files" in result + assert "src/utils/" in result + assert "50 files changed" in result + + def test_stat_few_files_not_grouped(self): + """git diff --stat with few files should not group.""" + lines = [ + " src/app.py | 5 ++---", + " src/utils.py | 3 +--", + " 2 files changed, 3 insertions(+), 5 deletions(-)", + ] + output = "\n".join(lines) + result = self.p.process("git diff --stat", output) + assert "app.py" in result + assert "utils.py" in result + + +class TestGitLockfileDiff: + """Tests for lockfile detection in git diff output.""" + + def setup_method(self): + self.p = GitProcessor() + + def test_lockfile_only_diff_summarized(self): + """A diff containing only package-lock.json should be summarized.""" + 
lock_lines = ["diff --git a/package-lock.json b/package-lock.json"] + lock_lines.append("index abc1234..def5678 100644") + lock_lines.append("--- a/package-lock.json") + lock_lines.append("+++ b/package-lock.json") + lock_lines.append("@@ -1,100 +1,100 @@") + for i in range(500): + lock_lines.append(f'+ "pkg-{i}": "{i}.0.0",') + output = "\n".join(lock_lines) + result = self.p.process("git diff", output) + assert "lockfile changed" in result + assert "package-lock.json" in result + assert len(result.splitlines()) <= 5 + + def test_mixed_lockfile_and_normal(self): + """Lockfile should be summarized, normal file compressed normally.""" + lines = [ + "diff --git a/src/app.py b/src/app.py", + "@@ -1,5 +1,5 @@", + "-old line", + "+new line", + " context", + "diff --git a/yarn.lock b/yarn.lock", + "index abc..def 100644", + "--- a/yarn.lock", + "+++ b/yarn.lock", + "@@ -1,200 +1,200 @@", + ] + for i in range(200): + lines.append(f"+pkg-{i}@^{i}.0.0:") + output = "\n".join(lines) + result = self.p.process("git diff", output) + # Normal file is preserved + assert "app.py" in result + assert "+new line" in result + # Lockfile is summarized + assert "lockfile changed" in result + assert "yarn.lock" in result + + def test_normal_files_only_unchanged(self): + """Diffs with only normal files should be compressed normally.""" + output = "\n".join( + [ + "diff --git a/src/app.py b/src/app.py", + "@@ -1,3 +1,3 @@", + "-old", + "+new", + " context", + ] + ) + result = self.p.process("git diff", output) + assert "lockfile" not in result + assert "+new" in result + + class TestBuildBunCanHandle: """Test bun command handling in build processor.""" @@ -2610,6 +2953,54 @@ def test_warning_samples_capped_at_five(self): assert " WARNING: issue-5" not in result +class TestTscTypecheck: + """Tests for tsc --noEmit type-check grouping.""" + + def setup_method(self): + self.p = BuildOutputProcessor() + + def test_tsc_noemit_errors_grouped(self): + """tsc --noEmit with many errors should group 
by code.""" + lines = [] + for i in range(30): + lines.append(f"src/file{i}.ts(10,5): error TS2322: Type 'string' is not assignable.") + for i in range(15): + lines.append( + f"src/util{i}.ts:5:3 - error TS2345: Argument of type 'number' not assignable." + ) + for i in range(5): + lines.append( + f"src/other{i}.ts(1,1): error TS7006: Parameter implicitly has 'any' type." + ) + lines.append("Found 50 errors in 50 files.") + output = "\n".join(lines) + result = self.p.process("tsc --noEmit", output) + assert "50 type errors across 3 codes" in result + assert "TS2322: 30 occurrences" in result + assert "TS2345: 15 occurrences" in result + assert "Found 50 errors" in result + + def test_tsc_build_unchanged(self): + """tsc (without --noEmit) should use existing build logic.""" + output = "Build succeeded.\nDone in 2.5s." + result = self.p.process("tsc", output) + # Should not trigger typecheck grouping + assert "type errors" not in result + + def test_tsc_error_codes_preserved(self): + """Error codes and file paths should be preserved in examples.""" + output = "\n".join( + [ + "src/app.ts(10,5): error TS2322: Type 'string' is not assignable.", + "src/app.ts(20,3): error TS2322: Type 'number' is not assignable.", + "Found 2 errors.", + ] + ) + result = self.p.process("tsc --noEmit", output) + assert "TS2322" in result + assert "src/app.ts" in result + + class TestBuildOutputPipeGuard: """Test that piped build commands bypass aggressive summarization.""" @@ -2901,3 +3292,207 @@ def test_kubectl_processor_handles_extracted(self): primary = extract_primary_command("cd /deploy && kubectl get pods") assert primary == "kubectl get pods" assert p.can_handle(primary) + + +class TestAnsibleProcessor: + def setup_method(self): + self.p = AnsibleProcessor() + + def test_can_handle(self): + assert self.p.can_handle("ansible-playbook site.yml") + assert self.p.can_handle("ansible all -m ping") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert 
self.p.process("ansible-playbook site.yml", "") == "" + + def test_short_output_unchanged(self): + output = "\n".join(f"line {i}" for i in range(15)) + result = self.p.process("ansible-playbook site.yml", output) + assert result == output + + def test_all_ok_compressed(self): + """Playbook with all ok tasks should compress to headers + recap.""" + lines = [] + lines.append("PLAY [all] ****") + lines.append("*" * 60) + for i in range(30): + lines.append(f"TASK [task-{i}] ****") + lines.append("*" * 60) + for h in range(20): + lines.append(f"ok: [host-{h}]") + lines.append("PLAY RECAP ****") + for h in range(20): + lines.append(f"host-{h} : ok=30 changed=0 unreachable=0 failed=0") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "ok" in result + assert "PLAY RECAP" in result + # All 600 ok lines should be counted + assert "600 ok" in result + assert len(result) < len(output) + + def test_failed_tasks_preserved(self): + """Failed tasks should be fully preserved with error messages.""" + lines = [] + lines.append("PLAY [webservers] ****") + for i in range(5): + lines.append(f"TASK [task-{i}] ****") + lines.append(f"ok: [host-{i}]") + lines.append("TASK [deploy] ****") + lines.append('fatal: [host-1]: FAILED! 
=> {"msg": "Connection refused"}') + lines.append("TASK [rollback] ****") + lines.append("changed: [host-1]") + lines.append("PLAY RECAP ****") + lines.append("host-1 : ok=5 changed=1 unreachable=0 failed=1") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "FAILED" in result + assert "Connection refused" in result + assert "changed:" in result + assert "host-1" in result + + def test_changed_tasks_preserved(self): + """Changed tasks should be kept in output.""" + lines = ["PLAY [all] ****", "*" * 60] + for i in range(10): + lines.append(f"TASK [task-{i}] ****") + lines.append("*" * 60) + lines.append(f"ok: [host-{i}]") + lines.append("TASK [update] ****") + lines.append("*" * 60) + lines.append("changed: [host-0]") + lines.append("PLAY RECAP ****") + lines.append("host-0 : ok=10 changed=1") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "changed:" in result + + def test_recap_host_names_preserved(self): + """Host names in RECAP should be preserved.""" + lines = ["PLAY [all] ****", "*" * 60] + for i in range(25): + lines.append(f"TASK [task-{i}] ****") + lines.append(f"ok: [prod-server-{i}]") + lines.append("PLAY RECAP ****") + lines.append("prod-server-0 : ok=25 changed=0 unreachable=0 failed=0") + lines.append("prod-server-1 : ok=25 changed=0 unreachable=0 failed=0") + output = "\n".join(lines) + result = self.p.process("ansible-playbook site.yml", output) + assert "prod-server-0" in result + assert "prod-server-1" in result + + +class TestHelmProcessor: + def setup_method(self): + self.p = HelmProcessor() + + def test_can_handle(self): + assert self.p.can_handle("helm install myrelease mychart") + assert self.p.can_handle("helm upgrade myrelease mychart") + assert self.p.can_handle("helm list") + assert self.p.can_handle("helm template mychart") + assert self.p.can_handle("helm status myrelease") + assert self.p.can_handle("helm history myrelease") + assert 
+        """helm template with ~500 lines, 8 manifests should be summarized."""
+ assert self.p.can_handle("dmesg") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("journalctl", "") == "" + + def test_short_output_unchanged(self): + output = "\n".join(f"line {i}" for i in range(25)) + result = self.p.process("dmesg", output) + assert result == output + + def test_journalctl_with_errors(self): + """journalctl with 500 lines and errors should preserve errors.""" + lines = [] + for i in range(500): + if i == 250: + lines.append("Mar 17 10:00:00 host nginx[1234]: ERROR: connection refused") + elif i == 251: + lines.append("Mar 17 10:00:01 host nginx[1234]: retrying connection") + elif i == 300: + lines.append("Mar 17 10:05:00 host nginx[1234]: fatal: out of memory") + else: + lines.append(f"Mar 17 10:00:00 host nginx[1234]: normal log line {i}") + output = "\n".join(lines) + result = self.p.process("journalctl -u nginx", output) + assert "ERROR" in result + assert "fatal" in result + assert len(result) < len(output) + + def test_dmesg_no_errors_truncated(self): + """dmesg with 200 lines and no errors should show head + tail.""" + lines = [f"[{i}.000000] Normal kernel message {i}" for i in range(200)] + output = "\n".join(lines) + result = self.p.process("dmesg", output) + assert "truncated" in result + # Head preserved + assert "Normal kernel message 0" in result + # Tail preserved + assert "Normal kernel message 199" in result + assert len(result) < len(output)