From 5c8fe2b4e847888818c84ead75d96dbd0d805dcb Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Fri, 27 Mar 2026 08:52:56 +0100 Subject: [PATCH 1/6] feat: add cargo, go, ssh, jq/yq processors and multi-processor chaining - Cargo processor (priority 22): cargo build/check/doc/update/bench with warning grouping by type and full error preservation - Go processor (priority 23): go build/vet/mod/generate/install with download collapsing and error grouping - SSH processor (priority 43): non-interactive ssh and scp with progress compression, narrowed hook exclusion to interactive-only - JQ/YQ processor (priority 44): JSON structure compression via compress_json_value, YAML heuristic summarization - Engine chaining: chain_to attribute on Processor base class, _by_name lookup dict, max depth 1 with self-reference guard --- docs/processors/cargo.md | 34 +++ docs/processors/go.md | 36 +++ docs/processors/jq_yq.md | 29 +++ docs/processors/ssh.md | 29 +++ scripts/hook_pretool.py | 4 +- src/config.py | 3 + src/engine.py | 13 ++ src/processors/base.py | 1 + src/processors/build_output.py | 3 + src/processors/cargo.py | 295 +++++++++++++++++++++++++ src/processors/go.py | 181 ++++++++++++++++ src/processors/jq_yq.py | 156 +++++++++++++ src/processors/ssh.py | 74 +++++++ tests/test_engine.py | 43 +++- tests/test_hooks.py | 67 ++++++ tests/test_precision.py | 111 ++++++++++ tests/test_processors.py | 385 ++++++++++++++++++++++++++++++++- 17 files changed, 1459 insertions(+), 5 deletions(-) create mode 100644 docs/processors/cargo.md create mode 100644 docs/processors/go.md create mode 100644 docs/processors/jq_yq.md create mode 100644 docs/processors/ssh.md create mode 100644 src/processors/cargo.py create mode 100644 src/processors/go.py create mode 100644 src/processors/jq_yq.py create mode 100644 src/processors/ssh.py diff --git a/docs/processors/cargo.md b/docs/processors/cargo.md new file mode 100644 index 0000000..c9a16ba --- 
/dev/null +++ b/docs/processors/cargo.md @@ -0,0 +1,34 @@ +# Cargo Processor + +**File:** `src/processors/cargo.py` | **Priority:** 22 | **Name:** `cargo` + +Dedicated processor for Rust's cargo build system. + +## Supported Commands + +cargo build, cargo check, cargo doc, cargo update, cargo bench. + +## Strategy + +| Subcommand | Strategy | +|---|---| +| **build/check** | Collapse `Compiling X v1.0` lines into count. Group warnings by type (unused_variable, unused_import, dead_code, unused_mut, lifetime, borrow_checker). Show first N examples per type. Keep ALL errors with full span context (`-->`, `|`, `^^` markers). Keep `Finished` summary | +| **doc** | Collapse `Documenting X` and `Compiling X` lines. Keep doc warnings, errors, `Finished`, and `Generated` lines | +| **update** | Show all major version bumps explicitly (breaking changes). Collapse minor/patch bumps into count. Keep `Adding` and `Removing` lines | +| **bench** | Keep benchmark result lines (`bench: N ns/iter`). Strip `Compiling` and `Running` noise. Keep `test result:` summary | + +## Exclusions + +- `cargo test` is routed to `TestOutputProcessor` +- `cargo clippy` is routed to `LintOutputProcessor` + +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| cargo_warning_example_count | 2 | Number of example warnings to show per category | +| cargo_warning_group_threshold | 3 | Minimum occurrences before warnings are grouped | + +## Removed Noise + +`Compiling X v1.0.0` lines, `Downloading X v1.0.0` lines, `Running` lines (bench), intermediate blank lines between warnings. diff --git a/docs/processors/go.md b/docs/processors/go.md new file mode 100644 index 0000000..4ff73ba --- /dev/null +++ b/docs/processors/go.md @@ -0,0 +1,36 @@ +# Go Processor + +**File:** `src/processors/go.py` | **Priority:** 23 | **Name:** `go` + +Dedicated processor for Go toolchain commands. + +## Supported Commands + +go build, go vet, go mod tidy, go mod download, go generate, go install. 
+ +## Strategy + +| Subcommand | Strategy | +|---|---| +| **build/install** | Keep all `file.go:line:col: message` errors. For multi-package builds with many `# package` headers, truncate to first 3. Pass through unchanged if no errors (successful builds produce no output) | +| **vet** | Group warnings by type (printf, unreachable, shadow, unused, nil, loop). Show first N examples per type. Keep `# package` headers for context | +| **mod tidy/download** | Collapse `go: downloading X v1.0` lines into count. Keep `go: added/upgraded/downgraded/removed` lines (important dependency changes) | +| **generate** | Collapse `running` lines into count. Keep errors and generator output | + +## Exclusions + +- `go test` is routed to `TestOutputProcessor` +- `golangci-lint` is routed to `LintOutputProcessor` + +## Configuration + +Uses existing parameters: + +| Parameter | Default | Description | +|---|---|---| +| lint_example_count | 2 | Examples per warning type (go vet) | +| lint_group_threshold | 3 | Minimum occurrences before grouping | + +## Removed Noise + +`go: downloading X` lines, `# package` headers (when redundant), `running` lines from go generate. diff --git a/docs/processors/jq_yq.md b/docs/processors/jq_yq.md new file mode 100644 index 0000000..0c3a7b6 --- /dev/null +++ b/docs/processors/jq_yq.md @@ -0,0 +1,29 @@ +# JQ/YQ Processor + +**File:** `src/processors/jq_yq.py` | **Priority:** 44 | **Name:** `jq_yq` + +Compresses large JSON and YAML outputs from jq and yq. + +## Supported Commands + +jq, yq. + +## Strategy + +| Output Type | Strategy | +|---|---| +| **Small output** (< 50 lines) | Pass through unchanged | +| **jq JSON** | Parse as JSON, compress with `compress_json_value()` (truncate arrays > 5 items, summarize deeply nested objects). Re-serialize with indent | +| **jq streaming** (one JSON per line) | Detect repeated structure (same keys), show first 3 + count. Fallback to head/tail | +| **yq YAML** | Count top-level keys and list items. 
Collapse large arrays (> 3 items at same indent) to count. Add structure summary header | + +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| jq_passthrough_threshold | 50 | Lines below which output passes through unchanged | + +## Notes + +- No runtime dependencies: JSON parsing uses stdlib `json` module, YAML uses heuristic analysis (no PyYAML dependency) +- Streaming jq output (one value per line) is detected and compressed separately from single-document output diff --git a/docs/processors/ssh.md b/docs/processors/ssh.md new file mode 100644 index 0000000..f1bcf48 --- /dev/null +++ b/docs/processors/ssh.md @@ -0,0 +1,29 @@ +# SSH Processor + +**File:** `src/processors/ssh.py` | **Priority:** 43 | **Name:** `ssh` + +Handles non-interactive SSH and SCP command output. + +## Supported Commands + +- `ssh host 'command'` or `ssh host "command"` (non-interactive SSH with quoted remote command) +- `ssh -o Option=value host 'command'` (with SSH options) +- `scp` (all forms — always non-interactive) + +## Not Supported + +- `ssh host` (interactive SSH — no remote command) remains excluded from compression + +## Strategy + +| Command | Strategy | +|---|---| +| **SSH remote** | Apply log-style compression: keep first 10 + last 20 lines, preserve error lines with context in the middle section | +| **SCP** | Collapse progress bar lines (containing `%` and transfer rates) to final status per file. Keep error lines (permission denied, connection refused, etc.) | + +## How It Works + +The SSH/SCP exclusion in `hook_pretool.py` was narrowed from a blanket `ssh|scp` exclusion to only exclude interactive SSH (no quoted command). 
This allows: +- `ssh host 'ls -la'` — compressed (non-interactive) +- `scp file host:/path` — compressed (always non-interactive) +- `ssh host` — still excluded (interactive) diff --git a/scripts/hook_pretool.py b/scripts/hook_pretool.py index c7113f6..348f5a6 100644 --- a/scripts/hook_pretool.py +++ b/scripts/hook_pretool.py @@ -86,7 +86,7 @@ def _load_compressible_patterns() -> list[str]: EXCLUDED_PATTERNS = [ r"(? list[str]: r"<\(", # process substitution r"^\s*sudo\b", r"^\s*(vi|vim|nano|emacs|code)\b", - r"^\s*(ssh|scp)\b", + r"^\s*ssh\s+(?:-\S+\s+)*\S+\s*$", # interactive ssh only r"^\s*rsync\b.*\S+:\S+", # only exclude remote rsync (host:path) r"^\s*env\s+\S+=", r"(?:^|\s)token[-_]saver\s", diff --git a/src/config.py b/src/config.py index 289ac9d..633adb9 100644 --- a/src/config.py +++ b/src/config.py @@ -48,6 +48,9 @@ "db_prune_days": 90, "chars_per_token": 4, "user_processors_dir": "", + "cargo_warning_example_count": 2, + "cargo_warning_group_threshold": 3, + "jq_passthrough_threshold": 50, "debug": False, } diff --git a/src/engine.py b/src/engine.py index a951f48..abd8a79 100644 --- a/src/engine.py +++ b/src/engine.py @@ -20,10 +20,12 @@ class CompressionEngine: processors: list[Processor] _generic: Processor + _by_name: dict[str, Processor] def __init__(self) -> None: self.processors = discover_processors() self._generic = self.processors[-1] # Last = GenericProcessor (priority 999) + self._by_name = {p.name: p for p in self.processors} def compress(self, command: str, output: str) -> tuple[str, str, bool]: """Compress output for a given command. 
@@ -49,6 +51,17 @@ def compress(self, command: str, output: str) -> tuple[str, str, bool]: if compressed is output or compressed == output: return output, processor.name, False + # Chain to secondary processor if declared (max depth = 1) + if ( + processor.chain_to + and processor.chain_to != processor.name + and processor.chain_to in self._by_name + ): + secondary = self._by_name[processor.chain_to] + chained = secondary.process(command, compressed) + if chained is not compressed and chained != compressed: + compressed = chained + # If a specialized processor handled it, also run generic # cleanup (ANSI strip, blank line collapse) but not truncation if processor is not self._generic: diff --git a/src/processors/base.py b/src/processors/base.py index 9101fc3..3d84861 100644 --- a/src/processors/base.py +++ b/src/processors/base.py @@ -19,6 +19,7 @@ class Processor(ABC): priority: int = 50 hook_patterns: list[str] = [] + chain_to: str | None = None @abstractmethod def can_handle(self, command: str) -> bool: diff --git a/src/processors/build_output.py b/src/processors/build_output.py index d2ea7bc..be0fbb9 100644 --- a/src/processors/build_output.py +++ b/src/processors/build_output.py @@ -28,6 +28,9 @@ def can_handle(self, command: str) -> bool: # Exclude cargo clippy (handled by LintOutputProcessor) if re.search(r"\bcargo\s+clippy\b", command): return False + # Exclude cargo build/check (handled by CargoProcessor) + if re.search(r"\bcargo\s+(build|check)\b", command): + return False return bool( re.search( r"\b(npm\s+(run|install|ci|build|audit)|yarn\s+(run|install|build|add|audit)|pnpm\s+(run|install|build|add|audit)|" diff --git a/src/processors/cargo.py b/src/processors/cargo.py new file mode 100644 index 0000000..27ca223 --- /dev/null +++ b/src/processors/cargo.py @@ -0,0 +1,295 @@ +"""Cargo processor: cargo build, check, doc, update, bench.""" + +import re +from collections import defaultdict + +from .. 
"""Cargo processor: cargo build, check, doc, update, bench."""

import re
from collections import defaultdict

from .. import config
from .base import Processor

_CARGO_CMD_RE = re.compile(r"\bcargo\s+(build|check|doc|update|bench)\b")
_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v")
_DOWNLOADING_RE = re.compile(r"^\s*Downloading\s+\S+\s+v")
_DOCUMENTING_RE = re.compile(r"^\s*Documenting\s+\S+\s+v")
_RUNNING_RE = re.compile(r"^\s*Running\s+")
_FINISHED_RE = re.compile(r"^\s*Finished\s+")
_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)")
_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)")
_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)")
_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning")
_UPDATE_LINE_RE = re.compile(
    r"^\s*(Updating|Removing|Adding)\s+(\S+)\s+v([\d.]+)(?:\s*->\s*v([\d.]+))?"
)


class CargoProcessor(Processor):
    """Compress the output of ``cargo build/check/doc/update/bench``.

    ``cargo test`` and ``cargo clippy`` are rejected by :meth:`can_handle`.
    """

    priority = 22
    hook_patterns = [
        r"^cargo\s+(build|check|doc|update|bench)\b",
    ]

    @property
    def name(self) -> str:
        """Return the registry name of this processor."""
        return "cargo"

    def can_handle(self, command: str) -> bool:
        """Return True for supported cargo subcommands, False for test/clippy."""
        if re.search(r"\bcargo\s+(test|clippy)\b", command):
            return False
        return bool(_CARGO_CMD_RE.search(command))

    def process(self, command: str, output: str) -> str:
        """Dispatch ``output`` to the handler for the cargo subcommand in ``command``.

        Empty/blank output and commands without a supported subcommand are
        returned unchanged.
        """
        if not output or not output.strip():
            return output

        m = _CARGO_CMD_RE.search(command)
        if not m:
            return output

        handlers = {
            "build": self._process_cargo_build,
            "check": self._process_cargo_build,
            "doc": self._process_cargo_doc,
            "update": self._process_cargo_update,
            "bench": self._process_cargo_bench,
        }
        handler = handlers.get(m.group(1))
        return handler(output) if handler else output

    def _categorize_warning(self, msg: str) -> str:
        """Map a warning headline to a grouping key.

        Known phrases are checked first; otherwise the first ``[code]`` found
        in ``msg`` is used, and ``"other"`` when there is none.
        """
        if "unused variable" in msg:
            return "unused_variable"
        if "unused import" in msg:
            return "unused_import"
        if "dead_code" in msg or "never read" in msg or "never used" in msg:
            return "dead_code"
        if "does not need to be mutable" in msg:
            return "unused_mut"
        if "lifetime" in msg:
            return "lifetime"
        if "borrow" in msg:
            return "borrow_checker"
        m = re.search(r"\[(\w+(?:::\w+)*)\]", msg)
        return m.group(1) if m else "other"

    def _process_cargo_build(self, output: str) -> str:
        """Compress ``cargo build`` / ``cargo check`` output.

        ``Compiling``/``Downloading`` lines become counts, every error block is
        kept in full, warning blocks are grouped by category (with a limited
        number of examples once a category reaches the configured threshold),
        and the warning-summary and ``Finished`` lines are kept.
        """
        compiling_count = 0
        downloading_count = 0
        warnings_by_type: dict[str, list[list[str]]] = defaultdict(list)
        error_blocks: list[list[str]] = []
        finished_lines: list[str] = []
        warning_summary_lines: list[str] = []

        # At most one diagnostic block (error OR warning) is open at a time.
        current_block: list[str] = []
        current_type: str | None = None  # warning category of the open block
        in_error = False

        def flush() -> None:
            """Close the open diagnostic block, if any, and file it."""
            nonlocal current_block, current_type, in_error
            if current_block:
                if in_error:
                    error_blocks.append(current_block)
                elif current_type:
                    warnings_by_type[current_type].append(current_block)
            current_block = []
            current_type = None
            in_error = False

        for line in output.splitlines():
            stripped = line.strip()

            if _COMPILING_RE.match(stripped):
                compiling_count += 1
            elif _DOWNLOADING_RE.match(stripped):
                downloading_count += 1
            elif _ERROR_START_RE.match(stripped):
                flush()
                in_error = True
                current_block = [line]
            elif _WARNING_SUMMARY_RE.match(stripped):
                # "warning: `x` generated N warnings" is a summary, not a diagnostic.
                flush()
                warning_summary_lines.append(line)
            elif wm := _WARNING_START_RE.match(stripped):
                flush()
                rule, msg = wm.group(1), wm.group(2)
                # _WARNING_START_RE captures the code without its brackets;
                # put them back so the categorizer's "[code]" fallback can
                # match instead of always falling through to "other".
                headline = f"[{rule}] {msg}" if rule else msg
                current_type = self._categorize_warning(headline)
                current_block = [line]
            elif _FINISHED_RE.match(stripped):
                flush()
                finished_lines.append(line)
            elif in_error or current_type:
                # Context line (span, code excerpt, note) of the open block.
                current_block.append(line)

        flush()

        result: list[str] = []
        if downloading_count > 0:
            result.append(f"[{downloading_count} crates downloaded]")
        if compiling_count > 0:
            result.append(f"[{compiling_count} crates compiled]")

        # Errors are never summarized.
        for block in error_blocks:
            result.extend(block)

        example_count = config.get("cargo_warning_example_count")
        group_threshold = config.get("cargo_warning_group_threshold")
        # Most frequent categories first.
        for wtype, blocks in sorted(warnings_by_type.items(), key=lambda x: -len(x[1])):
            count = len(blocks)
            if count >= group_threshold:
                result.append(f"warning: {wtype} ({count} occurrences)")
                for block in blocks[:example_count]:
                    result.extend(f" {text}" for text in block)
                if count > example_count:
                    result.append(f" ... ({count - example_count} more)")
            else:
                for block in blocks:
                    result.extend(block)

        result.extend(warning_summary_lines)
        result.extend(finished_lines)

        return "\n".join(result) if result else output

    def _process_cargo_doc(self, output: str) -> str:
        """Compress ``cargo doc`` output.

        ``Compiling``/``Documenting`` lines become a leading count summary;
        ``Finished``, ``Generated``, warning and error lines are kept, as are
        span lines once at least one line has been kept.
        """
        result: list[str] = []
        compiling_count = 0
        documenting_count = 0

        for line in output.splitlines():
            stripped = line.strip()
            if _COMPILING_RE.match(stripped):
                compiling_count += 1
            elif _DOCUMENTING_RE.match(stripped):
                documenting_count += 1
            elif _FINISHED_RE.match(stripped):
                result.append(line)
            elif re.match(r"^\s*Generated\s+", stripped):
                result.append(line)
            elif re.search(r"\bwarning\b", stripped):
                result.append(line)
            elif _ERROR_START_RE.match(stripped):
                result.append(line)
            elif _SPAN_LINE_RE.match(stripped) and result:
                result.append(line)

        summary_parts = []
        if compiling_count > 0:
            summary_parts.append(f"{compiling_count} compiled")
        if documenting_count > 0:
            summary_parts.append(f"{documenting_count} documented")
        if summary_parts:
            result.insert(0, f"[{', '.join(summary_parts)}]")

        return "\n".join(result) if result else output

    def _process_cargo_update(self, output: str) -> str:
        """Compress ``cargo update`` output.

        Major-version bumps are listed individually, other updates are
        counted, and added/removed packages are listed. ``error:`` and
        ``warning:`` headlines are preserved. When no update/add/remove line
        is recognized the output is returned unchanged, so a failed run is
        not replaced by a misleading "[0 dependencies updated]".
        """
        updates: list[str] = []
        major_bumps: list[str] = []
        additions: list[str] = []
        removals: list[str] = []
        diagnostics: list[str] = []

        for line in output.splitlines():
            stripped = line.strip()
            m = _UPDATE_LINE_RE.match(stripped)
            if not m:
                if _ERROR_START_RE.match(stripped) or _WARNING_START_RE.match(stripped):
                    diagnostics.append(line)
                continue

            action, pkg, old_ver, new_ver = m.groups()
            if action == "Adding":
                additions.append(f" + {pkg} v{old_ver}")
            elif action == "Removing":
                removals.append(f" - {pkg} v{old_ver}")
            elif action == "Updating" and new_ver:
                # Only the leading version component is compared.
                if old_ver.split(".")[0] != new_ver.split(".")[0]:
                    major_bumps.append(f" {pkg}: v{old_ver} -> v{new_ver} (MAJOR)")
                else:
                    updates.append(pkg)

        if not (updates or major_bumps or additions or removals):
            return output

        total = len(updates) + len(major_bumps)
        result = [f"[{total} dependencies updated]"]
        result.extend(diagnostics)

        if major_bumps:
            result.append("Major version bumps:")
            result.extend(major_bumps)

        if updates:
            result.append(f"Minor/patch updates: {len(updates)} packages")

        if additions:
            result.append("Added:")
            result.extend(additions)
        if removals:
            result.append("Removed:")
            result.extend(removals)

        return "\n".join(result)

    def _process_cargo_bench(self, output: str) -> str:
        """Compress ``cargo bench`` output.

        Keeps ``test ... bench:`` result lines, ``test result:``, ``Finished``
        and error headlines; counts ``Compiling`` lines; drops ``Running``
        lines and everything else.
        """
        result: list[str] = []
        compiling_count = 0

        for line in output.splitlines():
            stripped = line.strip()
            if _COMPILING_RE.match(stripped):
                compiling_count += 1
            elif _RUNNING_RE.match(stripped):
                continue
            elif re.match(r"^test\s+.+\s+bench:", stripped):
                result.append(line)
            elif re.match(r"^test result:", stripped):
                result.append(line)
            elif _FINISHED_RE.match(stripped):
                result.append(line)
            elif _ERROR_START_RE.match(stripped):
                result.append(line)

        if compiling_count > 0:
            result.insert(0, f"[{compiling_count} crates compiled]")

        return "\n".join(result) if result else output
"""Go processor: go build, vet, mod, generate, install."""

import re
from collections import defaultdict

from .. import config
from .base import Processor

_GO_CMD_RE = re.compile(r"\bgo\s+(build|vet|mod|generate|install)\b")
_GO_ERROR_RE = re.compile(r"^(\S+\.go):(\d+):(\d+):\s+(.+)$")
_GO_PACKAGE_RE = re.compile(r"^#\s+(\S+)")
_GO_DOWNLOADING_RE = re.compile(r"^go:\s+downloading\s+(\S+)\s+v")
_GO_MOD_ACTION_RE = re.compile(r"^go:\s+(added|upgraded|downgraded|removed)\s+")
_GO_GENERATE_RUN_RE = re.compile(r"^(\S+\.go):\d+:\s+running\s+")


class GoProcessor(Processor):
    """Compress the output of ``go build/vet/mod/generate/install``.

    ``go test`` and ``golangci-lint`` are rejected by :meth:`can_handle`.
    """

    priority = 23
    hook_patterns = [
        r"^go\s+(build|vet|mod|generate|install)\b",
    ]

    @property
    def name(self) -> str:
        """Return the registry name of this processor."""
        return "go"

    def can_handle(self, command: str) -> bool:
        """Return True for supported go subcommands, False for test/golangci-lint."""
        if re.search(r"\bgo\s+test\b", command):
            return False
        if re.search(r"\bgolangci-lint\b", command):
            return False
        return bool(_GO_CMD_RE.search(command))

    def process(self, command: str, output: str) -> str:
        """Dispatch ``output`` to the handler for the go subcommand in ``command``.

        Empty/blank output and commands without a supported subcommand are
        returned unchanged. ``install`` shares the ``build`` handler.
        """
        if not output or not output.strip():
            return output

        m = _GO_CMD_RE.search(command)
        if not m:
            return output

        handlers = {
            "build": self._process_go_build,
            "install": self._process_go_build,
            "vet": self._process_go_vet,
            "mod": self._process_go_mod,
            "generate": self._process_go_generate,
        }
        handler = handlers.get(m.group(1))
        return handler(output) if handler else output

    def _process_go_build(self, output: str) -> str:
        """Compress ``go build`` / ``go install`` output.

        Output without any ``file.go:line:col:`` error is returned unchanged.
        Otherwise the errors and every non-blank line following the first
        error are kept; ``# package`` headers are shown only when there is
        more than one, limited to the first three plus a count of the rest.
        """
        package_lines: list[str] = []
        error_lines: list[str] = []

        for line in output.splitlines():
            stripped = line.strip()
            if _GO_ERROR_RE.match(stripped):
                error_lines.append(line)
            elif _GO_PACKAGE_RE.match(stripped):
                package_lines.append(line)
            elif stripped and error_lines:
                # Context after an error (code snippet, notes).
                error_lines.append(line)

        if not error_lines:
            # No recognizable errors: could be warnings or linker errors.
            return output

        result: list[str] = []
        if len(package_lines) > 1:
            result.extend(package_lines[:3])
            if len(package_lines) > 3:
                result.append(f"... ({len(package_lines) - 3} more packages)")

        result.extend(error_lines)
        return "\n".join(result)

    def _categorize_vet_warning(self, msg: str) -> str:
        """Map a vet message to a grouping key by case-insensitive keyword."""
        msg_lower = msg.lower()
        for keyword in ("printf", "unreachable", "shadow", "unused", "nil", "loop"):
            if keyword in msg_lower:
                return keyword
        return "other"

    def _process_go_vet(self, output: str) -> str:
        """Compress ``go vet`` output.

        Warnings are grouped by category; a category that reaches
        ``lint_group_threshold`` is summarized with ``lint_example_count``
        examples. The first two ``# package`` headers are kept, with a count
        of the remainder. Non-blank lines that are neither a warning nor a
        header are appended at the end instead of being dropped, so
        diagnostics in another format are not lost.
        """
        warnings_by_type: dict[str, list[str]] = defaultdict(list)
        package_lines: list[str] = []
        other_lines: list[str] = []

        for line in output.splitlines():
            stripped = line.strip()
            m = _GO_ERROR_RE.match(stripped)
            if m:
                warnings_by_type[self._categorize_vet_warning(m.group(4))].append(line)
            elif _GO_PACKAGE_RE.match(stripped):
                package_lines.append(line)
            elif stripped:
                other_lines.append(line)

        if not warnings_by_type:
            return output

        example_count = config.get("lint_example_count")
        group_threshold = config.get("lint_group_threshold")
        result: list[str] = []

        if package_lines:
            result.extend(package_lines[:2])
            if len(package_lines) > 2:
                # Same truncation marker as _process_go_build.
                result.append(f"... ({len(package_lines) - 2} more packages)")

        # Most frequent categories first.
        for wtype, warnings in sorted(warnings_by_type.items(), key=lambda x: -len(x[1])):
            count = len(warnings)
            if count >= group_threshold:
                result.append(f"{wtype}: {count} warnings")
                result.extend(f" {w}" for w in warnings[:example_count])
                if count > example_count:
                    result.append(f" ... ({count - example_count} more)")
            else:
                result.extend(warnings)

        result.extend(other_lines)
        return "\n".join(result)

    def _process_go_mod(self, output: str) -> str:
        """Compress ``go mod`` output.

        ``go: downloading`` lines become a count; ``go: added/upgraded/
        downgraded/removed`` lines are listed next, followed by all remaining
        non-blank lines.
        """
        download_count = 0
        action_lines: list[str] = []
        other_lines: list[str] = []

        for line in output.splitlines():
            stripped = line.strip()
            if _GO_DOWNLOADING_RE.match(stripped):
                download_count += 1
            elif _GO_MOD_ACTION_RE.match(stripped):
                action_lines.append(line)
            elif stripped:
                other_lines.append(line)

        result: list[str] = []
        if download_count > 0:
            result.append(f"[{download_count} packages downloaded]")
        result.extend(action_lines)
        result.extend(other_lines)

        return "\n".join(result) if result else output

    def _process_go_generate(self, output: str) -> str:
        """Compress ``go generate`` output.

        ``file.go:N: running ...`` lines become a leading count; every other
        non-blank line (errors and generator output alike) is kept.
        """
        generate_count = 0
        result: list[str] = []

        for line in output.splitlines():
            stripped = line.strip()
            if _GO_GENERATE_RUN_RE.match(stripped):
                generate_count += 1
            elif stripped:
                result.append(line)

        if generate_count > 0:
            result.insert(0, f"[{generate_count} generators ran]")

        return "\n".join(result) if result else output
"""JQ/YQ processor: compress large JSON and YAML outputs."""

import json
import re

from .. import config
from .base import Processor
from .utils import compress_json_value

_JQ_RE = re.compile(r"\bjq\b")
_YQ_RE = re.compile(r"\byq\b")

# Lines kept at each end by the head/tail fallback.
_KEEP_HEAD = 20
_KEEP_TAIL = 10


class JqYqProcessor(Processor):
    """Compress large ``jq`` (JSON) and ``yq`` (YAML) outputs."""

    priority = 44
    hook_patterns = [
        r"^(jq|yq)\b",
    ]

    @property
    def name(self) -> str:
        """Return the registry name of this processor."""
        return "jq_yq"

    def can_handle(self, command: str) -> bool:
        """Return True when ``command`` mentions ``jq`` or ``yq`` as a word."""
        return bool(_JQ_RE.search(command) or _YQ_RE.search(command))

    def process(self, command: str, output: str) -> str:
        """Compress ``output``; outputs up to the passthrough threshold are untouched.

        A command mentioning ``jq`` is treated as JSON, anything else as YAML.
        """
        if not output or not output.strip():
            return output

        lines = output.splitlines()
        if len(lines) <= config.get("jq_passthrough_threshold"):
            return output

        if _JQ_RE.search(command):
            return self._process_jq(output, lines)
        return self._process_yq(output, lines)

    @staticmethod
    def _head_tail(lines: list[str]) -> str:
        """Keep the first and last lines with a truncation marker in between.

        Inputs that already fit within head + tail are joined unchanged, so
        no line is ever duplicated and the reported count is never negative.
        """
        if len(lines) <= _KEEP_HEAD + _KEEP_TAIL:
            return "\n".join(lines)

        omitted = len(lines) - _KEEP_HEAD - _KEEP_TAIL
        kept = lines[:_KEEP_HEAD]
        kept.append(f"\n... ({omitted} lines truncated) ...\n")
        kept.extend(lines[-_KEEP_TAIL:])
        return "\n".join(kept)

    def _process_jq(self, output: str, lines: list[str]) -> str:
        """Compress a single JSON document, else fall back to streaming mode.

        The compressed form is used only when it is shorter than the input.
        """
        try:
            data = json.loads(output.strip())
            compressed = compress_json_value(data, max_depth=4)
            result = json.dumps(compressed, indent=2)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            # Not one document: treat as one JSON value per line.
            return self._process_streaming_json(lines)

        if len(result) < len(output):
            return result + f"\n({len(lines)} lines compressed)"
        return output

    def _process_streaming_json(self, lines: list[str]) -> str:
        """Compress line-delimited JSON.

        When at least three of the first five lines are objects and all of
        those share the same key set, the first three lines are kept followed
        by a count; otherwise the head/tail fallback is used.
        """
        structures: list[str] = []
        for line in lines[:5]:
            try:
                obj = json.loads(line.strip())
            except ValueError:
                break
            if isinstance(obj, dict):
                structures.append(",".join(sorted(obj.keys())))

        if len(structures) >= 3 and len(set(structures)) == 1:
            result = list(lines[:3])
            result.append(f"... ({len(lines) - 3} more items with same structure)")
            return "\n".join(result)

        return self._head_tail(lines)

    def _process_yq(self, output: str, lines: list[str]) -> str:
        """Heuristically compress YAML without parsing it.

        Runs of ``- `` items at one indent are cut to the first three plus an
        ``(N items total)`` marker. If that does not shorten the output, the
        head/tail fallback is used (or the output is returned unchanged when
        it already fits within head + tail).
        """
        top_level_keys = sum(
            1
            for line in lines
            if line and not line[0].isspace() and line.rstrip().endswith(":")
        )

        result: list[str] = []
        array_count = 0
        array_indent: int | None = None
        # Index in `result` of the current run's marker line, once emitted.
        placeholder_idx: int | None = None

        def close_run() -> None:
            """Write the final item count into the current run's marker."""
            nonlocal array_count, array_indent, placeholder_idx
            if array_count > 3 and placeholder_idx is not None:
                indent_str = " " * (array_indent or 0)
                result[placeholder_idx] = f"{indent_str} ... ({array_count} items total)"
            array_count = 0
            array_indent = None
            placeholder_idx = None

        for line in lines:
            m = re.match(r"^(\s*)- ", line)
            if not m:
                # NOTE: any non-list line ends the run, including the
                # continuation lines of a multi-line list item.
                close_run()
                result.append(line)
                continue

            indent = len(m.group(1))
            if array_indent is None:
                array_indent = indent
                array_count = 1
                result.append(line)
            elif indent == array_indent:
                array_count += 1
                if array_count <= 3:
                    result.append(line)
                elif array_count == 4:
                    placeholder_idx = len(result)
                    result.append(f"{' ' * indent} ... ({array_count} items so far)")
                # Items past the fourth are dropped; the marker carries the count.
            elif array_count <= 3:
                # Nested item at another indent: keep while the run is visible.
                result.append(line)

        close_run()

        compressed = "\n".join(result)
        if len(compressed) < len(output):
            summary = f"--- ({len(lines)} lines"
            if top_level_keys > 0:
                summary += f", {top_level_keys} top-level keys"
            summary += ") ---"
            return summary + "\n" + compressed

        if len(lines) <= _KEEP_HEAD + _KEEP_TAIL:
            # Reachable only when jq_passthrough_threshold is set below
            # head + tail; truncating here would duplicate lines.
            return output
        return self._head_tail(lines)
"""SSH processor: non-interactive SSH and SCP commands."""

import re

from .base import Processor
from .utils import compress_log_lines

_SSH_NON_INTERACTIVE_RE = re.compile(
    r"""\bssh\s+.+\s+['"]"""
)
_SCP_RE = re.compile(r"\bscp\b")
_SCP_PROGRESS_RE = re.compile(r"^\s*\S+\s+\d+%")
_SCP_ERROR_RE = re.compile(r"\b(error|Error|ERROR|denied|refused|No such)\b")


class SshProcessor(Processor):
    """Compress output of ``scp`` and of ``ssh`` invoked with a quoted remote command."""

    priority = 43
    hook_patterns = [
        r"^ssh\s+.+\s+['\"]",
        r"^scp\b",
    ]

    @property
    def name(self) -> str:
        """Return the registry name of this processor."""
        return "ssh"

    def can_handle(self, command: str) -> bool:
        """Return True for any ``scp`` command or an ``ssh`` with a quoted argument."""
        if _SCP_RE.search(command):
            return True
        return bool(_SSH_NON_INTERACTIVE_RE.search(command))

    def process(self, command: str, output: str) -> str:
        """Route to the SCP handler when ``command`` mentions scp, else the SSH handler.

        Empty/blank output is returned unchanged.
        """
        if not output or not output.strip():
            return output

        if _SCP_RE.search(command):
            return self._process_scp(output)
        return self._process_ssh_remote(output)

    def _process_ssh_remote(self, output: str) -> str:
        """Apply log-style compression keeping 10 head and 20 tail lines."""
        lines = output.splitlines()
        return compress_log_lines(lines, keep_head=10, keep_tail=20)

    def _process_scp(self, output: str) -> str:
        """Collapse SCP progress lines to the last one seen for each file.

        A progress line is ``<name> <NN>% ...``; its first token identifies
        the file. The pending progress line is emitted when the file changes,
        when an ordinary line arrives, and at the end. Error-looking lines
        are emitted immediately and do not flush the pending progress line.
        Blank lines are dropped.
        """
        result: list[str] = []
        current_file: str | None = None
        last_progress: str | None = None

        for line in output.splitlines():
            stripped = line.strip()

            if _SCP_PROGRESS_RE.match(stripped):
                file_name = stripped.split()[0]
                # A new file started: keep the final status of the previous
                # one instead of overwriting it.
                if last_progress is not None and file_name != current_file:
                    result.append(last_progress)
                current_file = file_name
                last_progress = line
            elif _SCP_ERROR_RE.search(stripped):
                result.append(line)
            elif stripped:
                if last_progress is not None:
                    result.append(last_progress)
                    last_progress = None
                    current_file = None
                result.append(line)

        if last_progress is not None:
            result.append(last_progress)

        return "\n".join(result) if result else output
40 assert name_to_priority["helm"] == 41 assert name_to_priority["syslog"] == 42 + assert name_to_priority["ssh"] == 43 + assert name_to_priority["jq_yq"] == 44 assert name_to_priority["file_listing"] == 50 assert name_to_priority["file_content"] == 51 assert name_to_priority["generic"] == 999 @@ -398,6 +402,22 @@ def test_collect_hook_patterns_covers_key_commands(self): # Syslog "journalctl -u nginx", "dmesg", + # Cargo (dedicated processor) + "cargo doc", + "cargo update", + "cargo bench", + # Go (dedicated processor) + "go build ./...", + "go vet ./...", + "go mod tidy", + "go generate ./...", + "go install ./cmd/...", + # JQ/YQ + "jq . file.json", + "yq . config.yaml", + # SSH/SCP (non-interactive) + "ssh host 'ls -la'", + "scp file.txt host:/tmp/", ] for cmd in test_commands: @@ -412,3 +432,22 @@ def test_engine_uses_discovered_processors(self): for ep, dp in zip(engine.processors, discovered, strict=False): assert ep.name == dp.name assert ep.priority == dp.priority + + +class TestProcessorChaining: + """Tests for multi-processor chaining infrastructure.""" + + def setup_method(self): + self.engine = CompressionEngine() + + def test_chain_to_attribute_default_none(self): + for p in self.engine.processors: + assert p.chain_to is None + + def test_processor_by_name_lookup(self): + assert "git" in self.engine._by_name + assert "build" in self.engine._by_name + assert "cargo" in self.engine._by_name + assert "go" in self.engine._by_name + assert "ssh" in self.engine._by_name + assert "jq_yq" in self.engine._by_name diff --git a/tests/test_hooks.py b/tests/test_hooks.py index bc4b18a..dc070b8 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -511,3 +511,70 @@ def test_single_segment_after_split(self): def test_three_segment_chain(self): assert is_compressible("cd /a && cd /b && git status") assert is_compressible("touch f && chmod 644 f && ls -la f") + + def test_cd_then_go_build(self): + assert is_compressible("cd /project && go build ./...") + + def 
test_cd_then_cargo_bench(self): + assert is_compressible("cd /project && cargo bench") + + +class TestNewProcessorHookPatterns: + """Tests for hook patterns of newly added processors.""" + + # --- Cargo --- + def test_cargo_doc_compressible(self): + assert is_compressible("cargo doc") + assert is_compressible("cargo doc --open") + + def test_cargo_update_compressible(self): + assert is_compressible("cargo update") + + def test_cargo_bench_compressible(self): + assert is_compressible("cargo bench") + + def test_cargo_build_still_compressible(self): + assert is_compressible("cargo build") + assert is_compressible("cargo build --release") + assert is_compressible("cargo check") + + # --- Go --- + def test_go_build_compressible(self): + assert is_compressible("go build ./...") + assert is_compressible("go build -o myapp ./cmd/server") + + def test_go_vet_compressible(self): + assert is_compressible("go vet ./...") + + def test_go_mod_compressible(self): + assert is_compressible("go mod tidy") + assert is_compressible("go mod download") + + def test_go_generate_compressible(self): + assert is_compressible("go generate ./...") + + def test_go_install_compressible(self): + assert is_compressible("go install ./cmd/...") + + # --- SSH non-interactive --- + def test_ssh_non_interactive_compressible(self): + assert is_compressible("ssh host 'ls -la'") + assert is_compressible('ssh host "uname -a"') + assert is_compressible("ssh -o StrictHostKeyChecking=no host 'uptime'") + + def test_ssh_interactive_still_excluded(self): + assert not is_compressible("ssh host") + assert not is_compressible("ssh -p 22 host") + + def test_scp_compressible(self): + assert is_compressible("scp file.txt host:/tmp/") + assert is_compressible("scp -r dir/ user@host:/path/") + + # --- JQ/YQ --- + def test_jq_compressible(self): + assert is_compressible("jq . file.json") + assert is_compressible("jq '.items[]' data.json") + + def test_yq_compressible(self): + assert is_compressible("yq . 
config.yaml") + assert is_compressible("yq eval '.spec' deployment.yaml") diff --git a/tests/test_precision.py b/tests/test_precision.py index e649e66..a32bfa4 100644 --- a/tests/test_precision.py +++ b/tests/test_precision.py @@ -1024,3 +1024,114 @@ def test_poetry_lock_all_packages_listed(self): assert "40 packages" in compressed assert "lib-0" in compressed assert "lib-39" in compressed + + +class TestCargoPrecision: + def setup_method(self): + self.engine = CompressionEngine() + + def test_cargo_build_preserves_all_errors_with_spans(self): + lines = [f" Compiling dep-{i} v1.0.{i}" for i in range(100)] + lines.extend([ + "error[E0308]: mismatched types", + " --> src/main.rs:10:5", + " |", + "10 | let x: i32 = \"hello\";", + " | ^^^^^^^ expected i32, found &str", + "", + "error[E0425]: cannot find value `y`", + " --> src/lib.rs:20:10", + " |", + "20 | y + 1", + " | ^ not found in this scope", + ]) + output = "\n".join(lines) + compressed, proc, was_compressed = self.engine.compress("cargo build", output) + assert was_compressed + assert proc == "cargo" + assert "mismatched types" in compressed + assert "src/main.rs:10:5" in compressed + assert "expected i32" in compressed + assert "cannot find value" in compressed + assert "src/lib.rs:20:10" in compressed + assert "Compiling dep-" not in compressed + + def test_cargo_build_preserves_warning_types(self): + warnings = [] + for i in range(10): + warnings.extend([ + f"warning: unused variable: `var{i}`", + f" --> src/file{i}.rs:{i + 1}:5", + "", + ]) + for i in range(5): + warnings.extend([ + f"warning: unused import: `mod{i}`", + f" --> src/lib.rs:{i + 10}:5", + "", + ]) + warnings.append("warning: `myapp` (lib) generated 15 warnings") + warnings.append(" Finished dev [unoptimized + debuginfo] target(s)") + output = "\n".join(warnings) + compressed, proc, was_compressed = self.engine.compress("cargo build", output) + assert was_compressed + assert proc == "cargo" + assert "unused_variable" in compressed + assert 
"unused_import" in compressed + assert "Finished" in compressed + + +class TestGoPrecision: + def setup_method(self): + self.engine = CompressionEngine() + + def test_go_build_preserves_all_errors(self): + # Need enough package headers to trigger compression (multi-package build) + lines = [f"# myapp/pkg/module{i}" for i in range(10)] + lines.extend([ + "# myapp/pkg/handler", + "pkg/handler/main.go:15:2: undefined: DoSomething", + "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", + "# myapp/pkg/db", + "pkg/db/conn.go:5:3: imported and not used: \"fmt\"", + ]) + output = "\n".join(lines) + compressed, proc, was_compressed = self.engine.compress("go build ./...", output) + assert was_compressed + assert proc == "go" + assert "undefined: DoSomething" in compressed + assert "main.go:15:2" in compressed + assert "main.go:20:10" in compressed + assert "conn.go:5:3" in compressed + + def test_go_mod_tidy_preserves_additions(self): + lines = [f"go: downloading github.com/pkg/dep{i} v1.0.{i}" for i in range(50)] + lines.append("go: added github.com/new/important v1.0.0") + lines.append("go: removed github.com/old/unused v0.5.0") + output = "\n".join(lines) + compressed, proc, was_compressed = self.engine.compress("go mod tidy", output) + assert was_compressed + assert proc == "go" + assert "added" in compressed + assert "removed" in compressed + assert "50 packages downloaded" in compressed + + +class TestJqPrecision: + def setup_method(self): + self.engine = CompressionEngine() + + def test_jq_preserves_top_level_structure(self): + import json + data = { + "users": [{"id": i, "name": f"user-{i}", "email": f"user{i}@test.com"} for i in range(50)], + "metadata": {"total": 50, "page": 1}, + "status": "ok", + } + output = json.dumps(data, indent=2) + compressed, proc, was_compressed = self.engine.compress("jq . 
data.json", output) + assert was_compressed + assert proc == "jq_yq" + assert "users" in compressed + assert "metadata" in compressed + assert "status" in compressed diff --git a/tests/test_processors.py b/tests/test_processors.py index 89fc10a..224ceeb 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -27,6 +27,10 @@ from src.processors.system_info import SystemInfoProcessor from src.processors.terraform import TerraformProcessor from src.processors.test_output import TestOutputProcessor +from src.processors.cargo import CargoProcessor +from src.processors.go import GoProcessor +from src.processors.jq_yq import JqYqProcessor +from src.processors.ssh import SshProcessor class TestGitProcessor: @@ -678,7 +682,7 @@ def setup_method(self): def test_can_handle_build_commands(self): assert self.p.can_handle("npm run build") - assert self.p.can_handle("cargo build") + assert not self.p.can_handle("cargo build") # handled by CargoProcessor assert self.p.can_handle("make") assert self.p.can_handle("pip install -r requirements.txt") assert self.p.can_handle("yarn add lodash") @@ -3496,3 +3500,382 @@ def test_dmesg_no_errors_truncated(self): # Tail preserved assert "Normal kernel message 199" in result assert len(result) < len(output) + + +# --- Cargo Processor --- + + +class TestCargoProcessor: + def setup_method(self): + self.p = CargoProcessor() + + def test_can_handle_cargo_commands(self): + assert self.p.can_handle("cargo build") + assert self.p.can_handle("cargo check") + assert self.p.can_handle("cargo build --release") + assert self.p.can_handle("cargo doc") + assert self.p.can_handle("cargo doc --open") + assert self.p.can_handle("cargo update") + assert self.p.can_handle("cargo bench") + + def test_cannot_handle_cargo_test(self): + assert not self.p.can_handle("cargo test") + + def test_cannot_handle_cargo_clippy(self): + assert not self.p.can_handle("cargo clippy") + + def test_cannot_handle_non_cargo(self): + assert not 
self.p.can_handle("npm run build") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("cargo build", "") == "" + + def test_cargo_build_collapses_compiling(self): + lines = [f" Compiling dep-{i} v1.{i}.0" for i in range(50)] + lines.append(" Finished dev [unoptimized + debuginfo] target(s) in 12.34s") + output = "\n".join(lines) + result = self.p.process("cargo build", output) + assert "50 crates compiled" in result + assert "Finished" in result + assert "Compiling dep-" not in result + + def test_cargo_build_preserves_all_errors(self): + output = "\n".join([ + " Compiling myapp v0.1.0", + "error[E0308]: mismatched types", + " --> src/main.rs:10:5", + " |", + "10 | let x: i32 = \"hello\";", + " | ^^^^^^^ expected i32, found &str", + "", + "error: aborting due to previous error", + ]) + result = self.p.process("cargo build", output) + assert "mismatched types" in result + assert "src/main.rs:10:5" in result + assert "expected i32" in result + assert "aborting" in result + + def test_cargo_build_groups_warnings_by_type(self): + warnings = [] + for i in range(6): + warnings.extend([ + f"warning: unused variable: `x{i}`", + f" --> src/file{i}.rs:{i + 1}:5", + f" |", + f"{i + 1} | let x{i} = 42;", + f" | ^^ help: if this is intentional, prefix it with an underscore", + "", + ]) + warnings.append("warning: `myapp` (lib) generated 6 warnings") + warnings.append(" Finished dev [unoptimized + debuginfo] target(s)") + output = "\n".join(warnings) + result = self.p.process("cargo build", output) + assert "unused_variable" in result + assert "6 occurrences" in result + assert "Finished" in result + + def test_cargo_build_keeps_finished_line(self): + output = "\n".join([ + " Compiling myapp v0.1.0", + " Finished dev [unoptimized + debuginfo] target(s) in 2.34s", + ]) + result = self.p.process("cargo build", output) + assert "Finished" in result + + def test_cargo_build_mixed_errors_and_warnings(self): + output = "\n".join([ + " 
Compiling myapp v0.1.0", + "warning: unused import: `std::io`", + " --> src/main.rs:1:5", + "", + "error[E0425]: cannot find value `x`", + " --> src/main.rs:5:5", + " |", + "5 | println!(\"{}\", x);", + " | ^ not found", + "", + "error: aborting due to previous error", + ]) + result = self.p.process("cargo build", output) + assert "cannot find value" in result + assert "src/main.rs:5:5" in result + + def test_cargo_doc_collapses_documenting(self): + lines = [f" Documenting dep-{i} v1.{i}.0" for i in range(20)] + lines.append(" Finished `doc` profile [unoptimized]") + lines.append(" Generated /target/doc/myapp/index.html") + output = "\n".join(lines) + result = self.p.process("cargo doc", output) + assert "20 documented" in result + assert "Finished" in result + assert "Generated" in result + assert "Documenting dep-" not in result + + def test_cargo_update_shows_major_bumps(self): + output = "\n".join([ + " Updating serde v1.0.0 -> v2.0.0", + " Updating tokio v1.28.0 -> v1.29.0", + " Updating rand v0.8.0 -> v0.8.1", + " Adding new-dep v0.1.0", + " Removing old-dep v0.5.0", + ]) + result = self.p.process("cargo update", output) + assert "MAJOR" in result + assert "serde" in result + assert "new-dep" in result + assert "old-dep" in result + + def test_cargo_update_collapses_patch_bumps(self): + lines = [f" Updating dep-{i} v1.0.{i} -> v1.0.{i + 1}" for i in range(20)] + output = "\n".join(lines) + result = self.p.process("cargo update", output) + assert "20 dependencies updated" in result + assert "Minor/patch updates: 20" in result + + def test_cargo_bench_keeps_results(self): + output = "\n".join([ + " Compiling myapp v0.1.0", + " Compiling myapp-bench v0.1.0", + " Running benches/bench.rs", + "test bench_add ... bench: 10 ns/iter (+/- 2)", + "test bench_multiply ... bench: 25 ns/iter (+/- 5)", + "test result: ok. 
2 passed; 0 failed; 0 ignored; 2 measured", + ]) + result = self.p.process("cargo bench", output) + assert "bench_add" in result + assert "bench_multiply" in result + assert "10 ns/iter" in result + assert "test result:" in result + assert "2 crates compiled" in result + assert "Running" not in result + + def test_cargo_build_downloading_and_compiling(self): + lines = [f" Downloading dep-{i} v1.0.{i}" for i in range(10)] + lines.extend([f" Compiling dep-{i} v1.0.{i}" for i in range(10)]) + lines.append(" Finished dev [unoptimized] target(s) in 30.5s") + output = "\n".join(lines) + result = self.p.process("cargo build", output) + assert "10 crates downloaded" in result + assert "10 crates compiled" in result + assert "Finished" in result + + +# --- Go Processor --- + + +class TestGoProcessor: + def setup_method(self): + self.p = GoProcessor() + + def test_can_handle_go_commands(self): + assert self.p.can_handle("go build ./...") + assert self.p.can_handle("go build -o myapp ./cmd/server") + assert self.p.can_handle("go vet ./...") + assert self.p.can_handle("go mod tidy") + assert self.p.can_handle("go mod download") + assert self.p.can_handle("go generate ./...") + assert self.p.can_handle("go install ./cmd/...") + + def test_cannot_handle_go_test(self): + assert not self.p.can_handle("go test ./...") + + def test_cannot_handle_golangci_lint(self): + assert not self.p.can_handle("golangci-lint run") + + def test_cannot_handle_non_go(self): + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("go build ./...", "") == "" + + def test_go_build_preserves_errors(self): + output = "\n".join([ + "# myapp/pkg/handler", + "pkg/handler/main.go:15:2: undefined: DoSomething", + "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", + ]) + result = self.p.process("go build ./...", output) + assert "undefined: DoSomething" in result + assert "main.go:15:2" in result + assert "main.go:20:10" in result + + def 
test_go_vet_groups_warnings(self): + warnings = [] + for i in range(6): + warnings.append(f"pkg/file{i}.go:{i + 1}:5: printf format %d has arg of wrong type") + for i in range(3): + warnings.append(f"pkg/util{i}.go:{i + 1}:3: unreachable code") + output = "\n".join(warnings) + result = self.p.process("go vet ./...", output) + assert "printf" in result + assert "6 warnings" in result + assert "unreachable" in result + + def test_go_mod_tidy_collapses_downloads(self): + lines = [f"go: downloading github.com/pkg/dep{i} v1.0.{i}" for i in range(30)] + lines.append("go: added github.com/new/pkg v0.5.0") + lines.append("go: removed github.com/old/pkg v1.0.0") + output = "\n".join(lines) + result = self.p.process("go mod tidy", output) + assert "30 packages downloaded" in result + assert "added" in result + assert "removed" in result + assert "downloading" not in result + + def test_go_mod_keeps_added_removed(self): + output = "\n".join([ + "go: downloading github.com/pkg/a v1.0.0", + "go: added github.com/pkg/a v1.0.0", + "go: upgraded github.com/pkg/b v1.0.0 => v1.1.0", + ]) + result = self.p.process("go mod tidy", output) + assert "added" in result + assert "upgraded" in result + + def test_go_generate_collapses_running(self): + lines = [f"main.go:{i}: running mockgen" for i in range(10)] + lines.append("Generated output.go") + output = "\n".join(lines) + result = self.p.process("go generate ./...", output) + assert "10 generators ran" in result + assert "Generated output.go" in result + + def test_go_generate_keeps_errors(self): + output = "\n".join([ + "main.go:5: running stringer", + "error: stringer: can't find type Foo", + ]) + result = self.p.process("go generate ./...", output) + assert "error" in result + assert "can't find type Foo" in result + + def test_go_install_delegates_to_build(self): + output = "\n".join([ + "# myapp/cmd/server", + "cmd/server/main.go:10:5: undefined: handler.New", + ]) + result = self.p.process("go install ./cmd/...", output) + assert 
"undefined: handler.New" in result + + +# --- SSH Processor --- + + +class TestSshProcessor: + def setup_method(self): + self.p = SshProcessor() + + def test_can_handle_non_interactive_ssh(self): + assert self.p.can_handle("ssh host 'ls -la'") + assert self.p.can_handle('ssh host "uname -a"') + assert self.p.can_handle("ssh -o StrictHostKeyChecking=no host 'uptime'") + + def test_cannot_handle_interactive_ssh(self): + assert not self.p.can_handle("ssh host") + assert not self.p.can_handle("ssh -p 22 host") + + def test_can_handle_scp(self): + assert self.p.can_handle("scp file.txt host:/tmp/") + assert self.p.can_handle("scp -r dir/ user@host:/path/") + + def test_empty_output(self): + assert self.p.process("ssh host 'ls'", "") == "" + + def test_ssh_large_output_compressed(self): + lines = [f"line {i}: some output data" for i in range(200)] + output = "\n".join(lines) + result = self.p.process("ssh host 'ls -la'", output) + assert len(result) < len(output) + # Head preserved + assert "line 0" in result + # Tail preserved + assert "line 199" in result + + def test_ssh_preserves_errors(self): + lines = [f"data line {i}" for i in range(50)] + lines.insert(25, "ERROR: connection failed at step 25") + output = "\n".join(lines) + result = self.p.process("ssh host 'run_job'", output) + assert "ERROR: connection failed" in result + + def test_scp_collapses_progress(self): + output = "\n".join([ + "file1.tar.gz 10% 24MB 12.3MB/s 00:02", + "file1.tar.gz 50% 120MB 12.3MB/s 00:10", + "file1.tar.gz 100% 245MB 12.3MB/s 00:20", + "file2.tar.gz 10% 10MB 5.0MB/s 00:01", + "file2.tar.gz 100% 100MB 5.0MB/s 00:20", + ]) + result = self.p.process("scp file1.tar.gz file2.tar.gz host:/tmp/", output) + assert len(result) < len(output) + + def test_scp_keeps_errors(self): + output = "\n".join([ + "file.txt 100% 1MB 1.0MB/s 00:01", + "scp: /remote/path: Permission denied", + ]) + result = self.p.process("scp file.txt host:/remote/path", output) + assert "Permission denied" in result + + +# 
--- JQ/YQ Processor --- + + +class TestJqYqProcessor: + def setup_method(self): + self.p = JqYqProcessor() + + def test_can_handle_jq(self): + assert self.p.can_handle("jq . file.json") + assert self.p.can_handle("jq '.items[]' data.json") + assert self.p.can_handle("jq -r .name file.json") + + def test_can_handle_yq(self): + assert self.p.can_handle("yq . config.yaml") + assert self.p.can_handle("yq eval '.spec' deployment.yaml") + + def test_cannot_handle_non_jq(self): + assert not self.p.can_handle("cat file.json") + assert not self.p.can_handle("grep pattern file") + + def test_empty_output(self): + assert self.p.process("jq . file.json", "") == "" + + def test_jq_small_output_passthrough(self): + data = {"key": "value", "count": 42} + import json + output = json.dumps(data, indent=2) + result = self.p.process("jq . file.json", output) + assert result == output + + def test_jq_large_json_compressed(self): + import json + data = [{"id": i, "name": f"item-{i}", "data": {"nested": "value" * 10}} for i in range(100)] + output = json.dumps(data, indent=2) + assert len(output.splitlines()) > 50 + result = self.p.process("jq . file.json", output) + assert len(result) < len(output) + + def test_jq_streaming_output(self): + import json + lines = [json.dumps({"id": i, "name": f"item-{i}"}) for i in range(100)] + output = "\n".join(lines) + result = self.p.process("jq -c '.[]' file.json", output) + assert len(result) < len(output) + assert "same structure" in result + + def test_yq_small_output_passthrough(self): + output = "key: value\ncount: 42\n" + result = self.p.process("yq . config.yaml", output) + assert result == output + + def test_yq_large_output_summarized(self): + lines = ["root:"] + for i in range(80): + lines.append(f"- name: item-{i}") + lines.append(f" value: {i}") + output = "\n".join(lines) + result = self.p.process("yq . 
config.yaml", output) + assert len(result) < len(output) From 4e2b085b4f8129ccb2cea4c9bc77ea7782de9e20 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Fri, 27 Mar 2026 08:57:38 +0100 Subject: [PATCH 2/6] fix: resolve all ruff lint errors in new processors and tests - E741: rename ambiguous variable l -> line - SIM114: combine if branches with same body using logical or - PERF402: use list() instead of append loop - F841: remove unused variables (array_start_idx, current_file) - ERA001: remove commented-out code - B007: remove unused loop variable - PLR5501: use elif instead of else+if - PERF203: extract try/except out of loop - I001: sort imports alphabetically - F541: remove extraneous f-prefix on strings without placeholders - E501: wrap long lines --- src/processors/cargo.py | 31 ++++++++++++++----------------- src/processors/go.py | 2 -- src/processors/jq_yq.py | 36 +++++++++++++++++------------------- src/processors/ssh.py | 7 ------- tests/test_engine.py | 1 - tests/test_precision.py | 5 ++++- tests/test_processors.py | 17 ++++++++++------- 7 files changed, 45 insertions(+), 54 deletions(-) diff --git a/src/processors/cargo.py b/src/processors/cargo.py index 27ca223..f40d52f 100644 --- a/src/processors/cargo.py +++ b/src/processors/cargo.py @@ -181,7 +181,7 @@ def _process_cargo_build(self, output: str) -> str: if count >= group_threshold: result.append(f"warning: {wtype} ({count} occurrences)") for block in blocks[:example_count]: - result.extend(f" {l}" for l in block) + result.extend(f" {line}" for line in block) if count > example_count: result.append(f" ... 
({count - example_count} more)") else: @@ -205,15 +205,13 @@ def _process_cargo_doc(self, output: str) -> str: compiling_count += 1 elif _DOCUMENTING_RE.match(stripped): documenting_count += 1 - elif _FINISHED_RE.match(stripped): - result.append(line) - elif re.match(r"^\s*Generated\s+", stripped): - result.append(line) - elif re.search(r"\bwarning\b", stripped): - result.append(line) - elif _ERROR_START_RE.match(stripped): - result.append(line) - elif _SPAN_LINE_RE.match(stripped) and result: + elif ( + _FINISHED_RE.match(stripped) + or re.match(r"^\s*Generated\s+", stripped) + or re.search(r"\bwarning\b", stripped) + or _ERROR_START_RE.match(stripped) + or (_SPAN_LINE_RE.match(stripped) and result) + ): result.append(line) summary_parts = [] @@ -280,13 +278,12 @@ def _process_cargo_bench(self, output: str) -> str: compiling_count += 1 elif _RUNNING_RE.match(stripped): continue - elif re.match(r"^test\s+.+\s+bench:", stripped): - result.append(line) - elif re.match(r"^test result:", stripped): - result.append(line) - elif _FINISHED_RE.match(stripped): - result.append(line) - elif _ERROR_START_RE.match(stripped): + elif ( + re.match(r"^test\s+.+\s+bench:", stripped) + or re.match(r"^test result:", stripped) + or _FINISHED_RE.match(stripped) + or _ERROR_START_RE.match(stripped) + ): result.append(line) if compiling_count > 0: diff --git a/src/processors/go.py b/src/processors/go.py index e0204d4..07cb2be 100644 --- a/src/processors/go.py +++ b/src/processors/go.py @@ -170,8 +170,6 @@ def _process_go_generate(self, output: str) -> str: stripped = line.strip() if _GO_GENERATE_RUN_RE.match(stripped): generate_count += 1 - elif re.search(r"\b(error|Error|ERROR)\b", stripped): - result.append(line) elif stripped: result.append(line) diff --git a/src/processors/jq_yq.py b/src/processors/jq_yq.py index 40aeeed..e1d3976 100644 --- a/src/processors/jq_yq.py +++ b/src/processors/jq_yq.py @@ -52,26 +52,30 @@ def _process_jq(self, output: str, lines: list[str]) -> str: # 
Streaming mode: one JSON value per line return self._process_streaming_json(lines) + @staticmethod + def _parse_json_keys(line: str) -> str | None: + try: + obj = json.loads(line.strip()) + except (json.JSONDecodeError, ValueError): + return None + if isinstance(obj, dict): + return ",".join(sorted(obj.keys())) + return None + def _process_streaming_json(self, lines: list[str]) -> str: - # Try to detect repeated structure structures: list[str] = [] for line in lines[:5]: - try: - obj = json.loads(line.strip()) - if isinstance(obj, dict): - structures.append(",".join(sorted(obj.keys()))) - except (json.JSONDecodeError, ValueError): + keys = self._parse_json_keys(line) + if keys is None: break + structures.append(keys) # If all parsed lines have the same keys, it's a repeated structure if len(structures) >= 3 and len(set(structures)) == 1: - result = [] - for line in lines[:3]: - result.append(line) + result = list(lines[:3]) result.append(f"... ({len(lines) - 3} more items with same structure)") return "\n".join(result) - # Fallback: head + tail keep_head = 20 keep_tail = 10 if len(lines) <= keep_head + keep_tail: @@ -96,16 +100,14 @@ def _process_yq(self, output: str, lines: list[str]) -> str: result: list[str] = [] array_count = 0 array_indent: int | None = None - array_start_idx = 0 - for i, line in enumerate(lines): + for line in lines: m = re.match(r"^(\s*)- ", line) if m: indent = len(m.group(1)) if array_indent is None: array_indent = indent array_count = 1 - array_start_idx = len(result) result.append(line) elif indent == array_indent: array_count += 1 @@ -113,11 +115,8 @@ def _process_yq(self, output: str, lines: list[str]) -> str: result.append(line) elif array_count == 4: result.append(f"{' ' * indent} ... 
({array_count} items so far)") - # else: skip, we'll update the count later - else: - # Different indent — nested item, keep if in visible range - if array_count <= 3: - result.append(line) + elif array_count <= 3: + result.append(line) else: # Non-array line — flush array count if needed if array_count > 3: @@ -147,7 +146,6 @@ def _process_yq(self, output: str, lines: list[str]) -> str: summary += ") ---" return summary + "\n" + compressed - # Fallback: head + tail keep_head = 20 keep_tail = 10 result_lines = lines[:keep_head] diff --git a/src/processors/ssh.py b/src/processors/ssh.py index fe857f0..121c972 100644 --- a/src/processors/ssh.py +++ b/src/processors/ssh.py @@ -45,26 +45,19 @@ def _process_ssh_remote(self, output: str) -> str: def _process_scp(self, output: str) -> str: lines = output.splitlines() result: list[str] = [] - current_file: str | None = None last_progress: str | None = None for line in lines: stripped = line.strip() if _SCP_PROGRESS_RE.match(stripped): - # Track the file from the progress line - parts = stripped.split() - if parts: - current_file = parts[0] last_progress = line elif re.search(r"\b(error|Error|ERROR|denied|refused|No such)\b", stripped): result.append(line) elif stripped and not _SCP_PROGRESS_RE.match(stripped): - # Flush last progress for previous file if last_progress: result.append(last_progress) last_progress = None - current_file = None result.append(line) # Flush final progress line diff --git a/tests/test_engine.py b/tests/test_engine.py index 70411f1..765a2b1 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -415,7 +415,6 @@ def test_collect_hook_patterns_covers_key_commands(self): # JQ/YQ "jq . file.json", "yq . 
config.yaml", - # SSH/SCP (non-interactive) "ssh host 'ls -la'", "scp file.txt host:/tmp/", ] diff --git a/tests/test_precision.py b/tests/test_precision.py index a32bfa4..abf68d8 100644 --- a/tests/test_precision.py +++ b/tests/test_precision.py @@ -1124,7 +1124,10 @@ def setup_method(self): def test_jq_preserves_top_level_structure(self): import json data = { - "users": [{"id": i, "name": f"user-{i}", "email": f"user{i}@test.com"} for i in range(50)], + "users": [ + {"id": i, "name": f"user-{i}", "email": f"user{i}@test.com"} + for i in range(50) + ], "metadata": {"total": 50, "page": 1}, "status": "ok", } diff --git a/tests/test_processors.py b/tests/test_processors.py index 224ceeb..30b4df9 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -8,6 +8,7 @@ from src.chain_utils import extract_primary_command from src.processors.ansible import AnsibleProcessor from src.processors.build_output import BuildOutputProcessor +from src.processors.cargo import CargoProcessor from src.processors.cloud_cli import CloudCliProcessor from src.processors.db_query import DbQueryProcessor from src.processors.docker import DockerProcessor @@ -17,20 +18,19 @@ from src.processors.generic import GenericProcessor from src.processors.gh import GhProcessor from src.processors.git import GitProcessor +from src.processors.go import GoProcessor from src.processors.helm import HelmProcessor +from src.processors.jq_yq import JqYqProcessor from src.processors.kubectl import KubectlProcessor from src.processors.lint_output import LintOutputProcessor from src.processors.network import NetworkProcessor from src.processors.package_list import PackageListProcessor from src.processors.search import SearchProcessor +from src.processors.ssh import SshProcessor from src.processors.syslog import SyslogProcessor from src.processors.system_info import SystemInfoProcessor from src.processors.terraform import TerraformProcessor from src.processors.test_output import TestOutputProcessor 
-from src.processors.cargo import CargoProcessor -from src.processors.go import GoProcessor -from src.processors.jq_yq import JqYqProcessor -from src.processors.ssh import SshProcessor class TestGitProcessor: @@ -3563,9 +3563,9 @@ def test_cargo_build_groups_warnings_by_type(self): warnings.extend([ f"warning: unused variable: `x{i}`", f" --> src/file{i}.rs:{i + 1}:5", - f" |", + " |", f"{i + 1} | let x{i} = 42;", - f" | ^^ help: if this is intentional, prefix it with an underscore", + " | ^^ help: if this is intentional, prefix it with an underscore", "", ]) warnings.append("warning: `myapp` (lib) generated 6 warnings") @@ -3852,7 +3852,10 @@ def test_jq_small_output_passthrough(self): def test_jq_large_json_compressed(self): import json - data = [{"id": i, "name": f"item-{i}", "data": {"nested": "value" * 10}} for i in range(100)] + data = [ + {"id": i, "name": f"item-{i}", "data": {"nested": "value" * 10}} + for i in range(100) + ] output = json.dumps(data, indent=2) assert len(output.splitlines()) > 50 result = self.p.process("jq . 
file.json", output) From 9234c80651fe23694b290ce9190ddb006bff9c78 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Fri, 27 Mar 2026 09:02:29 +0100 Subject: [PATCH 3/6] style: apply ruff format to new files --- src/processors/ssh.py | 4 +- tests/test_precision.py | 72 ++++++++------- tests/test_processors.py | 188 ++++++++++++++++++++++----------------- 3 files changed, 148 insertions(+), 116 deletions(-) diff --git a/src/processors/ssh.py b/src/processors/ssh.py index 121c972..65726a0 100644 --- a/src/processors/ssh.py +++ b/src/processors/ssh.py @@ -5,9 +5,7 @@ from .base import Processor from .utils import compress_log_lines -_SSH_NON_INTERACTIVE_RE = re.compile( - r"""\bssh\s+.+\s+['"]""" -) +_SSH_NON_INTERACTIVE_RE = re.compile(r"""\bssh\s+.+\s+['"]""") _SCP_RE = re.compile(r"\bscp\b") _SCP_PROGRESS_RE = re.compile(r"^\s*\S+\s+\d+%") diff --git a/tests/test_precision.py b/tests/test_precision.py index abf68d8..88b8362 100644 --- a/tests/test_precision.py +++ b/tests/test_precision.py @@ -1032,19 +1032,21 @@ def setup_method(self): def test_cargo_build_preserves_all_errors_with_spans(self): lines = [f" Compiling dep-{i} v1.0.{i}" for i in range(100)] - lines.extend([ - "error[E0308]: mismatched types", - " --> src/main.rs:10:5", - " |", - "10 | let x: i32 = \"hello\";", - " | ^^^^^^^ expected i32, found &str", - "", - "error[E0425]: cannot find value `y`", - " --> src/lib.rs:20:10", - " |", - "20 | y + 1", - " | ^ not found in this scope", - ]) + lines.extend( + [ + "error[E0308]: mismatched types", + " --> src/main.rs:10:5", + " |", + '10 | let x: i32 = "hello";', + " | ^^^^^^^ expected i32, found &str", + "", + "error[E0425]: cannot find value `y`", + " --> src/lib.rs:20:10", + " |", + "20 | y + 1", + " | ^ not found in this scope", + ] + ) output = "\n".join(lines) compressed, proc, was_compressed = self.engine.compress("cargo build", output) assert was_compressed @@ -1059,17 +1061,21 @@ def 
test_cargo_build_preserves_all_errors_with_spans(self): def test_cargo_build_preserves_warning_types(self): warnings = [] for i in range(10): - warnings.extend([ - f"warning: unused variable: `var{i}`", - f" --> src/file{i}.rs:{i + 1}:5", - "", - ]) + warnings.extend( + [ + f"warning: unused variable: `var{i}`", + f" --> src/file{i}.rs:{i + 1}:5", + "", + ] + ) for i in range(5): - warnings.extend([ - f"warning: unused import: `mod{i}`", - f" --> src/lib.rs:{i + 10}:5", - "", - ]) + warnings.extend( + [ + f"warning: unused import: `mod{i}`", + f" --> src/lib.rs:{i + 10}:5", + "", + ] + ) warnings.append("warning: `myapp` (lib) generated 15 warnings") warnings.append(" Finished dev [unoptimized + debuginfo] target(s)") output = "\n".join(warnings) @@ -1088,13 +1094,15 @@ def setup_method(self): def test_go_build_preserves_all_errors(self): # Need enough package headers to trigger compression (multi-package build) lines = [f"# myapp/pkg/module{i}" for i in range(10)] - lines.extend([ - "# myapp/pkg/handler", - "pkg/handler/main.go:15:2: undefined: DoSomething", - "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", - "# myapp/pkg/db", - "pkg/db/conn.go:5:3: imported and not used: \"fmt\"", - ]) + lines.extend( + [ + "# myapp/pkg/handler", + "pkg/handler/main.go:15:2: undefined: DoSomething", + "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", + "# myapp/pkg/db", + 'pkg/db/conn.go:5:3: imported and not used: "fmt"', + ] + ) output = "\n".join(lines) compressed, proc, was_compressed = self.engine.compress("go build ./...", output) assert was_compressed @@ -1123,10 +1131,10 @@ def setup_method(self): def test_jq_preserves_top_level_structure(self): import json + data = { "users": [ - {"id": i, "name": f"user-{i}", "email": f"user{i}@test.com"} - for i in range(50) + {"id": i, "name": f"user-{i}", "email": f"user{i}@test.com"} for i in range(50) ], "metadata": {"total": 50, "page": 1}, "status": "ok", diff --git 
a/tests/test_processors.py b/tests/test_processors.py index 30b4df9..90dfc9b 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -3541,16 +3541,18 @@ def test_cargo_build_collapses_compiling(self): assert "Compiling dep-" not in result def test_cargo_build_preserves_all_errors(self): - output = "\n".join([ - " Compiling myapp v0.1.0", - "error[E0308]: mismatched types", - " --> src/main.rs:10:5", - " |", - "10 | let x: i32 = \"hello\";", - " | ^^^^^^^ expected i32, found &str", - "", - "error: aborting due to previous error", - ]) + output = "\n".join( + [ + " Compiling myapp v0.1.0", + "error[E0308]: mismatched types", + " --> src/main.rs:10:5", + " |", + '10 | let x: i32 = "hello";', + " | ^^^^^^^ expected i32, found &str", + "", + "error: aborting due to previous error", + ] + ) result = self.p.process("cargo build", output) assert "mismatched types" in result assert "src/main.rs:10:5" in result @@ -3560,14 +3562,16 @@ def test_cargo_build_preserves_all_errors(self): def test_cargo_build_groups_warnings_by_type(self): warnings = [] for i in range(6): - warnings.extend([ - f"warning: unused variable: `x{i}`", - f" --> src/file{i}.rs:{i + 1}:5", - " |", - f"{i + 1} | let x{i} = 42;", - " | ^^ help: if this is intentional, prefix it with an underscore", - "", - ]) + warnings.extend( + [ + f"warning: unused variable: `x{i}`", + f" --> src/file{i}.rs:{i + 1}:5", + " |", + f"{i + 1} | let x{i} = 42;", + " | ^^ help: if this is intentional, prefix it with an underscore", + "", + ] + ) warnings.append("warning: `myapp` (lib) generated 6 warnings") warnings.append(" Finished dev [unoptimized + debuginfo] target(s)") output = "\n".join(warnings) @@ -3577,27 +3581,31 @@ def test_cargo_build_groups_warnings_by_type(self): assert "Finished" in result def test_cargo_build_keeps_finished_line(self): - output = "\n".join([ - " Compiling myapp v0.1.0", - " Finished dev [unoptimized + debuginfo] target(s) in 2.34s", - ]) + output = "\n".join( + [ + " Compiling 
myapp v0.1.0", + " Finished dev [unoptimized + debuginfo] target(s) in 2.34s", + ] + ) result = self.p.process("cargo build", output) assert "Finished" in result def test_cargo_build_mixed_errors_and_warnings(self): - output = "\n".join([ - " Compiling myapp v0.1.0", - "warning: unused import: `std::io`", - " --> src/main.rs:1:5", - "", - "error[E0425]: cannot find value `x`", - " --> src/main.rs:5:5", - " |", - "5 | println!(\"{}\", x);", - " | ^ not found", - "", - "error: aborting due to previous error", - ]) + output = "\n".join( + [ + " Compiling myapp v0.1.0", + "warning: unused import: `std::io`", + " --> src/main.rs:1:5", + "", + "error[E0425]: cannot find value `x`", + " --> src/main.rs:5:5", + " |", + '5 | println!("{}", x);', + " | ^ not found", + "", + "error: aborting due to previous error", + ] + ) result = self.p.process("cargo build", output) assert "cannot find value" in result assert "src/main.rs:5:5" in result @@ -3614,13 +3622,15 @@ def test_cargo_doc_collapses_documenting(self): assert "Documenting dep-" not in result def test_cargo_update_shows_major_bumps(self): - output = "\n".join([ - " Updating serde v1.0.0 -> v2.0.0", - " Updating tokio v1.28.0 -> v1.29.0", - " Updating rand v0.8.0 -> v0.8.1", - " Adding new-dep v0.1.0", - " Removing old-dep v0.5.0", - ]) + output = "\n".join( + [ + " Updating serde v1.0.0 -> v2.0.0", + " Updating tokio v1.28.0 -> v1.29.0", + " Updating rand v0.8.0 -> v0.8.1", + " Adding new-dep v0.1.0", + " Removing old-dep v0.5.0", + ] + ) result = self.p.process("cargo update", output) assert "MAJOR" in result assert "serde" in result @@ -3635,14 +3645,16 @@ def test_cargo_update_collapses_patch_bumps(self): assert "Minor/patch updates: 20" in result def test_cargo_bench_keeps_results(self): - output = "\n".join([ - " Compiling myapp v0.1.0", - " Compiling myapp-bench v0.1.0", - " Running benches/bench.rs", - "test bench_add ... bench: 10 ns/iter (+/- 2)", - "test bench_multiply ... 
bench: 25 ns/iter (+/- 5)", - "test result: ok. 2 passed; 0 failed; 0 ignored; 2 measured", - ]) + output = "\n".join( + [ + " Compiling myapp v0.1.0", + " Compiling myapp-bench v0.1.0", + " Running benches/bench.rs", + "test bench_add ... bench: 10 ns/iter (+/- 2)", + "test bench_multiply ... bench: 25 ns/iter (+/- 5)", + "test result: ok. 2 passed; 0 failed; 0 ignored; 2 measured", + ] + ) result = self.p.process("cargo bench", output) assert "bench_add" in result assert "bench_multiply" in result @@ -3691,11 +3703,13 @@ def test_empty_output(self): assert self.p.process("go build ./...", "") == "" def test_go_build_preserves_errors(self): - output = "\n".join([ - "# myapp/pkg/handler", - "pkg/handler/main.go:15:2: undefined: DoSomething", - "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", - ]) + output = "\n".join( + [ + "# myapp/pkg/handler", + "pkg/handler/main.go:15:2: undefined: DoSomething", + "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", + ] + ) result = self.p.process("go build ./...", output) assert "undefined: DoSomething" in result assert "main.go:15:2" in result @@ -3725,11 +3739,13 @@ def test_go_mod_tidy_collapses_downloads(self): assert "downloading" not in result def test_go_mod_keeps_added_removed(self): - output = "\n".join([ - "go: downloading github.com/pkg/a v1.0.0", - "go: added github.com/pkg/a v1.0.0", - "go: upgraded github.com/pkg/b v1.0.0 => v1.1.0", - ]) + output = "\n".join( + [ + "go: downloading github.com/pkg/a v1.0.0", + "go: added github.com/pkg/a v1.0.0", + "go: upgraded github.com/pkg/b v1.0.0 => v1.1.0", + ] + ) result = self.p.process("go mod tidy", output) assert "added" in result assert "upgraded" in result @@ -3743,19 +3759,23 @@ def test_go_generate_collapses_running(self): assert "Generated output.go" in result def test_go_generate_keeps_errors(self): - output = "\n".join([ - "main.go:5: running stringer", - "error: stringer: can't find type Foo", - ]) + output = 
"\n".join( + [ + "main.go:5: running stringer", + "error: stringer: can't find type Foo", + ] + ) result = self.p.process("go generate ./...", output) assert "error" in result assert "can't find type Foo" in result def test_go_install_delegates_to_build(self): - output = "\n".join([ - "# myapp/cmd/server", - "cmd/server/main.go:10:5: undefined: handler.New", - ]) + output = "\n".join( + [ + "# myapp/cmd/server", + "cmd/server/main.go:10:5: undefined: handler.New", + ] + ) result = self.p.process("go install ./cmd/...", output) assert "undefined: handler.New" in result @@ -3801,21 +3821,25 @@ def test_ssh_preserves_errors(self): assert "ERROR: connection failed" in result def test_scp_collapses_progress(self): - output = "\n".join([ - "file1.tar.gz 10% 24MB 12.3MB/s 00:02", - "file1.tar.gz 50% 120MB 12.3MB/s 00:10", - "file1.tar.gz 100% 245MB 12.3MB/s 00:20", - "file2.tar.gz 10% 10MB 5.0MB/s 00:01", - "file2.tar.gz 100% 100MB 5.0MB/s 00:20", - ]) + output = "\n".join( + [ + "file1.tar.gz 10% 24MB 12.3MB/s 00:02", + "file1.tar.gz 50% 120MB 12.3MB/s 00:10", + "file1.tar.gz 100% 245MB 12.3MB/s 00:20", + "file2.tar.gz 10% 10MB 5.0MB/s 00:01", + "file2.tar.gz 100% 100MB 5.0MB/s 00:20", + ] + ) result = self.p.process("scp file1.tar.gz file2.tar.gz host:/tmp/", output) assert len(result) < len(output) def test_scp_keeps_errors(self): - output = "\n".join([ - "file.txt 100% 1MB 1.0MB/s 00:01", - "scp: /remote/path: Permission denied", - ]) + output = "\n".join( + [ + "file.txt 100% 1MB 1.0MB/s 00:01", + "scp: /remote/path: Permission denied", + ] + ) result = self.p.process("scp file.txt host:/remote/path", output) assert "Permission denied" in result @@ -3846,15 +3870,16 @@ def test_empty_output(self): def test_jq_small_output_passthrough(self): data = {"key": "value", "count": 42} import json + output = json.dumps(data, indent=2) result = self.p.process("jq . 
file.json", output) assert result == output def test_jq_large_json_compressed(self): import json + data = [ - {"id": i, "name": f"item-{i}", "data": {"nested": "value" * 10}} - for i in range(100) + {"id": i, "name": f"item-{i}", "data": {"nested": "value" * 10}} for i in range(100) ] output = json.dumps(data, indent=2) assert len(output.splitlines()) > 50 @@ -3863,6 +3888,7 @@ def test_jq_large_json_compressed(self): def test_jq_streaming_output(self): import json + lines = [json.dumps({"id": i, "name": f"item-{i}"}) for i in range(100)] output = "\n".join(lines) result = self.p.process("jq -c '.[]' file.json", output) From 9bb49e2225e10c2f1585932a123921c4b93a965c Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Sat, 28 Mar 2026 16:00:32 +0100 Subject: [PATCH 4/6] feat: add per-processor disable, 4 new processors, and multi-chain support - Per-processor enable/disable via `disabled_processors` config/env var - New processors: python_install (pip/poetry/uv), cargo_clippy (Rust clippy), maven_gradle (mvn/gradle), structured_log (JSON lines/stern/kubetail) - Multi-processor chaining: chain_to supports lists with cycle detection and max_chain_depth config; cargo_clippy chains to lint processor - Fix dead patterns and edge cases in build_output and lint_output --- README.md | 44 ++-- docs/processors/cargo.md | 2 +- docs/processors/cargo_clippy.md | 39 +++ docs/processors/maven_gradle.md | 40 ++++ docs/processors/python_install.md | 29 +++ docs/processors/structured_log.md | 35 +++ src/config.py | 4 + src/engine.py | 36 ++- src/processors/__init__.py | 8 +- src/processors/base.py | 2 +- src/processors/build_output.py | 19 +- src/processors/cargo_clippy.py | 189 +++++++++++++++ src/processors/lint_output.py | 4 +- src/processors/maven_gradle.py | 224 +++++++++++++++++ src/processors/python_install.py | 216 +++++++++++++++++ src/processors/structured_log.py | 159 +++++++++++++ tests/test_config.py | 41 ++++ 
tests/test_engine.py | 327 ++++++++++++++++++++++++- tests/test_hooks.py | 19 ++ tests/test_processors.py | 383 ++++++++++++++++++++++++++++-- 20 files changed, 1758 insertions(+), 62 deletions(-) create mode 100644 docs/processors/cargo_clippy.md create mode 100644 docs/processors/maven_gradle.md create mode 100644 docs/processors/python_install.md create mode 100644 docs/processors/structured_log.md create mode 100644 src/processors/cargo_clippy.py create mode 100644 src/processors/maven_gradle.py create mode 100644 src/processors/python_install.py create mode 100644 src/processors/structured_log.py diff --git a/README.md b/README.md index 507e897..07525f8 100644 --- a/README.md +++ b/README.md @@ -237,24 +237,28 @@ processor is in [`docs/processors/`](docs/processors/). | 1 | **Package List** | 15 | pip list/freeze, npm ls, conda list, gem list, brew list | [package_list.md](docs/processors/package_list.md) | | 2 | **Git** | 20 | status, diff, log, show, push/pull/fetch, branch, stash, reflog, blame, cherry-pick, rebase, merge | [git.md](docs/processors/git.md) | | 3 | **Test** | 21 | pytest, jest, vitest, mocha, cargo test, go test, rspec, phpunit, bun test, npm/yarn/pnpm test, dotnet test, swift test, mix test | [test_output.md](docs/processors/test_output.md) | -| 4 | **Build** | 25 | npm/yarn/pnpm build/install, cargo build, make, cmake, gradle, mvn, pip install, tsc, webpack, vite, next build, turbo, nx, bazel, sbt, mix compile, docker build | [build_output.md](docs/processors/build_output.md) | -| 5 | **Lint** | 27 | eslint, ruff, flake8, pylint, clippy, mypy, prettier, biome, shellcheck, hadolint, rubocop, golangci-lint | [lint_output.md](docs/processors/lint_output.md) | -| 6 | **Network** | 30 | curl, wget, http/https (httpie) | [network.md](docs/processors/network.md) | -| 7 | **Docker** | 31 | ps, images, logs, pull/push, inspect, stats, compose up/down/build/ps/logs | [docker.md](docs/processors/docker.md) | -| 8 | **Kubernetes** | 32 | kubectl/oc 
get, describe, logs, top, apply, delete, create | [kubectl.md](docs/processors/kubectl.md) | -| 9 | **Terraform** | 33 | terraform/tofu plan, apply, destroy, init, output, state list/show | [terraform.md](docs/processors/terraform.md) | -| 10 | **Environment** | 34 | env, printenv (with secret redaction) | [env.md](docs/processors/env.md) | -| 11 | **Search** | 35 | grep -r, rg, ag, fd, fdfind | [search.md](docs/processors/search.md) | -| 12 | **System Info** | 36 | du, wc, df | [system_info.md](docs/processors/system_info.md) | -| 13 | **GitHub CLI** | 37 | gh pr/issue/run list/view/diff/checks/status | [gh.md](docs/processors/gh.md) | -| 14 | **Database Query** | 38 | psql, mysql, sqlite3, pgcli, mycli, litecli | [db_query.md](docs/processors/db_query.md) | -| 15 | **Cloud CLI** | 39 | aws, gcloud, az (JSON/table/text output compression) | [cloud_cli.md](docs/processors/cloud_cli.md) | -| 16 | **Ansible** | 40 | ansible-playbook, ansible (ok/skipped counting, error preservation) | [ansible.md](docs/processors/ansible.md) | -| 17 | **Helm** | 41 | helm install/upgrade/list/template/status/history | [helm.md](docs/processors/helm.md) | -| 18 | **Syslog** | 42 | journalctl, dmesg (head/tail with error extraction) | [syslog.md](docs/processors/syslog.md) | -| 19 | **File Listing** | 50 | ls, find, tree, exa, eza, rsync | [file_listing.md](docs/processors/file_listing.md) | -| 20 | **File Content** | 51 | cat, head, tail, bat, less, more (content-aware: code, config, log, CSV) | [file_content.md](docs/processors/file_content.md) | -| 21 | **Generic** | 999 | Any command (fallback: ANSI strip, dedup, truncation) | [generic.md](docs/processors/generic.md) | +| 4 | **Python Install** | 24 | pip install, poetry install/update/add, uv pip install, uv sync | [python_install.md](docs/processors/python_install.md) | +| 5 | **Build** | 25 | npm/yarn/pnpm build/install, cargo build, make, cmake, tsc, webpack, vite, next build, turbo, nx, bazel, sbt, mix compile, docker build | 
[build_output.md](docs/processors/build_output.md) | +| 6 | **Cargo Clippy** | 26 | cargo clippy (multi-line block grouping with span/help preservation) | [cargo_clippy.md](docs/processors/cargo_clippy.md) | +| 7 | **Lint** | 27 | eslint, ruff, flake8, pylint, clippy, mypy, prettier, biome, shellcheck, hadolint, rubocop, golangci-lint | [lint_output.md](docs/processors/lint_output.md) | +| 8 | **Maven/Gradle** | 28 | mvn, ./mvnw, gradle, ./gradlew (download stripping, task noise removal) | [maven_gradle.md](docs/processors/maven_gradle.md) | +| 9 | **Network** | 30 | curl, wget, http/https (httpie) | [network.md](docs/processors/network.md) | +| 10 | **Docker** | 31 | ps, images, logs, pull/push, inspect, stats, compose up/down/build/ps/logs | [docker.md](docs/processors/docker.md) | +| 11 | **Kubernetes** | 32 | kubectl/oc get, describe, logs, top, apply, delete, create | [kubectl.md](docs/processors/kubectl.md) | +| 12 | **Terraform** | 33 | terraform/tofu plan, apply, destroy, init, output, state list/show | [terraform.md](docs/processors/terraform.md) | +| 13 | **Environment** | 34 | env, printenv (with secret redaction) | [env.md](docs/processors/env.md) | +| 14 | **Search** | 35 | grep -r, rg, ag, fd, fdfind | [search.md](docs/processors/search.md) | +| 15 | **System Info** | 36 | du, wc, df | [system_info.md](docs/processors/system_info.md) | +| 16 | **GitHub CLI** | 37 | gh pr/issue/run list/view/diff/checks/status | [gh.md](docs/processors/gh.md) | +| 17 | **Database Query** | 38 | psql, mysql, sqlite3, pgcli, mycli, litecli | [db_query.md](docs/processors/db_query.md) | +| 18 | **Cloud CLI** | 39 | aws, gcloud, az (JSON/table/text output compression) | [cloud_cli.md](docs/processors/cloud_cli.md) | +| 19 | **Ansible** | 40 | ansible-playbook, ansible (ok/skipped counting, error preservation) | [ansible.md](docs/processors/ansible.md) | +| 20 | **Helm** | 41 | helm install/upgrade/list/template/status/history | [helm.md](docs/processors/helm.md) | +| 21 | 
**Syslog** | 42 | journalctl, dmesg (head/tail with error extraction) | [syslog.md](docs/processors/syslog.md) | +| 22 | **Structured Log** | 45 | stern, kubetail (JSON Lines grouping by level) | [structured_log.md](docs/processors/structured_log.md) | +| 23 | **File Listing** | 50 | ls, find, tree, exa, eza, rsync | [file_listing.md](docs/processors/file_listing.md) | +| 24 | **File Content** | 51 | cat, head, tail, bat, less, more (content-aware: code, config, log, CSV) | [file_content.md](docs/processors/file_content.md) | +| 25 | **Generic** | 999 | Any command (fallback: ANSI strip, dedup, truncation) | [generic.md](docs/processors/generic.md) | ## Configuration @@ -341,11 +345,13 @@ Project settings are merged with global settings. Token-Saver walks up parent di | `max_traceback_lines` | 30 | Max traceback lines before truncation | | `db_prune_days` | 90 | Stats retention in days | | `user_processors_dir` | `~/.token-saver/processors/` | Directory for custom processors | +| `disabled_processors` | `[]` | List of processor names to disable (env: comma-separated) | +| `max_chain_depth` | 3 | Maximum processor chain depth | | `debug` | false | Enable debug logging | ## Custom Processors -You can extend Token-Saver with your own processors for commands not covered by the built-in 21. +You can extend Token-Saver with your own processors for commands not covered by the built-in 25. 1. Create a Python file with a class inheriting from `src.processors.base.Processor` 2. Implement `can_handle()`, `process()`, `name`, and set `priority` diff --git a/docs/processors/cargo.md b/docs/processors/cargo.md index c9a16ba..2236cb5 100644 --- a/docs/processors/cargo.md +++ b/docs/processors/cargo.md @@ -20,7 +20,7 @@ cargo build, cargo check, cargo doc, cargo update, cargo bench. 
## Exclusions - `cargo test` is routed to `TestOutputProcessor` -- `cargo clippy` is routed to `LintOutputProcessor` +- `cargo clippy` is routed to `CargoClippyProcessor` ## Configuration diff --git a/docs/processors/cargo_clippy.md b/docs/processors/cargo_clippy.md new file mode 100644 index 0000000..d8483c4 --- /dev/null +++ b/docs/processors/cargo_clippy.md @@ -0,0 +1,39 @@ +# Cargo Clippy Processor + +**File:** `src/processors/cargo_clippy.py` | **Priority:** 26 | **Name:** `cargo_clippy` + +Dedicated processor for Rust clippy lint output with multi-line block awareness. + +## Supported Commands + +cargo clippy (with any flags like `--all-targets`, `-- -W clippy::all`). + +## Strategy + +Parses clippy's multi-line warning blocks (header + `-->` span + code + `= help:` annotations) as coherent units. Groups warnings by clippy lint rule. Shows N example blocks per rule with full context. Preserves all errors in full. + +| Output Type | Strategy | +|---|---| +| **Warnings** | Group by lint rule (e.g., `clippy::needless_return`). Show count + N example blocks per rule. Categorize as style/correctness/complexity/perf | +| **Errors** | Keep all error blocks in full with spans and context | +| **Checking/Compiling** | Collapse into count (e.g., `[12 checked, 3 compiled]`) | +| **Summary** | Keep `warning: X generated N warnings` summary line | + +## Key Difference from Lint Processor + +The generic `LintOutputProcessor` groups violations as single lines. Clippy output has multi-line blocks with `-->` spans, code snippets, and `= help:` annotations that need to be preserved as coherent units. This processor keeps the block structure intact. 
+ +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| cargo_warning_example_count | 2 | Number of example warning blocks to show per rule | +| cargo_warning_group_threshold | 3 | Minimum occurrences before warnings are grouped | + +## Chaining + +After clippy-specific processing, output is chained to the `lint` processor (`chain_to = ["lint"]`). This allows any non-clippy-specific warnings in the output to be grouped by the generic lint rule parser. + +## Fallback + +If this processor is disabled, `cargo clippy` falls back to the `LintOutputProcessor` which handles it at a line-by-line level. diff --git a/docs/processors/maven_gradle.md b/docs/processors/maven_gradle.md new file mode 100644 index 0000000..24bf25e --- /dev/null +++ b/docs/processors/maven_gradle.md @@ -0,0 +1,40 @@ +# Maven/Gradle Processor + +**File:** `src/processors/maven_gradle.py` | **Priority:** 28 | **Name:** `maven_gradle` + +Dedicated processor for Maven and Gradle build output. + +## Supported Commands + +mvn, ./mvnw, gradle, ./gradlew (all subcommands). + +## Strategy + +### Maven + +| Output Type | Strategy | +|---|---| +| **Download lines** | Strip `[INFO] Downloading from` and `[INFO] Downloaded from` lines. Show count | +| **Module lines** | Count `[INFO] Building module-name` lines | +| **Errors** | Keep all `[ERROR]` and `[FATAL]` lines | +| **Warnings** | Keep first 5 `[WARNING]` lines, summarize rest | +| **Test results** | Keep `Tests run: N, Failures: N` lines | +| **Reactor summary** | Keep reactor summary block | +| **Build result** | Keep `BUILD SUCCESS`/`BUILD FAILURE` and timing | + +### Gradle + +| Output Type | Strategy | +|---|---| +| **Task lines** | Strip `UP-TO-DATE`, `NO-SOURCE`, `SKIPPED`, `FROM-CACHE` tasks. Keep executed tasks. 
Show counts | +| **Errors** | Keep `FAILURE:` blocks, error details, `What went wrong` sections | +| **Test results** | Keep test result summary lines | +| **Build result** | Keep `BUILD SUCCESSFUL`/`BUILD FAILED` and actionable task summary | + +## Configuration + +No dedicated configuration keys. Uses default compression thresholds. + +## Removed Noise + +Maven: `[INFO] Downloading/Downloaded` lines, separator lines (`-----`), empty `[INFO]` lines. Gradle: `UP-TO-DATE`/`NO-SOURCE` task lines, progress indicators. diff --git a/docs/processors/python_install.md b/docs/processors/python_install.md new file mode 100644 index 0000000..d028b1c --- /dev/null +++ b/docs/processors/python_install.md @@ -0,0 +1,29 @@ +# Python Install Processor + +**File:** `src/processors/python_install.py` | **Priority:** 24 | **Name:** `python_install` + +Dedicated processor for Python package installation output. + +## Supported Commands + +pip install, pip3 install, poetry install/update/add, uv pip install, uv sync. + +## Strategy + +| Tool | Strategy | +|---|---| +| **pip install** | Strip `Collecting` and `Downloading` lines. Remove progress bars. Count packages installed. Show `already satisfied` count. Preserve all errors and warnings. Show installed package summary (first 10 + count) | +| **poetry install/update/add** | Strip `Resolving dependencies` progress. Count installed/updated/removed packages. Show package names with versions. Preserve errors | +| **uv pip install/sync** | Strip download progress. Keep `Resolved N packages` and `Installed N packages` summaries. Preserve errors | + +## Exclusions + +- `pip list` and `pip freeze` are routed to `PackageListProcessor` + +## Configuration + +No dedicated configuration keys. Uses default compression thresholds. + +## Removed Noise + +`Collecting X>=1.0` lines, `Downloading X-1.0.whl` lines, pip progress bars, `Installing collected packages:` line, `Using cached` lines, `Resolving dependencies...` output from poetry. 
diff --git a/docs/processors/structured_log.md b/docs/processors/structured_log.md new file mode 100644 index 0000000..34ff6fd --- /dev/null +++ b/docs/processors/structured_log.md @@ -0,0 +1,35 @@ +# Structured Log Processor + +**File:** `src/processors/structured_log.py` | **Priority:** 45 | **Name:** `structured_log` + +Processor for JSON Lines log output from log tailing tools. + +## Supported Commands + +stern, kubetail. + +## Strategy + +| Content Type | Strategy | +|---|---| +| **JSON Lines (>50% valid JSON)** | Parse each JSON object. Group entries by log level (error/warn/info/debug/trace). Show count per level. Extract and display error messages (up to 10). Detect level from common keys: `level`, `severity`, `log_level`, `lvl` | +| **Non-JSON output** | Fall back to log compression (head/tail with error preservation) | + +## Level Detection + +Checks these JSON keys in order: `level`, `severity`, `log_level`, `loglevel`, `lvl`, `log.level`. Falls back to regex matching on message content for `ERROR`/`WARN` patterns. + +## Message Extraction + +Checks these JSON keys in order: `msg`, `message`, `text`, `log`, `body`. Truncates messages longer than 200 characters. + +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| kubectl_keep_head | 5 | Lines to keep from start (non-JSON fallback) | +| kubectl_keep_tail | 10 | Lines to keep from end (non-JSON fallback) | + +## Future Use + +This processor can be activated via `chain_to` from other processors for outputs that contain embedded JSON Lines. 
diff --git a/src/config.py b/src/config.py index 633adb9..5eb3fda 100644 --- a/src/config.py +++ b/src/config.py @@ -51,6 +51,8 @@ "cargo_warning_example_count": 2, "cargo_warning_group_threshold": 3, "jq_passthrough_threshold": 50, + "disabled_processors": [], + "max_chain_depth": 3, "debug": False, } @@ -129,6 +131,8 @@ def _load_config() -> dict[str, Any]: elif isinstance(default_val, float): with contextlib.suppress(ValueError): config[key] = float(env_val) + elif isinstance(default_val, list): + config[key] = [s.strip() for s in env_val.split(",") if s.strip()] else: config[key] = env_val config.setdefault("_config_source", {})[key] = f"env:{env_key}" diff --git a/src/engine.py b/src/engine.py index abd8a79..eed63d5 100644 --- a/src/engine.py +++ b/src/engine.py @@ -23,7 +23,12 @@ class CompressionEngine: _by_name: dict[str, Processor] def __init__(self) -> None: - self.processors = discover_processors() + all_processors = discover_processors() + raw_disabled = config.get("disabled_processors") or [] + disabled = set(raw_disabled if isinstance(raw_disabled, list) else []) + # Never disable generic — it's the fallback and provides clean() + disabled.discard("generic") + self.processors = [p for p in all_processors if p.name not in disabled] self._generic = self.processors[-1] # Last = GenericProcessor (priority 999) self._by_name = {p.name: p for p in self.processors} @@ -51,16 +56,25 @@ def compress(self, command: str, output: str) -> tuple[str, str, bool]: if compressed is output or compressed == output: return output, processor.name, False - # Chain to secondary processor if declared (max depth = 1) - if ( - processor.chain_to - and processor.chain_to != processor.name - and processor.chain_to in self._by_name - ): - secondary = self._by_name[processor.chain_to] - chained = secondary.process(command, compressed) - if chained is not compressed and chained != compressed: - compressed = chained + # Chain to secondary processors if declared + chain_list = 
processor.chain_to + if chain_list: + if isinstance(chain_list, str): + chain_list = [chain_list] + max_depth = config.get("max_chain_depth") + visited = {processor.name} + depth = 0 + for chain_name in chain_list: + if depth >= max_depth: + break + if chain_name in visited or chain_name not in self._by_name: + continue + secondary = self._by_name[chain_name] + visited.add(chain_name) + chained = secondary.process(command, compressed) + if chained is not compressed and chained != compressed: + compressed = chained + depth += 1 # If a specialized processor handled it, also run generic # cleanup (ANSI strip, blank line collapse) but not truncation diff --git a/src/processors/__init__.py b/src/processors/__init__.py index b1e7eec..0a72645 100644 --- a/src/processors/__init__.py +++ b/src/processors/__init__.py @@ -108,8 +108,14 @@ def collect_hook_patterns() -> list[str]: """Collect all hook_patterns from discovered processors. Returns a flat list of regex pattern strings, used by hook_pretool.py. + Disabled processors are excluded so their commands are not intercepted. """ + from .. 
import config # noqa: PLC0415 + + raw_disabled = config.get("disabled_processors") or [] + disabled = set(raw_disabled if isinstance(raw_disabled, list) else []) patterns: list[str] = [] for processor in discover_processors(): - patterns.extend(processor.hook_patterns) + if processor.name not in disabled: + patterns.extend(processor.hook_patterns) return patterns diff --git a/src/processors/base.py b/src/processors/base.py index 3d84861..67c8f59 100644 --- a/src/processors/base.py +++ b/src/processors/base.py @@ -19,7 +19,7 @@ class Processor(ABC): priority: int = 50 hook_patterns: list[str] = [] - chain_to: str | None = None + chain_to: str | list[str] | None = None @abstractmethod def can_handle(self, command: str) -> bool: diff --git a/src/processors/build_output.py b/src/processors/build_output.py index be0fbb9..55ca72c 100644 --- a/src/processors/build_output.py +++ b/src/processors/build_output.py @@ -9,10 +9,9 @@ class BuildOutputProcessor(Processor): priority = 25 hook_patterns = [ r"^(npm\s+(run|install|build|ci|audit)|yarn\s+(run|install|build|add|audit)|pnpm\s+(run|install|build|add|audit))\b", - r"^(cargo\s+(build|check)|make\b|cmake\b|gradle\b|mvn\b|ant\b)", - r"^(pip3?\s+install|poetry\s+(install|update)|uv\s+(pip|sync))\b", + r"^(make|cmake|ant)\b", r"^(tsc|webpack|vite(\s+build)?|esbuild|rollup|next\s+build|nuxt\s+build)\b", - r"^(turbo\s+run|turbo\s+build|nx\s+(run|build)|bazel\s+build|sbt\s|mix\s+compile)\b", + r"^(turbo\s+run|turbo\s+build|nx\s+(run|build)|bazel\s+build|sbt\b|mix\s+compile)\b", r"^docker\s+(build|compose\s+build)\b", r"^bun\s+(install|build|run)\b", ] @@ -25,17 +24,19 @@ def can_handle(self, command: str) -> bool: # Exclude package listing commands (handled by PackageListProcessor) if re.search(r"\b(pip3?\s+(list|freeze)|npm\s+(ls|list)|conda\s+list)\b", command): return False - # Exclude cargo clippy (handled by LintOutputProcessor) - if re.search(r"\bcargo\s+clippy\b", command): + # Exclude Python install (handled by 
PythonInstallProcessor) + if re.search( + r"\b(pip3?\s+install|poetry\s+(install|update|add)|uv\s+(pip\s+install|sync))\b", + command, + ): return False - # Exclude cargo build/check (handled by CargoProcessor) - if re.search(r"\bcargo\s+(build|check)\b", command): + # Exclude Maven/Gradle (handled by MavenGradleProcessor) + if re.search(r"\b(mvn|mvnw|gradle|gradlew)\b", command): return False return bool( re.search( r"\b(npm\s+(run|install|ci|build|audit)|yarn\s+(run|install|build|add|audit)|pnpm\s+(run|install|build|add|audit)|" - r"cargo\s+(build|check)|make\b|cmake\b|gradle\b|mvn\b|ant\b|" - r"pip3?\s+install|poetry\s+(install|update)|uv\s+(pip|sync)|" + r"make\b|cmake\b|ant\b|" r"tsc\b|webpack\b|vite(\s+build)?|esbuild\b|rollup\b|next\s+build|nuxt\s+build|" r"docker\s+(build|compose\s+build)|" r"turbo\s+(run|build)|nx\s+(run|build)|bazel\s+build|sbt\b|mix\s+compile|" diff --git a/src/processors/cargo_clippy.py b/src/processors/cargo_clippy.py new file mode 100644 index 0000000..cfe83fd --- /dev/null +++ b/src/processors/cargo_clippy.py @@ -0,0 +1,189 @@ +"""Cargo clippy processor: dedicated Rust clippy lint handling.""" + +import re +from collections import defaultdict + +from .. 
import config +from .base import Processor + +_CLIPPY_CMD_RE = re.compile(r"\bcargo\s+clippy\b") +_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") +_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") +_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") +_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") +_FINISHED_RE = re.compile(r"^\s*Finished\s+") +_CHECKING_RE = re.compile(r"^\s*Checking\s+\S+\s+v") +_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") + +# Clippy lint categories +_CLIPPY_CATEGORIES = { + "needless_return": "style", + "redundant_closure": "style", + "len_zero": "style", + "manual_map": "style", + "single_match": "style", + "match_bool": "style", + "collapsible_if": "style", + "unused_imports": "correctness", + "unused_variables": "correctness", + "dead_code": "correctness", + "unreachable_code": "correctness", + "needless_borrow": "complexity", + "unnecessary_unwrap": "complexity", + "map_unwrap_or": "complexity", + "clone_on_copy": "perf", + "large_enum_variant": "perf", + "box_collection": "perf", +} + + +def _categorize_lint(rule: str) -> str: + """Categorize a clippy lint by its rule name.""" + # Strip clippy:: prefix if present + short = rule.replace("clippy::", "") + return _CLIPPY_CATEGORIES.get(short, "other") + + +class CargoClippyProcessor(Processor): + priority = 26 + chain_to = ["lint"] + hook_patterns = [ + r"^cargo\s+clippy\b", + ] + + @property + def name(self) -> str: + return "cargo_clippy" + + def can_handle(self, command: str) -> bool: + return bool(_CLIPPY_CMD_RE.search(command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + lines = output.splitlines() + result: list[str] = [] + checking_count = 0 + compiling_count = 0 + + # Parse warnings as multi-line blocks + warnings_by_rule: dict[str, list[list[str]]] = defaultdict(list) + error_blocks: list[list[str]] = [] + current_block: list[str] = [] + 
current_rule: str | None = None + in_error = False + current_error: list[str] = [] + finished_lines: list[str] = [] + summary_lines: list[str] = [] + + for line in lines: + stripped = line.strip() + + if _CHECKING_RE.match(stripped): + checking_count += 1 + continue + if _COMPILING_RE.match(stripped): + compiling_count += 1 + continue + + # Error start + if _ERROR_START_RE.match(stripped): + # Flush current warning block + if current_rule and current_block: + warnings_by_rule[current_rule].append(current_block) + current_block = [] + current_rule = None + # Start error block + if in_error and current_error: + error_blocks.append(current_error) + in_error = True + current_error = [line] + continue + + # Warning start + wm = _WARNING_START_RE.match(stripped) + if wm and not _WARNING_SUMMARY_RE.match(stripped): + # Flush previous + if in_error and current_error: + error_blocks.append(current_error) + in_error = False + current_error = [] + if current_rule and current_block: + warnings_by_rule[current_rule].append(current_block) + + rule = wm.group(1) or "other" + current_rule = rule + current_block = [line] + continue + + if _WARNING_SUMMARY_RE.match(stripped): + if current_rule and current_block: + warnings_by_rule[current_rule].append(current_block) + current_block = [] + current_rule = None + if in_error and current_error: + error_blocks.append(current_error) + in_error = False + current_error = [] + summary_lines.append(line) + continue + + if _FINISHED_RE.match(stripped): + if current_rule and current_block: + warnings_by_rule[current_rule].append(current_block) + current_block = [] + current_rule = None + if in_error and current_error: + error_blocks.append(current_error) + in_error = False + current_error = [] + finished_lines.append(line) + continue + + # Context lines (spans, code, help annotations) + if in_error: + current_error.append(line) + elif current_rule: + current_block.append(line) + + # Flush remaining + if in_error and current_error: + 
error_blocks.append(current_error) + if current_rule and current_block: + warnings_by_rule[current_rule].append(current_block) + + # Build compressed output + prep = [] + if checking_count: + prep.append(f"{checking_count} checked") + if compiling_count: + prep.append(f"{compiling_count} compiled") + if prep: + result.append(f"[{', '.join(prep)}]") + + # All errors (kept in full) + for block in error_blocks: + result.extend(block) + + # Grouped warnings by rule + example_count = config.get("cargo_warning_example_count") + group_threshold = config.get("cargo_warning_group_threshold") + + for rule, blocks in sorted(warnings_by_rule.items(), key=lambda x: -len(x[1])): + count = len(blocks) + category = _categorize_lint(rule) + if count >= group_threshold: + result.append(f"warning[{rule}] ({category}): {count} occurrences") + for block in blocks[:example_count]: + result.extend(f" {bline}" for bline in block) + if count > example_count: + result.append(f" ... ({count - example_count} more)") + else: + for block in blocks: + result.extend(block) + + result.extend(summary_lines) + result.extend(finished_lines) + + return "\n".join(result) if result else output diff --git a/src/processors/lint_output.py b/src/processors/lint_output.py index 96ed45a..8577ffb 100644 --- a/src/processors/lint_output.py +++ b/src/processors/lint_output.py @@ -10,9 +10,9 @@ class LintOutputProcessor(Processor): priority = 27 hook_patterns = [ - r"^(eslint|ruff(\s+check)?|flake8|pylint|clippy|rubocop|golangci-lint|stylelint|biome\s+(check|lint))\b", + r"^(eslint|ruff(\s+check)?|flake8|pylint|rubocop|golangci-lint|stylelint|biome\s+(check|lint))\b", r"^python3?\s+-m\s+(flake8|pylint|ruff|mypy)\b", - r"^(mypy|prettier\s+--check|shellcheck|hadolint|tflint|ktlint|swiftlint|cargo\s+clippy)\b", + r"^(mypy|prettier\s+--check|shellcheck|hadolint|tflint|ktlint|swiftlint)\b", r"^(oxlint|deno\s+lint)\b", ] diff --git a/src/processors/maven_gradle.py b/src/processors/maven_gradle.py new file mode 100644 
index 0000000..22992fb --- /dev/null +++ b/src/processors/maven_gradle.py @@ -0,0 +1,224 @@ +"""Maven/Gradle processor: mvn, gradle, gradlew, mvnw builds.""" + +import re + +from .base import Processor + +_MVN_RE = re.compile(r"\b(mvn|\.?/?mvnw)\b") +_GRADLE_RE = re.compile(r"\b(gradle|\.?/?gradlew)\b") + +# Maven patterns +_MVN_DOWNLOAD_RE = re.compile(r"^\[INFO\]\s+(Downloading|Downloaded)\s+from\s+") +_MVN_MODULE_RE = re.compile(r"^\[INFO\]\s+Building\s+(.+?)\s+\[") +_MVN_SEPARATOR_RE = re.compile(r"^\[INFO\]\s+-{10,}") +_MVN_ERROR_RE = re.compile(r"^\[(ERROR|FATAL)\]") +_MVN_WARNING_RE = re.compile(r"^\[WARNING\]") +_MVN_BUILD_RESULT_RE = re.compile(r"^\[INFO\]\s+(BUILD\s+(SUCCESS|FAILURE))") +_MVN_TEST_RESULT_RE = re.compile(r"^\[INFO\]\s+Tests run:\s+(\d+)") +_MVN_REACTOR_RE = re.compile(r"^\[INFO\]\s+Reactor Summary") +_MVN_TOTAL_TIME_RE = re.compile(r"^\[INFO\]\s+Total time:") +_MVN_EMPTY_INFO_RE = re.compile(r"^\[INFO\]\s*$") + +# Gradle patterns +_GRADLE_TASK_RE = re.compile(r"^>\s+Task\s+:(\S+)") +_GRADLE_UPTODATE_RE = re.compile(r"\b(UP-TO-DATE|NO-SOURCE|SKIPPED|FROM-CACHE)\s*$") +_GRADLE_BUILD_RESULT_RE = re.compile(r"^(BUILD\s+(SUCCESSFUL|FAILED))") +_GRADLE_ACTIONABLE_RE = re.compile(r"^\d+\s+actionable\s+task") +_GRADLE_ERROR_RE = re.compile( + r"^(FAILURE:|>\s+.*[Ee]rror|e:\s+|" + r"\s+What went wrong|\s+Execution failed)" +) +_GRADLE_TEST_RESULT_RE = re.compile(r"^\d+\s+tests?\s+(completed|passed|failed)") + + +class MavenGradleProcessor(Processor): + priority = 28 + hook_patterns = [ + r"^(\.?/?mvnw?|\.?/?gradlew?)\b", + ] + + @property + def name(self) -> str: + return "maven_gradle" + + def can_handle(self, command: str) -> bool: + return bool(_MVN_RE.search(command) or _GRADLE_RE.search(command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + if _GRADLE_RE.search(command): + return self._process_gradle(output) + return self._process_maven(output) + + def _process_maven(self, 
output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + download_count = 0 + module_count = 0 + errors: list[str] = [] + warnings: list[str] = [] + test_results: list[str] = [] + in_reactor = False + reactor_lines: list[str] = [] + build_result = "" + timing_line = "" + separator_count = 0 + + for line in lines: + stripped = line.strip() + + if _MVN_DOWNLOAD_RE.match(stripped): + download_count += 1 + continue + + if _MVN_MODULE_RE.match(stripped): + module_count += 1 + continue + + if _MVN_SEPARATOR_RE.match(stripped): + separator_count += 1 + continue + + if _MVN_EMPTY_INFO_RE.match(stripped): + continue + + if _MVN_REACTOR_RE.match(stripped): + in_reactor = True + reactor_lines.append(line) + continue + + if in_reactor: + if _MVN_BUILD_RESULT_RE.match(stripped) or _MVN_TOTAL_TIME_RE.match(stripped): + in_reactor = False + else: + reactor_lines.append(line) + continue + + if _MVN_BUILD_RESULT_RE.match(stripped): + build_result = line + continue + + if _MVN_TOTAL_TIME_RE.match(stripped): + timing_line = line + continue + + if _MVN_TEST_RESULT_RE.match(stripped): + test_results.append(line) + continue + + if _MVN_ERROR_RE.match(stripped): + errors.append(line) + continue + + if _MVN_WARNING_RE.match(stripped): + warnings.append(line) + continue + + # Build compressed output + summary_parts = [] + if module_count: + summary_parts.append(f"{module_count} modules") + if download_count: + summary_parts.append(f"{download_count} downloads") + if summary_parts: + result.append(f"[{', '.join(summary_parts)}]") + + if errors: + result.extend(errors) + + if warnings: + if len(warnings) > 5: + result.extend(warnings[:5]) + result.append(f"... 
({len(warnings) - 5} more warnings)") + else: + result.extend(warnings) + + if test_results: + result.extend(test_results) + + if reactor_lines: + result.extend(reactor_lines) + + if build_result: + result.append(build_result) + if timing_line: + result.append(timing_line) + + return "\n".join(result) if result else output + + def _process_gradle(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + skipped_tasks = 0 + executed_tasks: list[str] = [] + errors: list[str] = [] + test_results: list[str] = [] + build_result = "" + actionable_line = "" + in_error_block = False + + for line in lines: + stripped = line.strip() + + m = _GRADLE_TASK_RE.match(stripped) + if m: + if _GRADLE_UPTODATE_RE.search(stripped): + skipped_tasks += 1 + else: + executed_tasks.append(m.group(1)) + in_error_block = False + continue + + if _GRADLE_BUILD_RESULT_RE.match(stripped): + build_result = line + in_error_block = False + continue + + if _GRADLE_ACTIONABLE_RE.match(stripped): + actionable_line = line + continue + + if _GRADLE_TEST_RESULT_RE.match(stripped): + test_results.append(line) + continue + + if _GRADLE_ERROR_RE.match(stripped): + in_error_block = True + errors.append(line) + continue + + if in_error_block and stripped: + errors.append(line) + continue + + # Build compressed output + summary_parts = [] + if executed_tasks: + summary_parts.append(f"{len(executed_tasks)} executed") + if skipped_tasks: + summary_parts.append(f"{skipped_tasks} up-to-date") + if summary_parts: + result.append(f"Tasks: {', '.join(summary_parts)}") + + if executed_tasks and len(executed_tasks) <= 10: + for task in executed_tasks: + result.append(f" :{task}") + elif executed_tasks: + for task in executed_tasks[:5]: + result.append(f" :{task}") + result.append(f" ... 
({len(executed_tasks) - 5} more)") + + if errors: + result.extend(errors) + + if test_results: + result.extend(test_results) + + if build_result: + result.append(build_result) + if actionable_line: + result.append(actionable_line) + + return "\n".join(result) if result else output diff --git a/src/processors/python_install.py b/src/processors/python_install.py new file mode 100644 index 0000000..739b19c --- /dev/null +++ b/src/processors/python_install.py @@ -0,0 +1,216 @@ +"""Python install processor: pip install, poetry install/update/add, uv pip install/sync.""" + +import re + +from .base import Processor + +_PIP_INSTALL_RE = re.compile(r"\bpip3?\s+install\b") +_POETRY_RE = re.compile(r"\bpoetry\s+(install|update|add)\b") +_UV_RE = re.compile(r"\buv\s+(pip\s+install|sync)\b") + +_COLLECTING_RE = re.compile(r"^\s*Collecting\s+") +_DOWNLOADING_RE = re.compile(r"^\s*(Downloading|Using cached)\s+") +_PROGRESS_RE = re.compile(r"^\s*━|^\s*\[.*\]\s+\d+%|^\s*\d+\.\d+\s*(kB|MB|GB)") +_ALREADY_RE = re.compile(r"^\s*Requirement already satisfied") +_INSTALLING_RE = re.compile(r"^\s*Installing collected packages:") +_SUCCESS_RE = re.compile(r"^\s*Successfully installed\s+(.+)") +_RESOLVING_RE = re.compile(r"^\s*(Resolving dependencies|Updating dependencies)") +_POETRY_INSTALL_RE = re.compile(r"^\s*(Installing|Updating|Removing)\s+(\S+)\s+\((.+?)\)") +_UV_RESOLVED_RE = re.compile(r"^\s*Resolved\s+(\d+)\s+packages?") +_UV_INSTALLED_RE = re.compile(r"^\s*(Installed|Uninstalled)\s+(\d+)\s+packages?") +_ERROR_RE = re.compile( + r"\b(error|Error|ERROR|exception|Exception|" + r"Could not|cannot|Cannot|FAILED|failed|" + r"conflict|Conflict|incompatible)\b" +) +_WARNING_RE = re.compile(r"\b(warning|Warning|WARNING|DEPRECATION)\b") + + +class PythonInstallProcessor(Processor): + priority = 24 + hook_patterns = [ + r"^(pip3?\s+install|poetry\s+(install|update|add)|uv\s+(pip\s+install|sync))\b", + ] + + @property + def name(self) -> str: + return "python_install" + + def 
can_handle(self, command: str) -> bool: + if re.search(r"\bpip3?\s+(list|freeze)\b", command): + return False + return bool( + _PIP_INSTALL_RE.search(command) or _POETRY_RE.search(command) or _UV_RE.search(command) + ) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + if _POETRY_RE.search(command): + return self._process_poetry(output) + if _UV_RE.search(command): + return self._process_uv(output) + return self._process_pip(output) + + def _process_pip(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + collecting_count = 0 + downloading_count = 0 + already_count = 0 + installed_packages: list[str] = [] + errors: list[str] = [] + warnings: list[str] = [] + + for line in lines: + stripped = line.strip() + if not stripped: + continue + + if _COLLECTING_RE.match(stripped): + collecting_count += 1 + elif _DOWNLOADING_RE.match(stripped) or _PROGRESS_RE.match(stripped): + downloading_count += 1 + elif _ALREADY_RE.match(stripped): + already_count += 1 + elif _INSTALLING_RE.match(stripped): + continue + elif m := _SUCCESS_RE.match(stripped): + pkgs = m.group(1).split() + installed_packages.extend(pkgs) + elif _ERROR_RE.search(stripped): + errors.append(line) + elif _WARNING_RE.search(stripped): + warnings.append(line) + + if collecting_count: + result.append(f"[{collecting_count} packages collected]") + if downloading_count: + result.append(f"[{downloading_count} downloads]") + if already_count: + result.append(f"[{already_count} already satisfied]") + + if errors: + result.extend(errors) + + if warnings: + result.extend(warnings[:5]) + if len(warnings) > 5: + result.append(f"... 
({len(warnings) - 5} more warnings)") + + if installed_packages: + result.append(f"Successfully installed {len(installed_packages)} packages:") + # Show first 10 packages, summarize rest + for pkg in installed_packages[:10]: + result.append(f" {pkg}") + if len(installed_packages) > 10: + result.append(f" ... ({len(installed_packages) - 10} more)") + + return "\n".join(result) if result else output + + def _process_poetry(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + installed: list[str] = [] + updated: list[str] = [] + removed: list[str] = [] + errors: list[str] = [] + resolving_skipped = 0 + + for line in lines: + stripped = line.strip() + if not stripped: + continue + + if _RESOLVING_RE.match(stripped): + resolving_skipped += 1 + continue + + m = _POETRY_INSTALL_RE.match(stripped) + if m: + action, pkg, version = m.groups() + if action == "Installing": + installed.append(f"{pkg} ({version})") + elif action == "Updating": + updated.append(f"{pkg} ({version})") + elif action == "Removing": + removed.append(pkg) + continue + + if _ERROR_RE.search(stripped): + errors.append(line) + + if resolving_skipped: + result.append(f"[dependency resolution: {resolving_skipped} steps]") + + if errors: + result.extend(errors) + + if installed: + result.append(f"Installed {len(installed)} packages:") + for pkg in installed[:10]: + result.append(f" {pkg}") + if len(installed) > 10: + result.append(f" ... ({len(installed) - 10} more)") + + if updated: + result.append(f"Updated {len(updated)} packages:") + for pkg in updated[:5]: + result.append(f" {pkg}") + if len(updated) > 5: + result.append(f" ... 
({len(updated) - 5} more)") + + if removed: + result.append(f"Removed {len(removed)} packages") + + return "\n".join(result) if result else output + + def _process_uv(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + errors: list[str] = [] + resolved = 0 + installed = 0 + uninstalled = 0 + downloading_count = 0 + + for line in lines: + stripped = line.strip() + if not stripped: + continue + + m = _UV_RESOLVED_RE.match(stripped) + if m: + resolved = int(m.group(1)) + continue + + m = _UV_INSTALLED_RE.match(stripped) + if m: + action = m.group(1) + count = int(m.group(2)) + if action == "Installed": + installed = count + else: + uninstalled = count + continue + + if _DOWNLOADING_RE.match(stripped) or _PROGRESS_RE.match(stripped): + downloading_count += 1 + continue + + if _ERROR_RE.search(stripped): + errors.append(line) + + if resolved: + result.append(f"Resolved {resolved} packages") + if downloading_count: + result.append(f"[{downloading_count} downloads]") + if errors: + result.extend(errors) + if installed: + result.append(f"Installed {installed} packages") + if uninstalled: + result.append(f"Uninstalled {uninstalled} packages") + + return "\n".join(result) if result else output diff --git a/src/processors/structured_log.py b/src/processors/structured_log.py new file mode 100644 index 0000000..a7f5012 --- /dev/null +++ b/src/processors/structured_log.py @@ -0,0 +1,159 @@ +"""Structured log processor: JSON Lines output from stern, kubetail, and similar tools.""" + +import json +import re +from collections import defaultdict + +from .. 
import config +from .base import Processor +from .utils import compress_log_lines + +_STERN_RE = re.compile(r"\b(stern|kubetail)\b") + +# Common JSON log level keys +_LEVEL_KEYS = ("level", "severity", "log_level", "loglevel", "lvl", "log.level") +_MESSAGE_KEYS = ("msg", "message", "text", "log", "body") +_TIMESTAMP_KEYS = ("timestamp", "time", "ts", "@timestamp", "datetime", "date") + +_ERROR_LEVELS = {"error", "fatal", "critical", "panic", "err", "crit", "emerg", "alert"} +_WARN_LEVELS = {"warn", "warning"} + + +class StructuredLogProcessor(Processor): + priority = 45 + hook_patterns = [ + r"^(stern|kubetail)\b", + ] + + @property + def name(self) -> str: + return "structured_log" + + def can_handle(self, command: str) -> bool: + return bool(_STERN_RE.search(command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + lines = output.splitlines() + if len(lines) < 5: + return output + + # Try to parse as JSON lines + parsed_lines: list[dict | None] = [] + json_count = 0 + for line in lines: + stripped = line.strip() + if not stripped: + parsed_lines.append(None) + continue + try: + obj = json.loads(stripped) + if isinstance(obj, dict): + parsed_lines.append(obj) + json_count += 1 + else: + parsed_lines.append(None) + except (json.JSONDecodeError, ValueError): + parsed_lines.append(None) + + non_empty = sum(1 for line in lines if line.strip()) + # If less than 50% lines are JSON objects, fall back to log compression + if non_empty == 0 or json_count / non_empty < 0.5: + keep_head = config.get("kubectl_keep_head") + keep_tail = config.get("kubectl_keep_tail") + return compress_log_lines(lines, keep_head=keep_head, keep_tail=keep_tail) + + return self._process_json_lines(lines, parsed_lines) + + def _process_json_lines(self, raw_lines: list[str], parsed: list[dict | None]) -> str: + # Group by level + level_counts: dict[str, int] = defaultdict(int) + error_lines: list[str] = [] + total = 0 + + for i, 
obj in enumerate(parsed): + if obj is None: + continue + total += 1 + level = self._extract_level(obj) + level_counts[level] += 1 + + if level in _ERROR_LEVELS: + msg = self._extract_message(obj) + if msg: + error_lines.append(f" [{level.upper()}] {msg}") + else: + # Keep raw line but truncate + raw = raw_lines[i].strip() + if len(raw) > 200: + raw = raw[:197] + "..." + error_lines.append(f" {raw}") + + result = [f"{total} log entries:"] + + # Level summary + for level in ( + "error", + "fatal", + "critical", + "panic", + "warn", + "warning", + "info", + "debug", + "trace", + ): + if level in level_counts: + result.append(f" {level}: {level_counts[level]}") + + # Other levels not in the standard list + for level, count in sorted(level_counts.items(), key=lambda x: -x[1]): + if level not in ( + "error", + "fatal", + "critical", + "panic", + "warn", + "warning", + "info", + "debug", + "trace", + ): + result.append(f" {level}: {count}") + + # Show error messages + if error_lines: + result.append(f"\nErrors ({len(error_lines)}):") + max_errors = 10 + result.extend(error_lines[:max_errors]) + if len(error_lines) > max_errors: + result.append(f" ... ({len(error_lines) - max_errors} more)") + + return "\n".join(result) + + def _extract_level(self, obj: dict) -> str: + """Extract log level from a JSON log entry.""" + for key in _LEVEL_KEYS: + if key in obj: + val = str(obj[key]).lower().strip() + return val + # Fallback: look for common patterns in message + msg = self._extract_message(obj) + if msg: + if re.search(r"\b(ERROR|FATAL|PANIC)\b", msg): + return "error" + if re.search(r"\bWARN(ING)?\b", msg): + return "warn" + return "unknown" + + def _extract_message(self, obj: dict) -> str: + """Extract message from a JSON log entry.""" + for key in _MESSAGE_KEYS: + if key in obj: + val = str(obj[key]) + if len(val) > 200: + val = val[:197] + "..." 
+ return val + return "" diff --git a/tests/test_config.py b/tests/test_config.py index 49dc511..d8b30e5 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -54,6 +54,47 @@ def test_env_override_bool(self): del os.environ["TOKEN_SAVER_DEBUG"] config.reload() + def test_default_disabled_processors(self, monkeypatch): + for key in list(os.environ): + if key.startswith("TOKEN_SAVER_"): + monkeypatch.delenv(key) + config.reload() + assert config.get("disabled_processors") == [] + + def test_env_override_list(self): + os.environ["TOKEN_SAVER_DISABLED_PROCESSORS"] = "git,docker" # noqa: S105 + config.reload() + try: + assert config.get("disabled_processors") == ["git", "docker"] + finally: + del os.environ["TOKEN_SAVER_DISABLED_PROCESSORS"] + config.reload() + + def test_env_override_list_single_value(self): + os.environ["TOKEN_SAVER_DISABLED_PROCESSORS"] = "git" # noqa: S105 + config.reload() + try: + assert config.get("disabled_processors") == ["git"] + finally: + del os.environ["TOKEN_SAVER_DISABLED_PROCESSORS"] + config.reload() + + def test_default_max_chain_depth(self, monkeypatch): + for key in list(os.environ): + if key.startswith("TOKEN_SAVER_"): + monkeypatch.delenv(key) + config.reload() + assert config.get("max_chain_depth") == 3 + + def test_env_override_list_empty_string(self): + os.environ["TOKEN_SAVER_DISABLED_PROCESSORS"] = "" + config.reload() + try: + assert config.get("disabled_processors") == [] + finally: + del os.environ["TOKEN_SAVER_DISABLED_PROCESSORS"] + config.reload() + def test_invalid_env_value_ignored(self): os.environ["TOKEN_SAVER_MIN_INPUT_LENGTH"] = "not_a_number" # noqa: S105 config.reload() diff --git a/tests/test_engine.py b/tests/test_engine.py index 765a2b1..f59fc9d 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -210,9 +210,9 @@ class TestProcessorRegistry: """Tests for auto-discovery and the processor registry.""" def test_discover_processors_finds_all(self): - """Auto-discovery should find all 25 
processors.""" + """Auto-discovery should find all 29 processors.""" processors = discover_processors() - assert len(processors) == 25 + assert len(processors) == 29 def test_discover_processors_sorted_by_priority(self): """Processors must be returned in ascending priority order.""" @@ -247,8 +247,11 @@ def test_expected_priority_order(self): assert name_to_priority["test"] == 21 assert name_to_priority["cargo"] == 22 assert name_to_priority["go"] == 23 + assert name_to_priority["python_install"] == 24 assert name_to_priority["build"] == 25 + assert name_to_priority["cargo_clippy"] == 26 assert name_to_priority["lint"] == 27 + assert name_to_priority["maven_gradle"] == 28 assert name_to_priority["network"] == 30 assert name_to_priority["docker"] == 31 assert name_to_priority["kubectl"] == 32 @@ -264,6 +267,7 @@ def test_expected_priority_order(self): assert name_to_priority["syslog"] == 42 assert name_to_priority["ssh"] == 43 assert name_to_priority["jq_yq"] == 44 + assert name_to_priority["structured_log"] == 45 assert name_to_priority["file_listing"] == 50 assert name_to_priority["file_content"] == 51 assert name_to_priority["generic"] == 999 @@ -417,6 +421,25 @@ def test_collect_hook_patterns_covers_key_commands(self): "yq . 
config.yaml", "ssh host 'ls -la'", "scp file.txt host:/tmp/", + # Python install (dedicated processor) + "pip install flask", + "pip3 install -r requirements.txt", + "poetry install", + "poetry update", + "poetry add requests", + "uv pip install flask", + "uv sync", + # Cargo clippy (dedicated processor) + "cargo clippy", + # Maven/Gradle (dedicated processor) + "mvn clean install", + "mvn package", + "./mvnw verify", + "gradle build", + "./gradlew assemble", + # Structured log + "stern my-pod", + "kubetail my-service", ] for cmd in test_commands: @@ -433,6 +456,78 @@ def test_engine_uses_discovered_processors(self): assert ep.priority == dp.priority +class TestDisabledProcessors: + """Tests for per-processor enable/disable.""" + + def test_disabled_processor_excluded(self, monkeypatch): + monkeypatch.setenv("TOKEN_SAVER_DISABLED_PROCESSORS", "git") + from src import config + + config.reload() + engine = CompressionEngine() + names = [p.name for p in engine.processors] + assert "git" not in names + assert "build" in names # Other processors still present + monkeypatch.delenv("TOKEN_SAVER_DISABLED_PROCESSORS") + config.reload() + + def test_disabled_generic_ignored(self, monkeypatch): + """Generic processor cannot be disabled.""" + monkeypatch.setenv("TOKEN_SAVER_DISABLED_PROCESSORS", "generic") + from src import config + + config.reload() + engine = CompressionEngine() + names = [p.name for p in engine.processors] + assert "generic" in names + monkeypatch.delenv("TOKEN_SAVER_DISABLED_PROCESSORS") + config.reload() + + def test_disabled_multiple_processors(self, monkeypatch): + monkeypatch.setenv("TOKEN_SAVER_DISABLED_PROCESSORS", "git,docker,lint") + from src import config + + config.reload() + engine = CompressionEngine() + names = [p.name for p in engine.processors] + assert "git" not in names + assert "docker" not in names + assert "lint" not in names + assert "build" in names + monkeypatch.delenv("TOKEN_SAVER_DISABLED_PROCESSORS") + config.reload() + + def 
test_disabled_processors_string_in_json_ignored(self, monkeypatch): + """If disabled_processors is a string (wrong type from JSON), treat as empty.""" + from src import config + + # Simulate a JSON config with wrong type: "lint" instead of ["lint"] + cfg = {**config._load_config(), "disabled_processors": "lint"} + monkeypatch.setattr(config, "_config", cfg) + engine = CompressionEngine() + names = [p.name for p in engine.processors] + # "lint" as string should NOT disable any processor (would be {"l","i","n","t"} otherwise) + assert "lint" in names + config.reload() + + def test_disabled_processors_hook_patterns(self, monkeypatch): + """Disabled processors should not contribute hook patterns.""" + import re + + monkeypatch.setenv("TOKEN_SAVER_DISABLED_PROCESSORS", "git") + from src import config + + config.reload() + patterns = collect_hook_patterns() + compiled = [re.compile(p) for p in patterns] + # git status should NOT match any pattern + assert not any(p.search("git status") for p in compiled) + # Other commands should still match + assert any(p.search("pytest tests/") for p in compiled) + monkeypatch.delenv("TOKEN_SAVER_DISABLED_PROCESSORS") + config.reload() + + class TestProcessorChaining: """Tests for multi-processor chaining infrastructure.""" @@ -441,7 +536,10 @@ def setup_method(self): def test_chain_to_attribute_default_none(self): for p in self.engine.processors: - assert p.chain_to is None + if p.name == "cargo_clippy": + assert p.chain_to == ["lint"] + else: + assert p.chain_to is None def test_processor_by_name_lookup(self): assert "git" in self.engine._by_name @@ -450,3 +548,226 @@ def test_processor_by_name_lookup(self): assert "go" in self.engine._by_name assert "ssh" in self.engine._by_name assert "jq_yq" in self.engine._by_name + assert "python_install" in self.engine._by_name + assert "cargo_clippy" in self.engine._by_name + assert "maven_gradle" in self.engine._by_name + assert "structured_log" in self.engine._by_name + + def 
test_chain_to_string_backward_compat(self): + """String chain_to should work (normalized to single-element list).""" + from src.processors.base import Processor + + class FakeA(Processor): + priority = 1 + hook_patterns = [] + chain_to = "generic" + + @property + def name(self): + return "fake_a" + + def can_handle(self, command): + return command == "fake_chain" + + def process(self, command, output): + return output.replace("AAA", "BBB") + + engine = self.engine + # Inject fake processor + engine.processors.insert(0, FakeA()) + engine._by_name["fake_a"] = engine.processors[0] + + output = "AAA\n" * 300 + _compressed, proc, _was = engine.compress("fake_chain", output) + # FakeA transforms AAA->BBB, then chains to generic + assert proc in ("fake_a", "generic") + + def test_chain_to_list(self): + """List chain_to should apply processors in sequence.""" + from src.processors.base import Processor + + class ProcA(Processor): + priority = 1 + hook_patterns = [] + chain_to = ["proc_b"] + + @property + def name(self): + return "proc_a" + + def can_handle(self, command): + return command == "chain_list_test" + + def process(self, command, output): + return output.replace("STEP1", "STEP2") + + class ProcB(Processor): + priority = 2 + hook_patterns = [] + + @property + def name(self): + return "proc_b" + + def can_handle(self, command): + return False + + def process(self, command, output): + return output.replace("STEP2", "STEP3") + + engine = self.engine + a, b = ProcA(), ProcB() + engine.processors.insert(0, a) + engine.processors.insert(1, b) + engine._by_name["proc_a"] = a + engine._by_name["proc_b"] = b + + output = "STEP1\n" * 100 + compressed, _proc, was = engine.compress("chain_list_test", output) + if was: + assert "STEP3" in compressed + + def test_chain_cycle_detection(self): + """Cycle in chain_to should not cause infinite loop.""" + from src.processors.base import Processor + + class CycleA(Processor): + priority = 1 + hook_patterns = [] + chain_to = 
["cycle_b"] + + @property + def name(self): + return "cycle_a" + + def can_handle(self, command): + return command == "cycle_test" + + def process(self, command, output): + return output + "\nA" + + class CycleB(Processor): + priority = 2 + hook_patterns = [] + chain_to = ["cycle_a"] + + @property + def name(self): + return "cycle_b" + + def can_handle(self, command): + return False + + def process(self, command, output): + return output + "\nB" + + engine = self.engine + a, b = CycleA(), CycleB() + engine.processors.insert(0, a) + engine.processors.insert(1, b) + engine._by_name["cycle_a"] = a + engine._by_name["cycle_b"] = b + + output = "start\n" * 100 + # Should not hang + _compressed, proc, _was = engine.compress("cycle_test", output) + assert proc in ("cycle_a", "generic", "none") + + def test_chain_unknown_name_skipped(self): + """Unknown processor name in chain_to should be silently skipped.""" + from src.processors.base import Processor + + class UnknownChain(Processor): + priority = 1 + hook_patterns = [] + chain_to = ["nonexistent_processor"] + + @property + def name(self): + return "unknown_chain" + + def can_handle(self, command): + return command == "unknown_chain_test" + + def process(self, command, output): + return output.replace("X", "Y") + + engine = self.engine + p = UnknownChain() + engine.processors.insert(0, p) + engine._by_name["unknown_chain"] = p + + output = "X\n" * 100 + # Should not raise + _compressed, proc, _was = engine.compress("unknown_chain_test", output) + assert proc in ("unknown_chain", "generic", "none") + + def test_chain_max_depth(self, monkeypatch): + """max_chain_depth config should limit chaining.""" + from src import config + from src.processors.base import Processor + + monkeypatch.setenv("TOKEN_SAVER_MAX_CHAIN_DEPTH", "1") + config.reload() + + class DepthA(Processor): + priority = 1 + hook_patterns = [] + chain_to = ["depth_b", "depth_c"] + + @property + def name(self): + return "depth_a" + + def can_handle(self, 
command): + return command == "depth_test" + + def process(self, command, output): + return output.replace("D0", "D1") + + class DepthB(Processor): + priority = 2 + hook_patterns = [] + + @property + def name(self): + return "depth_b" + + def can_handle(self, command): + return False + + def process(self, command, output): + return output.replace("D1", "D2") + + class DepthC(Processor): + priority = 3 + hook_patterns = [] + + @property + def name(self): + return "depth_c" + + def can_handle(self, command): + return False + + def process(self, command, output): + return output.replace("D2", "D3") + + engine = CompressionEngine() + a, b, c = DepthA(), DepthB(), DepthC() + engine.processors.insert(0, a) + engine.processors.insert(1, b) + engine.processors.insert(2, c) + engine._by_name["depth_a"] = a + engine._by_name["depth_b"] = b + engine._by_name["depth_c"] = c + + output = "D0\n" * 100 + compressed, _proc, was = engine.compress("depth_test", output) + if was: + # With max_depth=1, only depth_b should run (not depth_c) + assert "D2" in compressed + assert "D3" not in compressed + + monkeypatch.delenv("TOKEN_SAVER_MAX_CHAIN_DEPTH") + config.reload() diff --git a/tests/test_hooks.py b/tests/test_hooks.py index dc070b8..0284ff2 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -49,6 +49,25 @@ def test_build_commands_compressible(self): assert is_compressible("webpack") assert is_compressible("next build") + def test_python_install_commands_compressible(self): + assert is_compressible("pip install flask") + assert is_compressible("pip3 install -r requirements.txt") + assert is_compressible("poetry install") + assert is_compressible("poetry update") + assert is_compressible("poetry add requests") + assert is_compressible("uv pip install flask") + assert is_compressible("uv sync") + + def test_maven_gradle_commands_compressible(self): + assert is_compressible("mvn clean install") + assert is_compressible("mvn package") + assert is_compressible("gradle 
build") + assert is_compressible("./gradlew assemble") + + def test_structured_log_commands_compressible(self): + assert is_compressible("stern my-pod") + assert is_compressible("kubetail my-service") + def test_lint_commands_compressible(self): assert is_compressible("eslint src/") assert is_compressible("ruff check .") diff --git a/tests/test_processors.py b/tests/test_processors.py index 90dfc9b..e9d64a3 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -9,6 +9,7 @@ from src.processors.ansible import AnsibleProcessor from src.processors.build_output import BuildOutputProcessor from src.processors.cargo import CargoProcessor +from src.processors.cargo_clippy import CargoClippyProcessor from src.processors.cloud_cli import CloudCliProcessor from src.processors.db_query import DbQueryProcessor from src.processors.docker import DockerProcessor @@ -23,10 +24,13 @@ from src.processors.jq_yq import JqYqProcessor from src.processors.kubectl import KubectlProcessor from src.processors.lint_output import LintOutputProcessor +from src.processors.maven_gradle import MavenGradleProcessor from src.processors.network import NetworkProcessor from src.processors.package_list import PackageListProcessor +from src.processors.python_install import PythonInstallProcessor from src.processors.search import SearchProcessor from src.processors.ssh import SshProcessor +from src.processors.structured_log import StructuredLogProcessor from src.processors.syslog import SyslogProcessor from src.processors.system_info import SystemInfoProcessor from src.processors.terraform import TerraformProcessor @@ -684,10 +688,13 @@ def test_can_handle_build_commands(self): assert self.p.can_handle("npm run build") assert not self.p.can_handle("cargo build") # handled by CargoProcessor assert self.p.can_handle("make") - assert self.p.can_handle("pip install -r requirements.txt") + assert not self.p.can_handle("pip install -r requirements.txt") # PythonInstallProcessor assert 
self.p.can_handle("yarn add lodash") assert self.p.can_handle("next build") assert not self.p.can_handle("git status") + assert not self.p.can_handle("mvn clean install") # MavenGradleProcessor + assert not self.p.can_handle("gradle build") # MavenGradleProcessor + assert not self.p.can_handle("./gradlew assemble") # MavenGradleProcessor def test_empty_output(self): assert self.p.process("npm run build", "") == "" @@ -821,20 +828,10 @@ def test_npm_audit_groups_by_severity(self): assert "high" in result assert "vulnerabilities" in result.lower() or "found" in result.lower() - def test_pip_progress_skipped(self): - output = "\n".join( - [ - "Collecting requests", - " Downloading requests-2.31.0-py3-none-any.whl", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.6/62.6 kB 1.2 MB/s", - "Installing collected packages: requests", - "Successfully installed requests-2.31.0", - ] - ) - result = self.p.process("pip install requests", output) - assert "━" not in result - assert "Collecting" not in result - assert "Build succeeded" in result + def test_pip_install_not_handled(self): + """pip install is now handled by PythonInstallProcessor.""" + assert not self.p.can_handle("pip install requests") + assert not self.p.can_handle("pip3 install flask") def test_yarn_berry_step_progress_skipped(self): """Yarn Berry (v2+) outputs step lines prefixed with ➤ YN0000: ┌/└.""" @@ -3908,3 +3905,359 @@ def test_yq_large_output_summarized(self): output = "\n".join(lines) result = self.p.process("yq . 
config.yaml", output) assert len(result) < len(output) + + +# ─────────────────────────────────────────────────────────────── +# PythonInstallProcessor +# ─────────────────────────────────────────────────────────────── + + +class TestPythonInstallProcessor: + def setup_method(self): + self.p = PythonInstallProcessor() + + def test_can_handle_pip_install(self): + assert self.p.can_handle("pip install flask") + assert self.p.can_handle("pip3 install -r requirements.txt") + + def test_can_handle_poetry(self): + assert self.p.can_handle("poetry install") + assert self.p.can_handle("poetry update") + assert self.p.can_handle("poetry add requests") + + def test_can_handle_uv(self): + assert self.p.can_handle("uv pip install flask") + assert self.p.can_handle("uv sync") + + def test_not_handle_pip_list(self): + assert not self.p.can_handle("pip list") + assert not self.p.can_handle("pip freeze") + + def test_not_handle_unrelated(self): + assert not self.p.can_handle("npm install") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("pip install flask", "") == "" + + def test_pip_install_compressed(self): + lines = [] + for i in range(30): + lines.append(f"Collecting package-{i}>=1.0") + for i in range(30): + lines.append(f" Downloading package_{i}-1.2.3-py3-none-any.whl (10 kB)") + lines.append("Installing collected packages: " + ", ".join(f"p{i}" for i in range(30))) + lines.append("Successfully installed " + " ".join(f"package-{i}-1.2.3" for i in range(30))) + output = "\n".join(lines) + + result = self.p.process("pip install -r requirements.txt", output) + assert len(result) < len(output) + assert "30 packages collected" in result + assert "30 downloads" in result + assert "Successfully installed 30 packages" in result + + def test_pip_errors_preserved(self): + output = ( + "Collecting nonexistent-pkg\n" + " ERROR: Could not find a version that satisfies the requirement\n" + "ERROR: No matching distribution found for 
nonexistent-pkg" + ) + result = self.p.process("pip install nonexistent-pkg", output) + assert "ERROR" in result + assert "Could not find" in result + + def test_pip_already_satisfied(self): + lines = [f"Requirement already satisfied: pkg-{i} in /usr/lib" for i in range(20)] + output = "\n".join(lines) + result = self.p.process("pip install flask", output) + assert "20 already satisfied" in result + + def test_poetry_install_compressed(self): + lines = ["Resolving dependencies..."] + for i in range(20): + lines.append(f" Installing package-{i} (1.{i}.0)") + output = "\n".join(lines) + + result = self.p.process("poetry install", output) + assert len(result) < len(output) + assert "Installed 20 packages" in result + assert "dependency resolution" in result + + def test_uv_sync_compressed(self): + output = ( + "Resolved 42 packages in 1.2s\n" + "Downloading flask-2.0.0\n" + "Downloading requests-2.28.0\n" + "Installed 5 packages in 0.5s" + ) + result = self.p.process("uv sync", output) + assert "Resolved 42 packages" in result + assert "Installed 5 packages" in result + + +# ─────────────────────────────────────────────────────────────── +# CargoClippyProcessor +# ─────────────────────────────────────────────────────────────── + + +class TestCargoClippyProcessor: + def setup_method(self): + self.p = CargoClippyProcessor() + + def test_can_handle_cargo_clippy(self): + assert self.p.can_handle("cargo clippy") + assert self.p.can_handle("cargo clippy --all-targets") + assert self.p.can_handle("cargo clippy -- -W clippy::all") + + def test_not_handle_cargo_build(self): + assert not self.p.can_handle("cargo build") + assert not self.p.can_handle("cargo test") + + def test_empty_output(self): + assert self.p.process("cargo clippy", "") == "" + + def test_warning_blocks_grouped(self): + lines = [] + # 5 warnings of same rule + for i in range(5): + lines.append("warning[clippy::needless_return]: unneeded `return` statement") + lines.append(f" --> src/file{i}.rs:10:5") + 
lines.append(" |") + lines.append("10 | return x;") + lines.append(" | ^^^^^^^^^ help: remove `return`") + lines.append(" |") + lines.append("warning: `my_crate` (bin) generated 5 warnings") + output = "\n".join(lines) + + result = self.p.process("cargo clippy", output) + assert "clippy::needless_return" in result + assert "5 occurrences" in result + assert len(result) < len(output) + + def test_errors_preserved(self): + output = ( + "error[E0308]: mismatched types\n" + " --> src/main.rs:5:5\n" + " |\n" + '5 | let x: i32 = "hello";\n' + " | ^^^^^^^ expected `i32`, found `&str`" + ) + result = self.p.process("cargo clippy", output) + assert "error[E0308]" in result + assert "mismatched types" in result + + def test_checking_count(self): + output = ( + " Checking serde v1.0.0\n" + " Checking tokio v1.0.0\n" + " Checking my-crate v0.1.0\n" + " Finished `dev` profile\n" + ) + result = self.p.process("cargo clippy", output) + assert "3 checked" in result + + def test_mixed_warnings_and_errors(self): + output = ( + " Checking my-crate v0.1.0\n" + "warning[clippy::unused_imports]: unused import\n" + " --> src/lib.rs:1:5\n" + " |\n" + "1 | use std::io;\n" + " | ^^^^^^^\n" + "error[E0599]: method not found\n" + " --> src/main.rs:10:5\n" + " |\n" + "10 | x.foo();\n" + " | ^^^ method not found\n" + ) + result = self.p.process("cargo clippy", output) + assert "error[E0599]" in result + assert "method not found" in result + + +# ─────────────────────────────────────────────────────────────── +# MavenGradleProcessor +# ─────────────────────────────────────────────────────────────── + + +class TestMavenGradleProcessor: + def setup_method(self): + self.p = MavenGradleProcessor() + + def test_can_handle_mvn(self): + assert self.p.can_handle("mvn clean install") + assert self.p.can_handle("mvn package") + assert self.p.can_handle("./mvnw verify") + + def test_can_handle_gradle(self): + assert self.p.can_handle("gradle build") + assert self.p.can_handle("./gradlew assemble") + assert 
self.p.can_handle("gradle test") + + def test_not_handle_unrelated(self): + assert not self.p.can_handle("npm run build") + assert not self.p.can_handle("cargo build") + + def test_empty_output(self): + assert self.p.process("mvn clean install", "") == "" + + def test_maven_downloads_stripped(self): + lines = [] + for i in range(50): + lines.append( + f"[INFO] Downloading from central: https://repo.maven.org/artifact-{i}.jar" + ) + lines.append( + f"[INFO] Downloaded from central: https://repo.maven.org/artifact-{i}.jar (10 kB)" + ) + lines.append("[INFO] Building my-project 1.0.0 [1/3]") + lines.append("[INFO] BUILD SUCCESS") + lines.append("[INFO] Total time: 45.2 s") + output = "\n".join(lines) + + result = self.p.process("mvn clean install", output) + assert len(result) < len(output) + assert "100 downloads" in result + assert "BUILD SUCCESS" in result + + def test_maven_errors_preserved(self): + output = ( + "[INFO] Building my-project 1.0.0\n" + "[ERROR] Failed to execute goal: compilation failure\n" + "[ERROR] src/main/java/App.java:[10,5] cannot find symbol\n" + "[INFO] BUILD FAILURE\n" + "[INFO] Total time: 5.1 s" + ) + result = self.p.process("mvn compile", output) + assert "ERROR" in result + assert "cannot find symbol" in result + assert "BUILD FAILURE" in result + + def test_maven_test_results_preserved(self): + output = ( + "[INFO] Building my-project 1.0.0\n" + "[INFO] Tests run: 42, Failures: 1, Errors: 0, Skipped: 2\n" + "[INFO] BUILD FAILURE" + ) + result = self.p.process("mvn test", output) + assert "Tests run: 42" in result + + def test_gradle_tasks_compressed(self): + lines = [] + for i in range(20): + lines.append(f"> Task :sub{i}:compileJava UP-TO-DATE") + lines.append("> Task :app:compileJava") + lines.append("> Task :app:processResources NO-SOURCE") + lines.append("> Task :app:jar") + lines.append("") + lines.append("BUILD SUCCESSFUL in 12s") + lines.append("23 actionable tasks: 2 executed, 21 up-to-date") + output = "\n".join(lines) + + 
result = self.p.process("gradle build", output) + assert len(result) < len(output) + assert "2 executed" in result + assert "21 up-to-date" in result + assert "BUILD SUCCESSFUL" in result + + def test_gradle_errors_preserved(self): + output = ( + "> Task :app:compileJava\n" + "FAILURE: Build failed with an exception.\n" + "\n" + "* What went wrong:\n" + "Execution failed for task ':app:compileJava'.\n" + "> Compilation failed\n" + "\n" + "BUILD FAILED in 5s" + ) + result = self.p.process("./gradlew build", output) + assert "FAILURE" in result + assert "Compilation failed" in result + assert "BUILD FAILED" in result + + def test_gradle_test_results(self): + output = "> Task :test\n10 tests completed, 2 failed\n\nBUILD FAILED in 8s" + result = self.p.process("gradle test", output) + assert "10 tests completed, 2 failed" in result + + +# ─────────────────────────────────────────────────────────────── +# StructuredLogProcessor +# ─────────────────────────────────────────────────────────────── + + +class TestStructuredLogProcessor: + def setup_method(self): + self.p = StructuredLogProcessor() + + def test_can_handle_stern(self): + assert self.p.can_handle("stern my-pod") + assert self.p.can_handle("stern -n default my-pod") + + def test_can_handle_kubetail(self): + assert self.p.can_handle("kubetail my-service") + + def test_not_handle_unrelated(self): + assert not self.p.can_handle("kubectl logs my-pod") + assert not self.p.can_handle("docker logs container") + + def test_empty_output(self): + assert self.p.process("stern my-pod", "") == "" + + def test_json_lines_compressed(self): + import json + + lines = [] + for i in range(30): + entry = { + "level": "info", + "msg": f"processing item {i}", + "ts": f"2024-01-01T00:00:{i:02d}Z", + } + lines.append(json.dumps(entry)) + for i in range(5): + entry = { + "level": "error", + "msg": f"failed to process item {i}", + "ts": f"2024-01-01T00:01:{i:02d}Z", + } + lines.append(json.dumps(entry)) + output = "\n".join(lines) + + 
result = self.p.process("stern my-pod", output) + assert len(result) < len(output) + assert "35 log entries" in result + assert "info: 30" in result + assert "error: 5" in result + assert "Errors (5)" in result + + def test_non_json_fallback(self): + lines = [f"plain log line {i}" for i in range(50)] + output = "\n".join(lines) + result = self.p.process("stern my-pod", output) + # Should still compress via log compression fallback + assert len(result) < len(output) + + def test_mixed_json_non_json(self): + import json + + lines = ["plain text line"] + for i in range(20): + lines.append(json.dumps({"level": "info", "msg": f"msg {i}"})) + lines.append("another plain line") + output = "\n".join(lines) + result = self.p.process("stern my-pod", output) + assert "log entries" in result + + def test_error_messages_shown(self): + import json + + lines = [] + for i in range(10): + lines.append(json.dumps({"level": "info", "msg": f"ok {i}"})) + lines.append(json.dumps({"level": "error", "msg": "database connection failed"})) + output = "\n".join(lines) + + result = self.p.process("stern my-pod", output) + assert "database connection failed" in result From f795261f6dfd8336a72591e4f7420771e6c4f9d7 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Sat, 28 Mar 2026 16:10:46 +0100 Subject: [PATCH 5/6] fix: bugs and code deduplication in processors - Fix missing @ separator in Pipfile.lock package formatting - Fix overly permissive tree summary regex in file_listing - Fix extra newlines in generic truncation message - Deduplicate poetry.lock/Cargo.lock parsers into _compress_toml_lock - Extract shared Rust compiler regex patterns to utils.py --- src/processors/cargo.py | 40 ++++++++++++++++++---------------- src/processors/cargo_clippy.py | 25 +++++++++++---------- src/processors/file_content.py | 33 ++++++++-------------------- src/processors/file_listing.py | 2 +- src/processors/generic.py | 2 +- src/processors/utils.py | 8 
+++++++ 6 files changed, 53 insertions(+), 57 deletions(-) diff --git a/src/processors/cargo.py b/src/processors/cargo.py index f40d52f..7a48513 100644 --- a/src/processors/cargo.py +++ b/src/processors/cargo.py @@ -5,17 +5,19 @@ from .. import config from .base import Processor +from .utils import ( + RUST_COMPILING_RE, + RUST_ERROR_START_RE, + RUST_FINISHED_RE, + RUST_SPAN_LINE_RE, + RUST_WARNING_START_RE, + RUST_WARNING_SUMMARY_RE, +) _CARGO_CMD_RE = re.compile(r"\bcargo\s+(build|check|doc|update|bench)\b") -_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") _DOWNLOADING_RE = re.compile(r"^\s*Downloading\s+\S+\s+v") _DOCUMENTING_RE = re.compile(r"^\s*Documenting\s+\S+\s+v") _RUNNING_RE = re.compile(r"^\s*Running\s+") -_FINISHED_RE = re.compile(r"^\s*Finished\s+") -_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") -_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") -_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") -_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") _UPDATE_LINE_RE = re.compile( r"^\s*(Updating|Removing|Adding)\s+(\S+)\s+v([\d.]+)(?:\s*->\s*v([\d.]+))?" 
) @@ -89,7 +91,7 @@ def _process_cargo_build(self, output: str) -> str: for line in lines: stripped = line.strip() - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 continue if _DOWNLOADING_RE.match(stripped): @@ -97,7 +99,7 @@ def _process_cargo_build(self, output: str) -> str: continue # Error start - if _ERROR_START_RE.match(stripped): + if RUST_ERROR_START_RE.match(stripped): # Flush current warning block if current_type and current_block: warnings_by_type[current_type].append(current_block) @@ -111,8 +113,8 @@ def _process_cargo_build(self, output: str) -> str: continue # Warning start - wm = _WARNING_START_RE.match(stripped) - if wm and not _WARNING_SUMMARY_RE.match(stripped): + wm = RUST_WARNING_START_RE.match(stripped) + if wm and not RUST_WARNING_SUMMARY_RE.match(stripped): # Flush previous if in_error and current_error: error_blocks.append(current_error) @@ -127,7 +129,7 @@ def _process_cargo_build(self, output: str) -> str: current_block = [line] continue - if _WARNING_SUMMARY_RE.match(stripped): + if RUST_WARNING_SUMMARY_RE.match(stripped): if current_type and current_block: warnings_by_type[current_type].append(current_block) current_block = [] @@ -139,7 +141,7 @@ def _process_cargo_build(self, output: str) -> str: warning_summary_lines.append(line) continue - if _FINISHED_RE.match(stripped): + if RUST_FINISHED_RE.match(stripped): if current_type and current_block: warnings_by_type[current_type].append(current_block) current_block = [] @@ -201,16 +203,16 @@ def _process_cargo_doc(self, output: str) -> str: for line in lines: stripped = line.strip() - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 elif _DOCUMENTING_RE.match(stripped): documenting_count += 1 elif ( - _FINISHED_RE.match(stripped) + RUST_FINISHED_RE.match(stripped) or re.match(r"^\s*Generated\s+", stripped) or re.search(r"\bwarning\b", stripped) - or _ERROR_START_RE.match(stripped) - or 
(_SPAN_LINE_RE.match(stripped) and result) + or RUST_ERROR_START_RE.match(stripped) + or (RUST_SPAN_LINE_RE.match(stripped) and result) ): result.append(line) @@ -274,15 +276,15 @@ def _process_cargo_bench(self, output: str) -> str: for line in lines: stripped = line.strip() - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 elif _RUNNING_RE.match(stripped): continue elif ( re.match(r"^test\s+.+\s+bench:", stripped) or re.match(r"^test result:", stripped) - or _FINISHED_RE.match(stripped) - or _ERROR_START_RE.match(stripped) + or RUST_FINISHED_RE.match(stripped) + or RUST_ERROR_START_RE.match(stripped) ): result.append(line) diff --git a/src/processors/cargo_clippy.py b/src/processors/cargo_clippy.py index cfe83fd..65f03c2 100644 --- a/src/processors/cargo_clippy.py +++ b/src/processors/cargo_clippy.py @@ -5,15 +5,16 @@ from .. import config from .base import Processor +from .utils import ( + RUST_COMPILING_RE, + RUST_ERROR_START_RE, + RUST_FINISHED_RE, + RUST_WARNING_START_RE, + RUST_WARNING_SUMMARY_RE, +) _CLIPPY_CMD_RE = re.compile(r"\bcargo\s+clippy\b") -_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") -_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") -_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") -_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") -_FINISHED_RE = re.compile(r"^\s*Finished\s+") _CHECKING_RE = re.compile(r"^\s*Checking\s+\S+\s+v") -_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") # Clippy lint categories _CLIPPY_CATEGORIES = { @@ -83,12 +84,12 @@ def process(self, command: str, output: str) -> str: if _CHECKING_RE.match(stripped): checking_count += 1 continue - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 continue # Error start - if _ERROR_START_RE.match(stripped): + if RUST_ERROR_START_RE.match(stripped): # Flush current warning block if current_rule and current_block: 
warnings_by_rule[current_rule].append(current_block) @@ -102,8 +103,8 @@ def process(self, command: str, output: str) -> str: continue # Warning start - wm = _WARNING_START_RE.match(stripped) - if wm and not _WARNING_SUMMARY_RE.match(stripped): + wm = RUST_WARNING_START_RE.match(stripped) + if wm and not RUST_WARNING_SUMMARY_RE.match(stripped): # Flush previous if in_error and current_error: error_blocks.append(current_error) @@ -117,7 +118,7 @@ def process(self, command: str, output: str) -> str: current_block = [line] continue - if _WARNING_SUMMARY_RE.match(stripped): + if RUST_WARNING_SUMMARY_RE.match(stripped): if current_rule and current_block: warnings_by_rule[current_rule].append(current_block) current_block = [] @@ -129,7 +130,7 @@ def process(self, command: str, output: str) -> str: summary_lines.append(line) continue - if _FINISHED_RE.match(stripped): + if RUST_FINISHED_RE.match(stripped): if current_rule and current_block: warnings_by_rule[current_rule].append(current_block) current_block = [] diff --git a/src/processors/file_content.py b/src/processors/file_content.py index 52c5d39..e46043b 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -401,8 +401,8 @@ def _compress_yarn_lock(self, lines: list[str], total: int) -> str: result.append(f" ... 
({len(deps) - 50} more)") return "\n".join(result) - def _compress_poetry_lock(self, lines: list[str], total: int) -> str: - """poetry.lock: extract [[package]] name and version.""" + def _compress_toml_lock(self, lines: list[str], total: int, label: str) -> str: + """Extract [[package]] name and version from TOML lock files (poetry.lock, Cargo.lock).""" deps = [] current_name = None for line in lines: @@ -417,35 +417,20 @@ def _compress_poetry_lock(self, lines: list[str], total: int) -> str: deps.append(f"{current_name}@{val}") current_name = None - result = [f"poetry.lock ({len(deps)} packages, {total} lines):"] + result = [f"{label} ({len(deps)} packages, {total} lines):"] for d in deps[:50]: result.append(f" {d}") if len(deps) > 50: result.append(f" ... ({len(deps) - 50} more)") return "\n".join(result) + def _compress_poetry_lock(self, lines: list[str], total: int) -> str: + """poetry.lock: extract [[package]] name and version.""" + return self._compress_toml_lock(lines, total, "poetry.lock") + def _compress_cargo_lock(self, lines: list[str], total: int) -> str: """Cargo.lock: extract [[package]] name and version.""" - deps = [] - current_name = None - for line in lines: - stripped = line.strip() - if stripped == "[[package]]": - current_name = None - elif stripped.startswith("name = "): - val = stripped.split('"')[1] if '"' in stripped else stripped.split("=")[1].strip() - current_name = val - elif stripped.startswith("version = ") and current_name: - val = stripped.split('"')[1] if '"' in stripped else stripped.split("=")[1].strip() - deps.append(f"{current_name}@{val}") - current_name = None - - result = [f"Cargo.lock ({len(deps)} packages, {total} lines):"] - for d in deps[:50]: - result.append(f" {d}") - if len(deps) > 50: - result.append(f" ... 
({len(deps) - 50} more)") - return "\n".join(result) + return self._compress_toml_lock(lines, total, "Cargo.lock") def _compress_json_lock(self, raw: str, total: int) -> str: """composer.lock / Pipfile.lock: extract package names + versions from JSON.""" @@ -465,7 +450,7 @@ def _compress_json_lock(self, raw: str, total: int) -> str: for section in ("default", "develop"): for name, info in data.get(section, {}).items(): version = info.get("version", "?") if isinstance(info, dict) else "?" - deps.append(f"{name}{version}") + deps.append(f"{name}@{version}") result = [f"lock file ({len(deps)} packages, {total} lines):"] for d in deps[:50]: diff --git a/src/processors/file_listing.py b/src/processors/file_listing.py index b607877..9f71b6c 100644 --- a/src/processors/file_listing.py +++ b/src/processors/file_listing.py @@ -169,7 +169,7 @@ def _process_tree(self, output: str) -> str: # Find the summary line (usually last line like "X directories, Y files") summary = "" for line in reversed(lines): - if re.match(r"\d+\s+director", line): + if re.match(r"\d+\s+director(?:ies|y)\b", line): summary = line break diff --git a/src/processors/generic.py b/src/processors/generic.py index db20c07..9ae0179 100644 --- a/src/processors/generic.py +++ b/src/processors/generic.py @@ -209,6 +209,6 @@ def _truncate_middle(self, lines: list[str]) -> list[str]: removed = total - keep_head - keep_tail return [ *lines[:keep_head], - f"\n... ({removed} lines truncated, {total} total) ...\n", + f"... 
({removed} lines truncated, {total} total) ...", *lines[-keep_tail:], ] diff --git a/src/processors/utils.py b/src/processors/utils.py index 619593f..b2a1d5d 100644 --- a/src/processors/utils.py +++ b/src/processors/utils.py @@ -3,6 +3,14 @@ import re from collections import defaultdict +# Shared Rust compiler output patterns (used by cargo and cargo_clippy processors) +RUST_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") +RUST_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") +RUST_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") +RUST_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") +RUST_FINISHED_RE = re.compile(r"^\s*Finished\s+") +RUST_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") + _DEFAULT_ERROR_RE = re.compile( r"\b(error|Error|ERROR|exception|Exception|EXCEPTION|" r"fatal|Fatal|FATAL|panic|Panic|PANIC|traceback|Traceback)\b" From db99a54c8382ad08d60b0dc7a094fa1ffc9dbcb9 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Sat, 28 Mar 2026 16:16:19 +0100 Subject: [PATCH 6/6] chore: bump version to 2.2.1 --- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- pyproject.toml | 2 +- src/__init__.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index e6f06a2..2d56257 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "token-saver", "source": "./", "description": "Automatically compresses verbose CLI output to save tokens. 
21 specialized processors for git, docker, npm, terraform, kubectl, helm, ansible, and more.", - "version": "2.1.1", + "version": "2.2.1", "author": { "name": "ppgranger" }, diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index ecf48d3..f08d73e 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "token-saver", "description": "Automatically compresses verbose CLI output (git, docker, npm, terraform, kubectl, etc.) to save tokens in Claude Code sessions. 21 specialized processors with content-aware compression.", - "version": "2.1.1", + "version": "2.2.1", "author": { "name": "ppgranger", "url": "https://github.com/ppgranger" diff --git a/pyproject.toml b/pyproject.toml index 06d353a..ee37ea8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "token-saver" -version = "2.1.1" +version = "2.2.1" requires-python = ">=3.10" [project.optional-dependencies] diff --git a/src/__init__.py b/src/__init__.py index 666977b..4fbe162 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,6 +1,6 @@ import os -__version__ = "2.1.1" +__version__ = "2.2.1" def data_dir() -> str: