diff --git a/docs/processors/cargo.md b/docs/processors/cargo.md new file mode 100644 index 0000000..c9a16ba --- /dev/null +++ b/docs/processors/cargo.md @@ -0,0 +1,34 @@ +# Cargo Processor + +**File:** `src/processors/cargo.py` | **Priority:** 22 | **Name:** `cargo` + +Dedicated processor for Rust's cargo build system. + +## Supported Commands + +cargo build, cargo check, cargo doc, cargo update, cargo bench. + +## Strategy + +| Subcommand | Strategy | +|---|---| +| **build/check** | Collapse `Compiling X v1.0` lines into count. Group warnings by type (unused_variable, unused_import, dead_code, unused_mut, lifetime, borrow_checker). Show first N examples per type. Keep ALL errors with full span context (`-->`, `|`, `^^` markers). Keep `Finished` summary | +| **doc** | Collapse `Documenting X` and `Compiling X` lines. Keep doc warnings, errors, `Finished`, and `Generated` lines | +| **update** | Show all major version bumps explicitly (breaking changes). Collapse minor/patch bumps into count. Keep `Adding` and `Removing` lines | +| **bench** | Keep benchmark result lines (`bench: N ns/iter`). Strip `Compiling` and `Running` noise. Keep `test result:` summary | + +## Exclusions + +- `cargo test` is routed to `TestOutputProcessor` +- `cargo clippy` is routed to `LintOutputProcessor` + +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| cargo_warning_example_count | 2 | Number of example warnings to show per category | +| cargo_warning_group_threshold | 3 | Minimum occurrences before warnings are grouped | + +## Removed Noise + +`Compiling X v1.0.0` lines, `Downloading X v1.0.0` lines, `Running` lines (bench), intermediate blank lines between warnings. diff --git a/docs/processors/go.md b/docs/processors/go.md new file mode 100644 index 0000000..4ff73ba --- /dev/null +++ b/docs/processors/go.md @@ -0,0 +1,36 @@ +# Go Processor + +**File:** `src/processors/go.py` | **Priority:** 23 | **Name:** `go` + +Dedicated processor for Go toolchain commands. + +## Supported Commands + +go build, go vet, go mod tidy, go mod download, go generate, go install. + +## Strategy + +| Subcommand | Strategy | +|---|---| +| **build/install** | Keep all `file.go:line:col: message` errors. For multi-package builds with many `# package` headers, truncate to first 3. Pass through unchanged if no errors (successful builds produce no output) | +| **vet** | Group warnings by type (printf, unreachable, shadow, unused, nil, loop). Show first N examples per type. Keep `# package` headers for context | +| **mod tidy/download** | Collapse `go: downloading X v1.0` lines into count. Keep `go: added/upgraded/downgraded/removed` lines (important dependency changes) | +| **generate** | Collapse `running` lines into count. Keep errors and generator output | + +## Exclusions + +- `go test` is routed to `TestOutputProcessor` +- `golangci-lint` is routed to `LintOutputProcessor` + +## Configuration + +Uses existing parameters: + +| Parameter | Default | Description | +|---|---|---| +| lint_example_count | 2 | Examples per warning type (go vet) | +| lint_group_threshold | 3 | Minimum occurrences before grouping | + +## Removed Noise + +`go: downloading X` lines, `# package` headers (when redundant), `running` lines from go generate. diff --git a/docs/processors/jq_yq.md b/docs/processors/jq_yq.md new file mode 100644 index 0000000..0c3a7b6 --- /dev/null +++ b/docs/processors/jq_yq.md @@ -0,0 +1,29 @@ +# JQ/YQ Processor + +**File:** `src/processors/jq_yq.py` | **Priority:** 44 | **Name:** `jq_yq` + +Compresses large JSON and YAML outputs from jq and yq. + +## Supported Commands + +jq, yq. + +## Strategy + +| Output Type | Strategy | +|---|---| +| **Small output** (< 50 lines) | Pass through unchanged | +| **jq JSON** | Parse as JSON, compress with `compress_json_value()` (truncate arrays > 5 items, summarize deeply nested objects). Re-serialize with indent | +| **jq streaming** (one JSON per line) | Detect repeated structure (same keys), show first 3 + count. Fallback to head/tail | +| **yq YAML** | Count top-level keys and list items. Collapse large arrays (> 3 items at same indent) to count. Add structure summary header | + +## Configuration + +| Parameter | Default | Description | +|---|---|---| +| jq_passthrough_threshold | 50 | Lines below which output passes through unchanged | + +## Notes + +- No runtime dependencies: JSON parsing uses stdlib `json` module, YAML uses heuristic analysis (no PyYAML dependency) +- Streaming jq output (one value per line) is detected and compressed separately from single-document output diff --git a/docs/processors/ssh.md b/docs/processors/ssh.md new file mode 100644 index 0000000..f1bcf48 --- /dev/null +++ b/docs/processors/ssh.md @@ -0,0 +1,29 @@ +# SSH Processor + +**File:** `src/processors/ssh.py` | **Priority:** 43 | **Name:** `ssh` + +Handles non-interactive SSH and SCP command output. + +## Supported Commands + +- `ssh host 'command'` or `ssh host "command"` (non-interactive SSH with quoted remote command) +- `ssh -o Option=value host 'command'` (with SSH options) +- `scp` (all forms — always non-interactive) + +## Not Supported + +- `ssh host` (interactive SSH — no remote command) remains excluded from compression + +## Strategy + +| Command | Strategy | +|---|---| +| **SSH remote** | Apply log-style compression: keep first 10 + last 20 lines, preserve error lines with context in the middle section | +| **SCP** | Collapse progress bar lines (containing `%` and transfer rates) to final status per file. Keep error lines (permission denied, connection refused, etc.) | + +## How It Works + +The SSH/SCP exclusion in `hook_pretool.py` was narrowed from a blanket `ssh|scp` exclusion to only exclude interactive SSH (no quoted command). This allows: +- `ssh host 'ls -la'` — compressed (non-interactive) +- `scp file host:/path` — compressed (always non-interactive) +- `ssh host` — still excluded (interactive) diff --git a/scripts/hook_pretool.py b/scripts/hook_pretool.py index c7113f6..348f5a6 100644 --- a/scripts/hook_pretool.py +++ b/scripts/hook_pretool.py @@ -86,7 +86,7 @@ def _load_compressible_patterns() -> list[str]: EXCLUDED_PATTERNS = [ r"(? list[str]: r"<\(", # process substitution r"^\s*sudo\b", r"^\s*(vi|vim|nano|emacs|code)\b", - r"^\s*(ssh|scp)\b", + r"^\s*ssh\s+(?:-\S+\s+)*\S+\s*$", # interactive ssh only r"^\s*rsync\b.*\S+:\S+", # only exclude remote rsync (host:path) r"^\s*env\s+\S+=", r"(?:^|\s)token[-_]saver\s", diff --git a/src/config.py b/src/config.py index 289ac9d..633adb9 100644 --- a/src/config.py +++ b/src/config.py @@ -48,6 +48,9 @@ "db_prune_days": 90, "chars_per_token": 4, "user_processors_dir": "", + "cargo_warning_example_count": 2, + "cargo_warning_group_threshold": 3, + "jq_passthrough_threshold": 50, "debug": False, } diff --git a/src/engine.py b/src/engine.py index a951f48..abd8a79 100644 --- a/src/engine.py +++ b/src/engine.py @@ -20,10 +20,12 @@ class CompressionEngine: processors: list[Processor] _generic: Processor + _by_name: dict[str, Processor] def __init__(self) -> None: self.processors = discover_processors() self._generic = self.processors[-1] # Last = GenericProcessor (priority 999) + self._by_name = {p.name: p for p in self.processors} def compress(self, command: str, output: str) -> tuple[str, str, bool]: """Compress output for a given command. @@ -49,6 +51,17 @@ def compress(self, command: str, output: str) -> tuple[str, str, bool]: if compressed is output or compressed == output: return output, processor.name, False + # Chain to secondary processor if declared (max depth = 1) + if ( + processor.chain_to + and processor.chain_to != processor.name + and processor.chain_to in self._by_name + ): + secondary = self._by_name[processor.chain_to] + chained = secondary.process(command, compressed) + if chained is not compressed and chained != compressed: + compressed = chained + # If a specialized processor handled it, also run generic # cleanup (ANSI strip, blank line collapse) but not truncation if processor is not self._generic: diff --git a/src/processors/base.py b/src/processors/base.py index 9101fc3..3d84861 100644 --- a/src/processors/base.py +++ b/src/processors/base.py @@ -19,6 +19,7 @@ class Processor(ABC): priority: int = 50 hook_patterns: list[str] = [] + chain_to: str | None = None @abstractmethod def can_handle(self, command: str) -> bool: diff --git a/src/processors/build_output.py b/src/processors/build_output.py index d2ea7bc..be0fbb9 100644 --- a/src/processors/build_output.py +++ b/src/processors/build_output.py @@ -28,6 +28,9 @@ def can_handle(self, command: str) -> bool: # Exclude cargo clippy (handled by LintOutputProcessor) if re.search(r"\bcargo\s+clippy\b", command): return False + # Exclude cargo build/check (handled by CargoProcessor) + if re.search(r"\bcargo\s+(build|check)\b", command): + return False return bool( re.search( r"\b(npm\s+(run|install|ci|build|audit)|yarn\s+(run|install|build|add|audit)|pnpm\s+(run|install|build|add|audit)|" diff --git a/src/processors/cargo.py b/src/processors/cargo.py new file mode 100644 index 0000000..f40d52f --- /dev/null +++ b/src/processors/cargo.py @@ -0,0 +1,292 @@ +"""Cargo processor: cargo build, check, doc, update, bench.""" + +import re +from collections import defaultdict + +from .. import config +from .base import Processor + +_CARGO_CMD_RE = re.compile(r"\bcargo\s+(build|check|doc|update|bench)\b") +_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") +_DOWNLOADING_RE = re.compile(r"^\s*Downloading\s+\S+\s+v") +_DOCUMENTING_RE = re.compile(r"^\s*Documenting\s+\S+\s+v") +_RUNNING_RE = re.compile(r"^\s*Running\s+") +_FINISHED_RE = re.compile(r"^\s*Finished\s+") +_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") +_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") +_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") +_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") +_UPDATE_LINE_RE = re.compile( + r"^\s*(Updating|Removing|Adding)\s+(\S+)\s+v([\d.]+)(?:\s*->\s*v([\d.]+))?" +) + + +class CargoProcessor(Processor): + priority = 22 + hook_patterns = [ + r"^cargo\s+(build|check|doc|update|bench)\b", + ] + + @property + def name(self) -> str: + return "cargo" + + def can_handle(self, command: str) -> bool: + if re.search(r"\bcargo\s+(test|clippy)\b", command): + return False + return bool(_CARGO_CMD_RE.search(command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + m = _CARGO_CMD_RE.search(command) + if not m: + return output + + subcmd = m.group(1) + if subcmd in ("build", "check"): + return self._process_cargo_build(output) + if subcmd == "doc": + return self._process_cargo_doc(output) + if subcmd == "update": + return self._process_cargo_update(output) + if subcmd == "bench": + return self._process_cargo_bench(output) + return output + + def _categorize_warning(self, msg: str) -> str: + if "unused variable" in msg: + return "unused_variable" + if "unused import" in msg: + return "unused_import" + if "dead_code" in msg or "never read" in msg or "never used" in msg: + return "dead_code" + if "does not need to be mutable" in msg: + return "unused_mut" + if "lifetime" in msg: + return "lifetime" + if "borrow" in msg: + return "borrow_checker" + m = re.search(r"\[(\w+(?:::\w+)*)\]", msg) + return m.group(1) if m else "other" + + def _process_cargo_build(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + compiling_count = 0 + downloading_count = 0 + + warnings_by_type: dict[str, list[list[str]]] = defaultdict(list) + current_block: list[str] = [] + current_type: str | None = None + in_error = False + error_blocks: list[list[str]] = [] + current_error: list[str] = [] + finished_lines: list[str] = [] + warning_summary_lines: list[str] = [] + + for line in lines: + stripped = line.strip() + + if _COMPILING_RE.match(stripped): + compiling_count += 1 + continue + if _DOWNLOADING_RE.match(stripped): + downloading_count += 1 + continue + + # Error start + if _ERROR_START_RE.match(stripped): + # Flush current warning block + if current_type and current_block: + warnings_by_type[current_type].append(current_block) + current_block = [] + current_type = None + # Start error block + if in_error and current_error: + error_blocks.append(current_error) + in_error = True + current_error = [line] + continue + + # Warning start + wm = _WARNING_START_RE.match(stripped) + if wm and not _WARNING_SUMMARY_RE.match(stripped): + # Flush previous + if in_error and current_error: + error_blocks.append(current_error) + in_error = False + current_error = [] + if current_type and current_block: + warnings_by_type[current_type].append(current_block) + + rule = wm.group(1) or "" + msg = wm.group(2) + current_type = self._categorize_warning(rule + " " + msg) + current_block = [line] + continue + + if _WARNING_SUMMARY_RE.match(stripped): + if current_type and current_block: + warnings_by_type[current_type].append(current_block) + current_block = [] + current_type = None + if in_error and current_error: + error_blocks.append(current_error) + in_error = False + current_error = [] + warning_summary_lines.append(line) + continue + + if _FINISHED_RE.match(stripped): + if current_type and current_block: + warnings_by_type[current_type].append(current_block) + current_block = [] + current_type = None + if in_error and current_error: + error_blocks.append(current_error) + in_error = False + current_error = [] + finished_lines.append(line) + continue + + # Context lines (spans, code, etc.) + if in_error: + current_error.append(line) + elif current_type: + current_block.append(line) + + # Flush remaining + if in_error and current_error: + error_blocks.append(current_error) + if current_type and current_block: + warnings_by_type[current_type].append(current_block) + + # Build compressed output + if downloading_count > 0: + result.append(f"[{downloading_count} crates downloaded]") + if compiling_count > 0: + result.append(f"[{compiling_count} crates compiled]") + + # All errors (kept in full) + for block in error_blocks: + result.extend(block) + + # Grouped warnings + example_count = config.get("cargo_warning_example_count") + group_threshold = config.get("cargo_warning_group_threshold") + for wtype, blocks in sorted(warnings_by_type.items(), key=lambda x: -len(x[1])): + count = len(blocks) + if count >= group_threshold: + result.append(f"warning: {wtype} ({count} occurrences)") + for block in blocks[:example_count]: + result.extend(f" {line}" for line in block) + if count > example_count: + result.append(f" ... ({count - example_count} more)") + else: + for block in blocks: + result.extend(block) + + result.extend(warning_summary_lines) + result.extend(finished_lines) + + return "\n".join(result) if result else output + + def _process_cargo_doc(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + compiling_count = 0 + documenting_count = 0 + + for line in lines: + stripped = line.strip() + if _COMPILING_RE.match(stripped): + compiling_count += 1 + elif _DOCUMENTING_RE.match(stripped): + documenting_count += 1 + elif ( + _FINISHED_RE.match(stripped) + or re.match(r"^\s*Generated\s+", stripped) + or re.search(r"\bwarning\b", stripped) + or _ERROR_START_RE.match(stripped) + or (_SPAN_LINE_RE.match(stripped) and result) + ): + result.append(line) + + summary_parts = [] + if compiling_count > 0: + summary_parts.append(f"{compiling_count} compiled") + if documenting_count > 0: + summary_parts.append(f"{documenting_count} documented") + if summary_parts: + result.insert(0, f"[{', '.join(summary_parts)}]") + + return "\n".join(result) if result else output + + def _process_cargo_update(self, output: str) -> str: + lines = output.splitlines() + updates: list[str] = [] + major_bumps: list[str] = [] + additions: list[str] = [] + removals: list[str] = [] + + for line in lines: + m = _UPDATE_LINE_RE.match(line.strip()) + if m: + action, pkg, old_ver, new_ver = m.groups() + if action == "Adding": + additions.append(f" + {pkg} v{old_ver}") + elif action == "Removing": + removals.append(f" - {pkg} v{old_ver}") + elif action == "Updating" and new_ver: + old_major = old_ver.split(".")[0] + new_major = new_ver.split(".")[0] + if old_major != new_major: + major_bumps.append(f" {pkg}: v{old_ver} -> v{new_ver} (MAJOR)") + else: + updates.append(pkg) + + result = [] + total = len(updates) + len(major_bumps) + result.append(f"[{total} dependencies updated]") + + if major_bumps: + result.append("Major version bumps:") + result.extend(major_bumps) + + if updates: + result.append(f"Minor/patch updates: {len(updates)} packages") + + if additions: + result.append("Added:") + result.extend(additions) + if removals: + result.append("Removed:") + result.extend(removals) + + return "\n".join(result) if result else output + + def _process_cargo_bench(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + compiling_count = 0 + + for line in lines: + stripped = line.strip() + if _COMPILING_RE.match(stripped): + compiling_count += 1 + elif _RUNNING_RE.match(stripped): + continue + elif ( + re.match(r"^test\s+.+\s+bench:", stripped) + or re.match(r"^test result:", stripped) + or _FINISHED_RE.match(stripped) + or _ERROR_START_RE.match(stripped) + ): + result.append(line) + + if compiling_count > 0: + result.insert(0, f"[{compiling_count} crates compiled]") + + return "\n".join(result) if result else output diff --git a/src/processors/go.py b/src/processors/go.py new file mode 100644 index 0000000..07cb2be --- /dev/null +++ b/src/processors/go.py @@ -0,0 +1,179 @@ +"""Go processor: go build, vet, mod, generate, install.""" + +import re +from collections import defaultdict + +from .. import config +from .base import Processor + +_GO_CMD_RE = re.compile(r"\bgo\s+(build|vet|mod|generate|install)\b") +_GO_ERROR_RE = re.compile(r"^(\S+\.go):(\d+):(\d+):\s+(.+)$") +_GO_PACKAGE_RE = re.compile(r"^#\s+(\S+)") +_GO_DOWNLOADING_RE = re.compile(r"^go:\s+downloading\s+(\S+)\s+v") +_GO_MOD_ACTION_RE = re.compile(r"^go:\s+(added|upgraded|downgraded|removed)\s+") +_GO_GENERATE_RUN_RE = re.compile(r"^(\S+\.go):\d+:\s+running\s+") + + +class GoProcessor(Processor): + priority = 23 + hook_patterns = [ + r"^go\s+(build|vet|mod|generate|install)\b", + ] + + @property + def name(self) -> str: + return "go" + + def can_handle(self, command: str) -> bool: + if re.search(r"\bgo\s+test\b", command): + return False + if re.search(r"\bgolangci-lint\b", command): + return False + return bool(_GO_CMD_RE.search(command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + m = _GO_CMD_RE.search(command) + if not m: + return output + + subcmd = m.group(1) + if subcmd == "build": + return self._process_go_build(output) + if subcmd == "install": + return self._process_go_build(output) + if subcmd == "vet": + return self._process_go_vet(output) + if subcmd == "mod": + return self._process_go_mod(output) + if subcmd == "generate": + return self._process_go_generate(output) + return output + + def _process_go_build(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + package_lines: list[str] = [] + error_lines: list[str] = [] + has_errors = False + + for line in lines: + stripped = line.strip() + + if _GO_ERROR_RE.match(stripped): + has_errors = True + error_lines.append(line) + elif _GO_PACKAGE_RE.match(stripped): + package_lines.append(line) + elif stripped and has_errors: + # Context lines after an error (e.g., code snippet, notes) + error_lines.append(line) + + if not has_errors: + # No errors but output exists — could be warnings or linker errors + return output + + # For multi-package builds, keep package headers + if len(package_lines) > 1: + result.extend(package_lines[:3]) + if len(package_lines) > 3: + result.append(f"... ({len(package_lines) - 3} more packages)") + + result.extend(error_lines) + return "\n".join(result) if result else output + + def _categorize_vet_warning(self, msg: str) -> str: + msg_lower = msg.lower() + if "printf" in msg_lower: + return "printf" + if "unreachable" in msg_lower: + return "unreachable" + if "shadow" in msg_lower: + return "shadow" + if "unused" in msg_lower: + return "unused" + if "nil" in msg_lower: + return "nil" + if "loop" in msg_lower: + return "loop" + return "other" + + def _process_go_vet(self, output: str) -> str: + lines = output.splitlines() + warnings_by_type: dict[str, list[str]] = defaultdict(list) + package_lines: list[str] = [] + + for line in lines: + stripped = line.strip() + m = _GO_ERROR_RE.match(stripped) + if m: + msg = m.group(4) + wtype = self._categorize_vet_warning(msg) + warnings_by_type[wtype].append(line) + elif _GO_PACKAGE_RE.match(stripped): + package_lines.append(line) + + if not warnings_by_type: + return output + + example_count = config.get("lint_example_count") + group_threshold = config.get("lint_group_threshold") + result: list[str] = [] + + if package_lines: + result.extend(package_lines[:2]) + + for wtype, warnings in sorted(warnings_by_type.items(), key=lambda x: -len(x[1])): + count = len(warnings) + if count >= group_threshold: + result.append(f"{wtype}: {count} warnings") + for w in warnings[:example_count]: + result.append(f" {w}") + if count > example_count: + result.append(f" ... ({count - example_count} more)") + else: + result.extend(warnings) + + return "\n".join(result) if result else output + + def _process_go_mod(self, output: str) -> str: + lines = output.splitlines() + download_count = 0 + action_lines: list[str] = [] + other_lines: list[str] = [] + + for line in lines: + stripped = line.strip() + if _GO_DOWNLOADING_RE.match(stripped): + download_count += 1 + elif _GO_MOD_ACTION_RE.match(stripped): + action_lines.append(line) + elif stripped: + other_lines.append(line) + + result: list[str] = [] + if download_count > 0: + result.append(f"[{download_count} packages downloaded]") + result.extend(action_lines) + result.extend(other_lines) + + return "\n".join(result) if result else output + + def _process_go_generate(self, output: str) -> str: + lines = output.splitlines() + generate_count = 0 + result: list[str] = [] + + for line in lines: + stripped = line.strip() + if _GO_GENERATE_RUN_RE.match(stripped): + generate_count += 1 + elif stripped: + result.append(line) + + if generate_count > 0: + result.insert(0, f"[{generate_count} generators ran]") + + return "\n".join(result) if result else output diff --git a/src/processors/jq_yq.py b/src/processors/jq_yq.py new file mode 100644 index 0000000..e1d3976 --- /dev/null +++ b/src/processors/jq_yq.py @@ -0,0 +1,154 @@ +"""JQ/YQ processor: compress large JSON and YAML outputs.""" + +import json +import re + +from .. import config +from .base import Processor +from .utils import compress_json_value + +_JQ_RE = re.compile(r"\bjq\b") +_YQ_RE = re.compile(r"\byq\b") + + +class JqYqProcessor(Processor): + priority = 44 + hook_patterns = [ + r"^(jq|yq)\b", + ] + + @property + def name(self) -> str: + return "jq_yq" + + def can_handle(self, command: str) -> bool: + return bool(re.search(r"\b(jq|yq)\b", command)) + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + lines = output.splitlines() + threshold = config.get("jq_passthrough_threshold") + if len(lines) <= threshold: + return output + + if _JQ_RE.search(command): + return self._process_jq(output, lines) + return self._process_yq(output, lines) + + def _process_jq(self, output: str, lines: list[str]) -> str: + # Try parsing as a single JSON document + try: + data = json.loads(output.strip()) + compressed = compress_json_value(data, max_depth=4) + result = json.dumps(compressed, indent=2) + if len(result) < len(output): + return result + f"\n({len(lines)} lines compressed)" + return output + except (json.JSONDecodeError, ValueError): + pass + + # Streaming mode: one JSON value per line + return self._process_streaming_json(lines) + + @staticmethod + def _parse_json_keys(line: str) -> str | None: + try: + obj = json.loads(line.strip()) + except (json.JSONDecodeError, ValueError): + return None + if isinstance(obj, dict): + return ",".join(sorted(obj.keys())) + return None + + def _process_streaming_json(self, lines: list[str]) -> str: + structures: list[str] = [] + for line in lines[:5]: + keys = self._parse_json_keys(line) + if keys is None: + break + structures.append(keys) + + # If all parsed lines have the same keys, it's a repeated structure + if len(structures) >= 3 and len(set(structures)) == 1: + result = list(lines[:3]) + result.append(f"... ({len(lines) - 3} more items with same structure)") + return "\n".join(result) + + keep_head = 20 + keep_tail = 10 + if len(lines) <= keep_head + keep_tail: + return "\n".join(lines) + + result = lines[:keep_head] + result.append(f"\n... ({len(lines) - keep_head - keep_tail} lines truncated) ...\n") + result.extend(lines[-keep_tail:]) + return "\n".join(result) + + def _process_yq(self, output: str, lines: list[str]) -> str: + # Count top-level keys and list items + top_level_keys = 0 + list_items = 0 + for line in lines: + if line and not line[0].isspace() and line.rstrip().endswith(":"): + top_level_keys += 1 + elif re.match(r"^- ", line) or re.match(r"^ - ", line): + list_items += 1 + + # Collapse large arrays (lines starting with "- " at consistent indent) + result: list[str] = [] + array_count = 0 + array_indent: int | None = None + + for line in lines: + m = re.match(r"^(\s*)- ", line) + if m: + indent = len(m.group(1)) + if array_indent is None: + array_indent = indent + array_count = 1 + result.append(line) + elif indent == array_indent: + array_count += 1 + if array_count <= 3: + result.append(line) + elif array_count == 4: + result.append(f"{' ' * indent} ... ({array_count} items so far)") + elif array_count <= 3: + result.append(line) + else: + # Non-array line — flush array count if needed + if array_count > 3: + # Update the "so far" placeholder with final count + for j in range(len(result) - 1, -1, -1): + if "items so far" in result[j] or "items total" in result[j]: + indent_str = " " * (array_indent or 0) + result[j] = f"{indent_str} ... ({array_count} items total)" + break + array_count = 0 + array_indent = None + result.append(line) + + # Final flush + if array_count > 3: + for j in range(len(result) - 1, -1, -1): + if "items so far" in result[j] or "items total" in result[j]: + indent_str = " " * (array_indent or 0) + result[j] = f"{indent_str} ... ({array_count} items total)" + break + + compressed = "\n".join(result) + if len(compressed) < len(output): + summary = f"--- ({len(lines)} lines" + if top_level_keys > 0: + summary += f", {top_level_keys} top-level keys" + summary += ") ---" + return summary + "\n" + compressed + + keep_head = 20 + keep_tail = 10 + result_lines = lines[:keep_head] + result_lines.append(f"\n... ({len(lines) - keep_head - keep_tail} lines truncated) ...\n") + result_lines.extend(lines[-keep_tail:]) + return "\n".join(result_lines) diff --git a/src/processors/ssh.py b/src/processors/ssh.py new file mode 100644 index 0000000..65726a0 --- /dev/null +++ b/src/processors/ssh.py @@ -0,0 +1,65 @@ +"""SSH processor: non-interactive SSH and SCP commands.""" + +import re + +from .base import Processor +from .utils import compress_log_lines + +_SSH_NON_INTERACTIVE_RE = re.compile(r"""\bssh\s+.+\s+['"]""") +_SCP_RE = re.compile(r"\bscp\b") +_SCP_PROGRESS_RE = re.compile(r"^\s*\S+\s+\d+%") + + +class SshProcessor(Processor): + priority = 43 + hook_patterns = [ + r"^ssh\s+.+\s+['\"]", + r"^scp\b", + ] + + @property + def name(self) -> str: + return "ssh" + + def can_handle(self, command: str) -> bool: + if _SCP_RE.search(command): + return True + if re.search(r"\bssh\b", command): + return bool(_SSH_NON_INTERACTIVE_RE.search(command)) + return False + + def process(self, command: str, output: str) -> str: + if not output or not output.strip(): + return output + + if _SCP_RE.search(command): + return self._process_scp(output) + return self._process_ssh_remote(output) + + def _process_ssh_remote(self, output: str) -> str: + lines = output.splitlines() + return compress_log_lines(lines, keep_head=10, keep_tail=20) + + def _process_scp(self, output: str) -> str: + lines = output.splitlines() + result: list[str] = [] + last_progress: str | None = None + + for line in lines: + stripped = line.strip() + + if _SCP_PROGRESS_RE.match(stripped): + last_progress = line + elif re.search(r"\b(error|Error|ERROR|denied|refused|No such)\b", stripped): + result.append(line) + elif stripped and not _SCP_PROGRESS_RE.match(stripped): + if last_progress: + result.append(last_progress) + last_progress = None + result.append(line) + + # Flush final progress line + if last_progress: + result.append(last_progress) + + return "\n".join(result) if result else output diff --git a/tests/test_engine.py b/tests/test_engine.py index 183026a..765a2b1 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -210,9 +210,9 @@ class TestProcessorRegistry: """Tests for auto-discovery and the processor registry.""" def test_discover_processors_finds_all(self): - """Auto-discovery should find all 21 processors.""" + """Auto-discovery should find all 25 processors.""" processors = discover_processors() - assert len(processors) == 21 + assert len(processors) == 25 def test_discover_processors_sorted_by_priority(self): """Processors must be returned in ascending priority order.""" @@ -245,6 +245,8 @@ def test_expected_priority_order(self): assert name_to_priority["package_list"] == 15 assert name_to_priority["git"] == 20 assert name_to_priority["test"] == 21 + assert name_to_priority["cargo"] == 22 + assert name_to_priority["go"] == 23 assert name_to_priority["build"] == 25 assert name_to_priority["lint"] == 27 assert name_to_priority["network"] == 30 @@ -260,6 +262,8 @@ def test_expected_priority_order(self): assert name_to_priority["ansible"] == 40 assert name_to_priority["helm"] == 41 assert name_to_priority["syslog"] == 42 + assert name_to_priority["ssh"] == 43 + assert name_to_priority["jq_yq"] == 44 assert name_to_priority["file_listing"] == 50 assert name_to_priority["file_content"] == 51 assert name_to_priority["generic"] == 999 @@ -398,6 +402,21 @@ def test_collect_hook_patterns_covers_key_commands(self): # Syslog "journalctl -u nginx", "dmesg", + # Cargo (dedicated processor) + "cargo doc", + "cargo update", + "cargo bench", + # Go (dedicated processor) + "go build ./...", + "go vet ./...", + "go mod tidy", + "go generate ./...", + "go install ./cmd/...", + # JQ/YQ + "jq . file.json", + "yq . config.yaml", + "ssh host 'ls -la'", + "scp file.txt host:/tmp/", ] for cmd in test_commands: @@ -412,3 +431,22 @@ def test_engine_uses_discovered_processors(self): for ep, dp in zip(engine.processors, discovered, strict=False): assert ep.name == dp.name assert ep.priority == dp.priority + + +class TestProcessorChaining: + """Tests for multi-processor chaining infrastructure.""" + + def setup_method(self): + self.engine = CompressionEngine() + + def test_chain_to_attribute_default_none(self): + for p in self.engine.processors: + assert p.chain_to is None + + def test_processor_by_name_lookup(self): + assert "git" in self.engine._by_name + assert "build" in self.engine._by_name + assert "cargo" in self.engine._by_name + assert "go" in self.engine._by_name + assert "ssh" in self.engine._by_name + assert "jq_yq" in self.engine._by_name diff --git a/tests/test_hooks.py b/tests/test_hooks.py index bc4b18a..dc070b8 100644 --- a/tests/test_hooks.py +++ b/tests/test_hooks.py @@ -511,3 +511,70 @@ def test_single_segment_after_split(self): def test_three_segment_chain(self): assert is_compressible("cd /a && cd /b && git status") assert is_compressible("touch f && chmod 644 f && ls -la f") + + def test_cd_then_go_build(self): + assert is_compressible("cd /project && go build ./...") + + def test_cd_then_cargo_bench(self): + assert is_compressible("cd /project && cargo bench") + + +class TestNewProcessorHookPatterns: + """Tests for hook patterns of newly added processors.""" + + # --- Cargo --- + def test_cargo_doc_compressible(self): + assert is_compressible("cargo doc") + assert is_compressible("cargo doc --open") + + def test_cargo_update_compressible(self): + assert is_compressible("cargo update") + + def test_cargo_bench_compressible(self): + assert is_compressible("cargo bench") + + def test_cargo_build_still_compressible(self): + assert is_compressible("cargo build") + assert is_compressible("cargo build --release") + assert is_compressible("cargo check") + + # --- Go --- + def test_go_build_compressible(self): + assert is_compressible("go build ./...") + assert is_compressible("go build -o myapp ./cmd/server") + + def test_go_vet_compressible(self): + assert is_compressible("go vet ./...") + + def test_go_mod_compressible(self): + assert is_compressible("go mod tidy") + assert is_compressible("go mod download") + + def test_go_generate_compressible(self): + assert is_compressible("go generate ./...") + + def test_go_install_compressible(self): + assert is_compressible("go install ./cmd/...") + + # --- SSH non-interactive --- + def test_ssh_non_interactive_compressible(self): + assert is_compressible("ssh host 'ls -la'") + assert is_compressible('ssh host "uname -a"') + assert is_compressible("ssh -o StrictHostKeyChecking=no host 'uptime'") + + def test_ssh_interactive_still_excluded(self): + assert not is_compressible("ssh host") + assert not is_compressible("ssh -p 22 host") + + def test_scp_compressible(self): + assert is_compressible("scp file.txt host:/tmp/") + assert is_compressible("scp -r dir/ user@host:/path/") + + # --- JQ/YQ --- + def test_jq_compressible(self): + assert is_compressible("jq . file.json") + assert is_compressible("jq '.items[]' data.json") + + def test_yq_compressible(self): + assert is_compressible("yq . config.yaml") + assert is_compressible("yq eval '.spec' deployment.yaml") diff --git a/tests/test_precision.py b/tests/test_precision.py index e649e66..88b8362 100644 --- a/tests/test_precision.py +++ b/tests/test_precision.py @@ -1024,3 +1024,125 @@ def test_poetry_lock_all_packages_listed(self): assert "40 packages" in compressed assert "lib-0" in compressed assert "lib-39" in compressed + + +class TestCargoPrecision: + def setup_method(self): + self.engine = CompressionEngine() + + def test_cargo_build_preserves_all_errors_with_spans(self): + lines = [f" Compiling dep-{i} v1.0.{i}" for i in range(100)] + lines.extend( + [ + "error[E0308]: mismatched types", + " --> src/main.rs:10:5", + " |", + '10 | let x: i32 = "hello";', + " | ^^^^^^^ expected i32, found &str", + "", + "error[E0425]: cannot find value `y`", + " --> src/lib.rs:20:10", + " |", + "20 | y + 1", + " | ^ not found in this scope", + ] + ) + output = "\n".join(lines) + compressed, proc, was_compressed = self.engine.compress("cargo build", output) + assert was_compressed + assert proc == "cargo" + assert "mismatched types" in compressed + assert "src/main.rs:10:5" in compressed + assert "expected i32" in compressed + assert "cannot find value" in compressed + assert "src/lib.rs:20:10" in compressed + assert "Compiling dep-" not in compressed + + def test_cargo_build_preserves_warning_types(self): + warnings = [] + for i in range(10): + warnings.extend( + [ + f"warning: unused variable: `var{i}`", + f" --> src/file{i}.rs:{i + 1}:5", + "", + ] + ) + for i in range(5): + warnings.extend( + [ + f"warning: unused import: `mod{i}`", + f" --> src/lib.rs:{i + 10}:5", + "", + ] + ) + warnings.append("warning: `myapp` (lib) generated 15 warnings") + warnings.append(" Finished dev [unoptimized + debuginfo] target(s)") + output = "\n".join(warnings) + compressed, proc, was_compressed = self.engine.compress("cargo build", output) + assert was_compressed + assert proc == "cargo" + assert "unused_variable" in compressed + assert "unused_import" in compressed + assert "Finished" in compressed + + +class TestGoPrecision: + def setup_method(self): + self.engine = CompressionEngine() + + def test_go_build_preserves_all_errors(self): + # Need enough package headers to trigger compression (multi-package build) + lines = [f"# myapp/pkg/module{i}" for i in range(10)] + lines.extend( + [ + "# myapp/pkg/handler", + "pkg/handler/main.go:15:2: undefined: DoSomething", + "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", + "# myapp/pkg/db", + 'pkg/db/conn.go:5:3: imported and not used: "fmt"', + ] + ) + output = "\n".join(lines) + compressed, proc, was_compressed = self.engine.compress("go build ./...", output) + assert was_compressed + assert proc == "go" + assert "undefined: DoSomething" in compressed + assert "main.go:15:2" in compressed + assert "main.go:20:10" in compressed + assert "conn.go:5:3" in compressed + + def test_go_mod_tidy_preserves_additions(self): + lines = [f"go: downloading github.com/pkg/dep{i} v1.0.{i}" for i in range(50)] + lines.append("go: added github.com/new/important v1.0.0") + lines.append("go: removed github.com/old/unused v0.5.0") + output = "\n".join(lines) + compressed, proc, was_compressed = self.engine.compress("go mod tidy", output) + assert was_compressed + assert proc == "go" + assert "added" in compressed + assert "removed" in compressed + assert "50 packages downloaded" in compressed + + +class TestJqPrecision: + def setup_method(self): + self.engine = CompressionEngine() + + def test_jq_preserves_top_level_structure(self): + import json + + data = { + "users": [ + {"id": i, "name": f"user-{i}", "email": f"user{i}@test.com"} for i in range(50) + ], + "metadata": {"total": 50, "page": 1}, + "status": "ok", + } + output = json.dumps(data, indent=2) + compressed, proc, was_compressed = self.engine.compress("jq . data.json", output) + assert was_compressed + assert proc == "jq_yq" + assert "users" in compressed + assert "metadata" in compressed + assert "status" in compressed diff --git a/tests/test_processors.py b/tests/test_processors.py index 89fc10a..90dfc9b 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -8,6 +8,7 @@ from src.chain_utils import extract_primary_command from src.processors.ansible import AnsibleProcessor from src.processors.build_output import BuildOutputProcessor +from src.processors.cargo import CargoProcessor from src.processors.cloud_cli import CloudCliProcessor from src.processors.db_query import DbQueryProcessor from src.processors.docker import DockerProcessor @@ -17,12 +18,15 @@ from src.processors.generic import GenericProcessor from src.processors.gh import GhProcessor from src.processors.git import GitProcessor +from src.processors.go import GoProcessor from src.processors.helm import HelmProcessor +from src.processors.jq_yq import JqYqProcessor from src.processors.kubectl import KubectlProcessor from src.processors.lint_output import LintOutputProcessor from src.processors.network import NetworkProcessor from src.processors.package_list import PackageListProcessor from src.processors.search import SearchProcessor +from src.processors.ssh import SshProcessor from src.processors.syslog import SyslogProcessor from src.processors.system_info import SystemInfoProcessor from src.processors.terraform import TerraformProcessor @@ -678,7 +682,7 @@ def setup_method(self): def test_can_handle_build_commands(self): assert self.p.can_handle("npm run build") - assert self.p.can_handle("cargo build") + assert not self.p.can_handle("cargo build") # handled by CargoProcessor assert self.p.can_handle("make") assert self.p.can_handle("pip install -r requirements.txt") assert self.p.can_handle("yarn add lodash") @@ -3496,3 +3500,411 @@ def test_dmesg_no_errors_truncated(self): # Tail preserved assert "Normal kernel message 199" in result assert len(result) < len(output) + + +# --- Cargo Processor --- + + +class TestCargoProcessor: + def setup_method(self): + self.p = CargoProcessor() + + def test_can_handle_cargo_commands(self): + assert self.p.can_handle("cargo build") + assert self.p.can_handle("cargo check") + assert self.p.can_handle("cargo build --release") + assert self.p.can_handle("cargo doc") + assert self.p.can_handle("cargo doc --open") + assert self.p.can_handle("cargo update") + assert self.p.can_handle("cargo bench") + + def test_cannot_handle_cargo_test(self): + assert not self.p.can_handle("cargo test") + + def test_cannot_handle_cargo_clippy(self): + assert not self.p.can_handle("cargo clippy") + + def test_cannot_handle_non_cargo(self): + assert not self.p.can_handle("npm run build") + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("cargo build", "") == "" + + def test_cargo_build_collapses_compiling(self): + lines = [f" Compiling dep-{i} v1.{i}.0" for i in range(50)] + lines.append(" Finished dev [unoptimized + debuginfo] target(s) in 12.34s") + output = "\n".join(lines) + result = self.p.process("cargo build", output) + assert "50 crates compiled" in result + assert "Finished" in result + assert "Compiling dep-" not in result + + def test_cargo_build_preserves_all_errors(self): + output = "\n".join( + [ + " Compiling myapp v0.1.0", + "error[E0308]: mismatched types", + " --> src/main.rs:10:5", + " |", + '10 | let x: i32 = "hello";', + " | ^^^^^^^ expected i32, found &str", + "", + "error: aborting due to previous error", + ] + ) + result = self.p.process("cargo build", output) + assert "mismatched types" in result + assert "src/main.rs:10:5" in result + assert "expected i32" in result + assert "aborting" in result + + def test_cargo_build_groups_warnings_by_type(self): + warnings = [] + for i in range(6): + warnings.extend( + [ + f"warning: unused variable: `x{i}`", + f" --> src/file{i}.rs:{i + 1}:5", + " |", + f"{i + 1} | let x{i} = 42;", + " | ^^ help: if this is intentional, prefix it with an underscore", + "", + ] + ) + warnings.append("warning: `myapp` (lib) generated 6 warnings") + warnings.append(" Finished dev [unoptimized + debuginfo] target(s)") + output = "\n".join(warnings) + result = self.p.process("cargo build", output) + assert "unused_variable" in result + assert "6 occurrences" in result + assert "Finished" in result + + def test_cargo_build_keeps_finished_line(self): + output = "\n".join( + [ + " Compiling myapp v0.1.0", + " Finished dev [unoptimized + debuginfo] target(s) in 2.34s", + ] + ) + result = self.p.process("cargo build", output) + assert "Finished" in result + + def test_cargo_build_mixed_errors_and_warnings(self): + output = "\n".join( + [ + " Compiling myapp v0.1.0", + "warning: unused import: `std::io`", + " --> src/main.rs:1:5", + "", + "error[E0425]: cannot find value `x`", + " --> src/main.rs:5:5", + " |", + '5 | println!("{}", x);', + " | ^ not found", + "", + "error: aborting due to previous error", + ] + ) + result = self.p.process("cargo build", output) + assert "cannot find value" in result + assert "src/main.rs:5:5" in result + + def test_cargo_doc_collapses_documenting(self): + lines = [f" Documenting dep-{i} v1.{i}.0" for i in range(20)] + lines.append(" Finished `doc` profile [unoptimized]") + lines.append(" Generated /target/doc/myapp/index.html") + output = "\n".join(lines) + result = self.p.process("cargo doc", output) + assert "20 documented" in result + assert "Finished" in result + assert "Generated" in result + assert "Documenting dep-" not in result + + def test_cargo_update_shows_major_bumps(self): + output = "\n".join( + [ + " Updating serde v1.0.0 -> v2.0.0", + " Updating tokio v1.28.0 -> v1.29.0", + " Updating rand v0.8.0 -> v0.8.1", + " Adding new-dep v0.1.0", + " Removing old-dep v0.5.0", + ] + ) + result = self.p.process("cargo update", output) + assert "MAJOR" in result + assert "serde" in result + assert "new-dep" in result + assert "old-dep" in result + + def test_cargo_update_collapses_patch_bumps(self): + lines = [f" Updating dep-{i} v1.0.{i} -> v1.0.{i + 1}" for i in range(20)] + output = "\n".join(lines) + result = self.p.process("cargo update", output) + assert "20 dependencies updated" in result + assert "Minor/patch updates: 20" in result + + def test_cargo_bench_keeps_results(self): + output = "\n".join( + [ + " Compiling myapp v0.1.0", + " Compiling myapp-bench v0.1.0", + " Running benches/bench.rs", + "test bench_add ... bench: 10 ns/iter (+/- 2)", + "test bench_multiply ... bench: 25 ns/iter (+/- 5)", + "test result: ok. 2 passed; 0 failed; 0 ignored; 2 measured", + ] + ) + result = self.p.process("cargo bench", output) + assert "bench_add" in result + assert "bench_multiply" in result + assert "10 ns/iter" in result + assert "test result:" in result + assert "2 crates compiled" in result + assert "Running" not in result + + def test_cargo_build_downloading_and_compiling(self): + lines = [f" Downloading dep-{i} v1.0.{i}" for i in range(10)] + lines.extend([f" Compiling dep-{i} v1.0.{i}" for i in range(10)]) + lines.append(" Finished dev [unoptimized] target(s) in 30.5s") + output = "\n".join(lines) + result = self.p.process("cargo build", output) + assert "10 crates downloaded" in result + assert "10 crates compiled" in result + assert "Finished" in result + + +# --- Go Processor --- + + +class TestGoProcessor: + def setup_method(self): + self.p = GoProcessor() + + def test_can_handle_go_commands(self): + assert self.p.can_handle("go build ./...") + assert self.p.can_handle("go build -o myapp ./cmd/server") + assert self.p.can_handle("go vet ./...") + assert self.p.can_handle("go mod tidy") + assert self.p.can_handle("go mod download") + assert self.p.can_handle("go generate ./...") + assert self.p.can_handle("go install ./cmd/...") + + def test_cannot_handle_go_test(self): + assert not self.p.can_handle("go test ./...") + + def test_cannot_handle_golangci_lint(self): + assert not self.p.can_handle("golangci-lint run") + + def test_cannot_handle_non_go(self): + assert not self.p.can_handle("git status") + + def test_empty_output(self): + assert self.p.process("go build ./...", "") == "" + + def test_go_build_preserves_errors(self): + output = "\n".join( + [ + "# myapp/pkg/handler", + "pkg/handler/main.go:15:2: undefined: DoSomething", + "pkg/handler/main.go:20:10: cannot use x (variable of type string) as int", + ] + ) + result = self.p.process("go build ./...", output) + assert "undefined: DoSomething" in result + assert "main.go:15:2" in result + assert "main.go:20:10" in result + + def test_go_vet_groups_warnings(self): + warnings = [] + for i in range(6): + warnings.append(f"pkg/file{i}.go:{i + 1}:5: printf format %d has arg of wrong type") + for i in range(3): + warnings.append(f"pkg/util{i}.go:{i + 1}:3: unreachable code") + output = "\n".join(warnings) + result = self.p.process("go vet ./...", output) + assert "printf" in result + assert "6 warnings" in result + assert "unreachable" in result + + def test_go_mod_tidy_collapses_downloads(self): + lines = [f"go: downloading github.com/pkg/dep{i} v1.0.{i}" for i in range(30)] + lines.append("go: added github.com/new/pkg v0.5.0") + lines.append("go: removed github.com/old/pkg v1.0.0") + output = "\n".join(lines) + result = self.p.process("go mod tidy", output) + assert "30 packages downloaded" in result + assert "added" in result + assert "removed" in result + assert "downloading" not in result + + def test_go_mod_keeps_added_removed(self): + output = "\n".join( + [ + "go: downloading github.com/pkg/a v1.0.0", + "go: added github.com/pkg/a v1.0.0", + "go: upgraded github.com/pkg/b v1.0.0 => v1.1.0", + ] + ) + result = self.p.process("go mod tidy", output) + assert "added" in result + assert "upgraded" in result + + def test_go_generate_collapses_running(self): + lines = [f"main.go:{i}: running mockgen" for i in range(10)] + lines.append("Generated output.go") + output = "\n".join(lines) + result = self.p.process("go generate ./...", output) + assert "10 generators ran" in result + assert "Generated output.go" in result + + def test_go_generate_keeps_errors(self): + output = "\n".join( + [ + "main.go:5: running stringer", + "error: stringer: can't find type Foo", + ] + ) + result = self.p.process("go generate ./...", output) + assert "error" in result + assert "can't find type Foo" in result + + def test_go_install_delegates_to_build(self): + output = "\n".join( + [ + "# myapp/cmd/server", + "cmd/server/main.go:10:5: undefined: handler.New", + ] + ) + result = self.p.process("go install ./cmd/...", output) + assert "undefined: handler.New" in result + + +# --- SSH Processor --- + + +class TestSshProcessor: + def setup_method(self): + self.p = SshProcessor() + + def test_can_handle_non_interactive_ssh(self): + assert self.p.can_handle("ssh host 'ls -la'") + assert self.p.can_handle('ssh host "uname -a"') + assert self.p.can_handle("ssh -o StrictHostKeyChecking=no host 'uptime'") + + def test_cannot_handle_interactive_ssh(self): + assert not self.p.can_handle("ssh host") + assert not self.p.can_handle("ssh -p 22 host") + + def test_can_handle_scp(self): + assert self.p.can_handle("scp file.txt host:/tmp/") + assert self.p.can_handle("scp -r dir/ user@host:/path/") + + def test_empty_output(self): + assert self.p.process("ssh host 'ls'", "") == "" + + def test_ssh_large_output_compressed(self): + lines = [f"line {i}: some output data" for i in range(200)] + output = "\n".join(lines) + result = self.p.process("ssh host 'ls -la'", output) + assert len(result) < len(output) + # Head preserved + assert "line 0" in result + # Tail preserved + assert "line 199" in result + + def test_ssh_preserves_errors(self): + lines = [f"data line {i}" for i in range(50)] + lines.insert(25, "ERROR: connection failed at step 25") + output = "\n".join(lines) + result = self.p.process("ssh host 'run_job'", output) + assert "ERROR: connection failed" in result + + def test_scp_collapses_progress(self): + output = "\n".join( + [ + "file1.tar.gz 10% 24MB 12.3MB/s 00:02", + "file1.tar.gz 50% 120MB 12.3MB/s 00:10", + "file1.tar.gz 100% 245MB 12.3MB/s 00:20", + "file2.tar.gz 10% 10MB 5.0MB/s 00:01", + "file2.tar.gz 100% 100MB 5.0MB/s 00:20", + ] + ) + result = self.p.process("scp file1.tar.gz file2.tar.gz host:/tmp/", output) + assert len(result) < len(output) + + def test_scp_keeps_errors(self): + output = "\n".join( + [ + "file.txt 100% 1MB 1.0MB/s 00:01", + "scp: /remote/path: Permission denied", + ] + ) + result = self.p.process("scp file.txt host:/remote/path", output) + assert "Permission denied" in result + + +# --- JQ/YQ Processor --- + + +class TestJqYqProcessor: + def setup_method(self): + self.p = JqYqProcessor() + + def test_can_handle_jq(self): + assert self.p.can_handle("jq . file.json") + assert self.p.can_handle("jq '.items[]' data.json") + assert self.p.can_handle("jq -r .name file.json") + + def test_can_handle_yq(self): + assert self.p.can_handle("yq . config.yaml") + assert self.p.can_handle("yq eval '.spec' deployment.yaml") + + def test_cannot_handle_non_jq(self): + assert not self.p.can_handle("cat file.json") + assert not self.p.can_handle("grep pattern file") + + def test_empty_output(self): + assert self.p.process("jq . file.json", "") == "" + + def test_jq_small_output_passthrough(self): + data = {"key": "value", "count": 42} + import json + + output = json.dumps(data, indent=2) + result = self.p.process("jq . file.json", output) + assert result == output + + def test_jq_large_json_compressed(self): + import json + + data = [ + {"id": i, "name": f"item-{i}", "data": {"nested": "value" * 10}} for i in range(100) + ] + output = json.dumps(data, indent=2) + assert len(output.splitlines()) > 50 + result = self.p.process("jq . file.json", output) + assert len(result) < len(output) + + def test_jq_streaming_output(self): + import json + + lines = [json.dumps({"id": i, "name": f"item-{i}"}) for i in range(100)] + output = "\n".join(lines) + result = self.p.process("jq -c '.[]' file.json", output) + assert len(result) < len(output) + assert "same structure" in result + + def test_yq_small_output_passthrough(self): + output = "key: value\ncount: 42\n" + result = self.p.process("yq . config.yaml", output) + assert result == output + + def test_yq_large_output_summarized(self): + lines = ["root:"] + for i in range(80): + lines.append(f"- name: item-{i}") + lines.append(f" value: {i}") + output = "\n".join(lines) + result = self.p.process("yq . config.yaml", output) + assert len(result) < len(output)