diff --git a/plugin-hook-example/README.md b/plugin-hook-example/README.md new file mode 100644 index 0000000..4aa96a8 --- /dev/null +++ b/plugin-hook-example/README.md @@ -0,0 +1,113 @@ +# plugin-hook-example + +Reference plugin showing the `transform_llm_output` lifecycle hook. +Redacts SSN, Luhn-valid credit-card numbers, and common API-key +shapes from assistant output before it leaves the agent loop. Emits +one structured audit log line per scan so operators can see what +was caught without re-reading the text. + +## What it does + +Declares the `transform_llm_output` hook in `plugin.yaml`: + +```yaml +hooks: + - transform_llm_output +``` + +Wires it in `register(ctx)`: + +```python +def register(ctx): + ctx.register_hook( + name="transform_llm_output", + handler=_make_hook(ctx), + description="Redact SSN, Luhn-valid credit cards, and API-key shapes from assistant output.", + ) +``` + +The hook receives the assistant's outbound text and returns the same +text with each match replaced by `[REDACTED:<kind>]`: + +| Pattern | Example | Replacement | +|---|---|---| +| US SSN | `123-45-6789` | `[REDACTED:ssn]` | +| Credit card (Luhn-valid, 13-19 digits) | `4111 1111 1111 1111` | `[REDACTED:credit_card]` | +| OpenAI key | `sk-abc...` | `[REDACTED:openai_key]` | +| OpenAI project key | `sk-proj-...` | `[REDACTED:openai_project_key]` | +| Anthropic key | `sk-ant-...` | `[REDACTED:anthropic_key]` | +| AWS access key | `AKIA...`, `ASIA...` | `[REDACTED:aws_access_key]` | +| GitHub token | `ghp_...`, `gho_...`, etc. | `[REDACTED:github_token]` | + +## Why this shape + +The plugin is intentionally policy-only: + +* No blocking, retry, or escalation - returning the redacted string is + the whole contract. +* Patterns favour high precision over high recall. SSN refuses the + invalid SSA blocks; credit cards must pass a Luhn check; each API + key prefix is specific enough that a match is overwhelmingly real. 
+* Pure-function redaction logic in `redact.py` is unit-testable + without any `ctx` fixture. + +That keeps the example focused on the **hook lifecycle** itself +(declare, register, return transformed text) rather than the +redaction strategy, which a real deployment would replace with +Microsoft Presidio, regex catalogs, or a dedicated DLP service. + +## How it works + +The host calls the hook with the assistant's outbound text: + +```python +def hook(text, **metadata): + redacted, audit = redact(text) + if not audit: + return text + logger.info( + "plugin-hook-example: redacted %d span(s) from %d-char output [%s]", + len(audit), len(text), summary, + ) + return redacted +``` + +The host then sends the redacted text on to whatever channel the +user is connected through (CLI, Telegram, Discord, web UI). + +Cold-path note: if the input has no matches, the hook returns the +original string unchanged and writes no log entry - so the steady +state costs only the regex scan itself. + +## Audit output + +A scan produces one structured log line on the standard `logging` +hierarchy: + +``` +INFO plugin_hook_example: redacted 2 span(s) from 1284-char output [openai_key=1, ssn=1] +``` + +Real audit pipelines route this through `ctx.audit.emit` or a +dedicated sink; the reference plugin keeps it on the standard +logger so it shows up in the host's log stream without any extra +wiring. + +## Try it + +Clone this repo (or download just this directory), drop it into +your user-plugins folder, and enable it: + +```bash +git clone https://github.com/NousResearch/hermes-example-plugins.git +cp -r hermes-example-plugins/plugin-hook-example ~/.hermes/plugins/ +hermes plugins enable plugin-hook-example +``` + +Then in a Hermes session, ask the assistant to read a file or +output that happens to contain a secret shape. The redaction +fires and the audit line shows up in the host log. + +## License + +MIT, same as hermes-agent. 
diff --git a/plugin-hook-example/__init__.py b/plugin-hook-example/__init__.py new file mode 100644 index 0000000..93907b4 --- /dev/null +++ b/plugin-hook-example/__init__.py @@ -0,0 +1,76 @@ +"""plugin-hook-example - reference plugin for the ``transform_llm_output`` hook. + +Companion to the +Plugin Hooks +docs page. Demonstrates the lifecycle-hook surface: + +* declares ``transform_llm_output`` in ``plugin.yaml``, +* registers a single hook that redacts SSN, Luhn-valid credit cards, + and common API-key shapes from assistant output, +* emits one ``logger.info`` audit record per scan with the count and + kinds of redactions so operators can see what was caught without + re-reading the text. + +The plugin is intentionally policy-only: it does not block, retry, or +escalate. The host can chain additional hooks (rate limiting, content +classification, summarisation) downstream of this one. + +The trust gate stays at the default: hooks run for every assistant +turn unless the operator disables this plugin in ``config.yaml``. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from .redact import Redaction, redact + +logger = logging.getLogger(__name__) + + +def _make_hook(ctx: Any): + """Build the ``transform_llm_output`` hook bound to this plugin's ctx.""" + + plugin_name = "plugin-hook-example" + + def hook(text: str, **_metadata: Any) -> str: + redacted, audit = redact(text) + if not audit: + return text + _log_audit(plugin_name, audit, original_len=len(text)) + return redacted + + return hook + + +def _log_audit(plugin_name: str, audit: list[Redaction], *, original_len: int) -> None: + """Emit a single structured log line summarising one scan. + + Real audit pipelines would route this through ``ctx.audit.emit`` or + a dedicated sink; for the reference plugin we keep it on the + standard logger so it shows up in the host's log stream without + any extra wiring. 
+ """ + + kinds: dict[str, int] = {} + for entry in audit: + kinds[entry.kind] = kinds.get(entry.kind, 0) + 1 + summary = ", ".join(f"{k}={v}" for k, v in sorted(kinds.items())) + logger.info( + "%s: redacted %d span(s) from %d-char output [%s]", + plugin_name, + len(audit), + original_len, + summary, + ) + + +def register(ctx: Any) -> None: + """Plugin entry point - wires the lifecycle hook.""" + ctx.register_hook( + name="transform_llm_output", + handler=_make_hook(ctx), + description="Redact SSN, Luhn-valid credit cards, and API-key shapes from assistant output.", + ) + logger.debug("plugin-hook-example: registered transform_llm_output hook") diff --git a/plugin-hook-example/plugin.yaml b/plugin-hook-example/plugin.yaml new file mode 100644 index 0000000..d7e4a77 --- /dev/null +++ b/plugin-hook-example/plugin.yaml @@ -0,0 +1,8 @@ +name: plugin-hook-example +version: 1.0.0 +description: "Reference plugin showing the transform_llm_output lifecycle hook. Redacts SSN, Luhn-valid credit card numbers, and common API-key shapes (OpenAI, Anthropic, AWS, GitHub) from assistant output before it leaves the agent loop, and emits a structured audit log line per redaction so operators can see what was caught." +author: NousResearch +hooks: + - transform_llm_output +provides: + commands: [] diff --git a/plugin-hook-example/redact.py b/plugin-hook-example/redact.py new file mode 100644 index 0000000..a4e5185 --- /dev/null +++ b/plugin-hook-example/redact.py @@ -0,0 +1,108 @@ +"""Pure-function redaction helpers for ``plugin-hook-example``. + +The patterns are deliberately conservative: each one targets a shape +that has near-zero false-positive risk in normal prose. For this +reference plugin a false positive (ordinary output mangled into a +marker) is treated as costlier than a false negative (a near-miss the +regex didn't catch), so the patterns favour high precision over high recall. 
+ +Callers receive the redacted text plus an audit list naming each match +(kind + offset), so the hook can emit a structured log entry without +re-scanning the text. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + + +@dataclass(frozen=True) +class Redaction: + kind: str + start: int + end: int + + +# US SSN (digit-only, ###-##-####). Refuses the well-known invalid blocks +# (000, 666, 9##) per SSA convention to cut false positives. +_SSN = re.compile( + r"\b(?!000|666|9\d{2})\d{3}-(?!00)\d{2}-(?!0000)\d{4}\b" +) + +# 13-19 digit numbers with optional spaces or single hyphens. We Luhn-check +# in the second pass; the regex's job is to narrow the candidate set. +_CC_CANDIDATE = re.compile(r"\b(?:\d[ \-]?){12,18}\d\b") + +# Common API-key shapes. Each prefix is well-defined enough that a match +# is overwhelmingly an actual key, not a coincidental string. +_API_KEYS: list[tuple[str, re.Pattern[str]]] = [ + ("openai_key", re.compile(r"\bsk-[A-Za-z0-9]{20,}\b")), + ("openai_project_key", re.compile(r"\bsk-proj-[A-Za-z0-9_-]{20,}\b")), + ("anthropic_key", re.compile(r"\bsk-ant-[A-Za-z0-9_-]{20,}\b")), + ("aws_access_key", re.compile(r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b")), + ("github_token", re.compile(r"\b(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}\b")), +] + + +def _luhn_valid(digits: str) -> bool: + total = 0 + parity = len(digits) % 2 + for index, ch in enumerate(digits): + d = ord(ch) - 48 + if index % 2 == parity: + d *= 2 + if d > 9: + d -= 9 + total += d + return total % 10 == 0 + + +def redact(text: str) -> tuple[str, list[Redaction]]: + """Return ``(redacted_text, audit_entries)``. + + The audit list records each match in left-to-right order. The + returned text replaces each match with ``[REDACTED:<kind>]``. 
+ """ + + if not text: + return text, [] + + matches: list[tuple[int, int, str]] = [] + + for match in _SSN.finditer(text): + matches.append((match.start(), match.end(), "ssn")) + + for match in _CC_CANDIDATE.finditer(text): + candidate = match.group(0) + digits_only = re.sub(r"[ \-]", "", candidate) + if 13 <= len(digits_only) <= 19 and _luhn_valid(digits_only): + matches.append((match.start(), match.end(), "credit_card")) + + for kind, pattern in _API_KEYS: + for match in pattern.finditer(text): + matches.append((match.start(), match.end(), kind)) + + if not matches: + return text, [] + + # Resolve overlaps: prefer the earlier match, then the longer one. + matches.sort(key=lambda item: (item[0], -(item[1] - item[0]))) + resolved: list[tuple[int, int, str]] = [] + cursor = 0 + for start, end, kind in matches: + if start < cursor: + continue + resolved.append((start, end, kind)) + cursor = end + + pieces: list[str] = [] + audit: list[Redaction] = [] + last = 0 + for start, end, kind in resolved: + pieces.append(text[last:start]) + pieces.append(f"[REDACTED:{kind}]") + audit.append(Redaction(kind=kind, start=start, end=end)) + last = end + pieces.append(text[last:]) + return "".join(pieces), audit