From 527fde4bb42b58384418b81dae445fb9d477548a Mon Sep 17 00:00:00 2001 From: Aolinge <153434584+aolingge@users.noreply.github.com> Date: Tue, 28 Apr 2026 17:54:36 +0800 Subject: [PATCH 1/5] feat: US-006 - Harden community files and CI --- docs/release-readiness.md | 2 ++ scripts/ralph/prd.json | 4 ++-- scripts/ralph/progress.txt | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/docs/release-readiness.md b/docs/release-readiness.md index 81800e7..474c813 100644 --- a/docs/release-readiness.md +++ b/docs/release-readiness.md @@ -38,6 +38,8 @@ node scripts/release-dry-run.mjs The dry-run script wraps `npm pack --json --dry-run`, parses the package file list, and fails if required public artifacts are missing from the tarball preview. It does not publish to npm, create tags, create GitHub releases, push branches, or read credentials. +If a later optional integration needs credentials, reuse an existing logged-in browser session, OpenCLI, a tool-native credential store, Cockpit Tools, or the local encrypted AI secret vault. Do not ask contributors to paste credentials into issue templates, progress logs, docs, repository files, or dry-run output. + ## Launch Surface Dry Run Before posting publicly, replace launch placeholders only after the matching public URLs exist: diff --git a/scripts/ralph/prd.json b/scripts/ralph/prd.json index ad38347..21264ce 100644 --- a/scripts/ralph/prd.json +++ b/scripts/ralph/prd.json @@ -101,8 +101,8 @@ "Typecheck passes" ], "priority": 6, - "passes": false, - "notes": "Reuse decision: current CI already uses npm ci, setup-node, npm run check, npm run smoke, and contents: read. Online prior art checked: GitHub Actions docs support explicit GITHUB_TOKEN permissions; npm docs support package files/bin/README review and dry-run packlist checks. Avoid adding publish or release jobs." 
+ "passes": true, + "notes": "Reuse decision: current CI already uses npm ci, setup-node, npm run check, npm run smoke, and contents: read. Online prior art checked: GitHub Actions docs support explicit GITHUB_TOKEN permissions; npm docs support package files/bin/README review and dry-run packlist checks. Avoid adding publish or release jobs. Verified release dry run locally with 86 intended package files and required files present; credential reuse boundary documented in docs/release-readiness.md." }, { "id": "US-007", diff --git a/scripts/ralph/progress.txt b/scripts/ralph/progress.txt index 4e5e5a1..e051355 100644 --- a/scripts/ralph/progress.txt +++ b/scripts/ralph/progress.txt @@ -219,3 +219,26 @@ Started: 2026-04-28T12:51:53 - Remaining boundary: public GitHub repository creation, remote push, GitHub release, and `npm publish` were intentionally not performed because they are external publication actions requiring explicit user confirmation. - Local final commit hash: recorded in final response after commit creation; not embedded here because a commit cannot contain its own final hash. --- +## 2026-04-28 17:54:00 +08:00 - US-006 +- What was implemented + - Reviewed CI, Dependabot, issue templates, PR template, contributing/security/changelog/license/gitignore/package metadata, release readiness docs, and package dry-run tooling against local-only release safety rules. + - Added an explicit credential boundary to `docs/release-readiness.md`: later optional integrations must reuse existing logged-in sessions, OpenCLI, tool-native credential stores, Cockpit Tools, or the local encrypted AI secret vault, and must not ask contributors to paste credentials into repository or progress artifacts. + - Marked US-006 passing in `scripts/ralph/prd.json`. +- Files changed + - `docs/release-readiness.md` + - `scripts/ralph/prd.json` + - `scripts/ralph/progress.txt` +- Verification run + - `npm install` installed local dependencies needed for `tsc`. 
+ - `npm run typecheck` passed. + - `npm run build` passed. + - `node scripts/release-dry-run.mjs` passed: `agent-reliability-kit@0.1.0`, 86 files, 244.0 KiB unpacked, required files present. + - `npm pack --json --dry-run` passed and showed the intended package contents without publishing. +- Self-review result and remaining P2/P3 risks, if any + - P0/P1: none found for US-006. CI remains read-only, runs `npm ci`, `npm run check`, and `npm run smoke`, and no workflow publishes packages, creates releases, pushes branches, or requires secrets. + - P2: `scripts/ralph/task.md` has a pre-existing/unrelated update from the current Ralph task prompt and is intentionally excluded from this story commit. +- Learnings for future iterations: + - Pattern discovered: keep release safety boundaries in `docs/release-readiness.md` so package dry runs, CI, and community templates share the same local-only assumptions. + - Gotcha encountered: verification failed before `npm install` because `tsc` was unavailable; install dependencies before TypeScript checks on a fresh workspace. + - Useful context for the next iteration: US-007 can reuse the current build output and should avoid committing generated `.tmp` self-scan reports. 
+--- From fa485dce2266e6cca560f992123917ed97dd61cf Mon Sep 17 00:00:00 2001 From: Aolinge <153434584+aolingge@users.noreply.github.com> Date: Tue, 28 Apr 2026 18:06:17 +0800 Subject: [PATCH 2/5] feat: US-007 - Run local verification and self scan --- .gitignore | 1 + scripts/lint.mjs | 3 +-- scripts/ralph/prd.json | 4 ++-- scripts/ralph/progress.txt | 33 +++++++++++++++++++++++++++++++++ src/report/html.ts | 2 +- 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index ce0b1ef..ad948d1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ node_modules/ dist/ coverage/ .tmp/ +.playwright-mcp/ .agent-reliability/ *.log /progress.md diff --git a/scripts/lint.mjs b/scripts/lint.mjs index 41f0d25..a682347 100644 --- a/scripts/lint.mjs +++ b/scripts/lint.mjs @@ -3,7 +3,7 @@ import path from "node:path"; const root = process.cwd(); const checkedExtensions = new Set([".ts", ".js", ".md", ".json", ".yml", ".yaml", ".html", ".svg"]); -const ignoredDirs = new Set([".git", "node_modules", "dist", "coverage", ".tmp", ".agent-reliability"]); +const ignoredDirs = new Set([".git", "node_modules", "dist", "coverage", ".tmp", ".playwright-mcp", ".agent-reliability"]); const problems = []; function walk(dir) { @@ -32,4 +32,3 @@ if (problems.length > 0) { } console.log("lint: ok"); - diff --git a/scripts/ralph/prd.json b/scripts/ralph/prd.json index 21264ce..35b0451 100644 --- a/scripts/ralph/prd.json +++ b/scripts/ralph/prd.json @@ -121,8 +121,8 @@ "Typecheck passes" ], "priority": 7, - "passes": false, - "notes": "Reuse decision: dogfood the built CLI and existing smoke script before considering new validation tooling. The Build/Sale/Test/Fix flywheel is captured here by building artifacts, validating product-facing outputs, and turning self-scan failures into a prioritized backlog." + "passes": true, + "notes": "Reuse decision: dogfood the built CLI and existing smoke script before considering new validation tooling. 
The Build/Sale/Test/Fix flywheel is captured here by building artifacts, validating product-facing outputs, and turning self-scan failures into a prioritized backlog. Verified npm run check, npm run smoke, self-scan markdown/json/html/sarif/annotations, JSON parsing, static HTML generation, and Playwright desktop/mobile screenshots. Fixed P1 lint exclusion for local Playwright MCP artifacts and kept mobile report heading width polish." }, { "id": "US-008", diff --git a/scripts/ralph/progress.txt b/scripts/ralph/progress.txt index e051355..a958e95 100644 --- a/scripts/ralph/progress.txt +++ b/scripts/ralph/progress.txt @@ -242,3 +242,36 @@ Started: 2026-04-28T12:51:53 - Gotcha encountered: verification failed before `npm install` because `tsc` was unavailable; install dependencies before TypeScript checks on a fresh workspace. - Useful context for the next iteration: US-007 can reuse the current build output and should avoid committing generated `.tmp` self-scan reports. --- +## 2026-04-28T18:02:45 - runner-warning +- Codex iteration 2 exited with code -1. +- The loop will continue unless max iterations is reached. +--- +## 2026-04-28 18:05:32 +08:00 - US-007 +- What was implemented + - Ran full local verification, smoke test, and repository self-scan with markdown, JSON, HTML, SARIF, and annotations output. + - Parsed `.tmp/self-scan/report.json` successfully and confirmed score 100 with 0 findings. + - Opened `.tmp/self-scan/report.html` through Playwright screenshot checks at desktop 1280x900 and mobile 390x844. + - Fixed a P1 verification issue where local `.playwright-mcp` browser artifacts were ignored by git but still scanned by `npm run lint`. + - Kept the mobile HTML report heading width polish after browser review showed it prevents overly narrow wrapping. 
+- Files changed + - `.gitignore` + - `scripts/lint.mjs` + - `src/report/html.ts` + - `scripts/ralph/prd.json` + - `scripts/ralph/progress.txt` +- Verification run + - `npm run check` passed: lint ok, typecheck ok, 23 tests passed, build passed. + - `npm run smoke` passed: clean fixture score 100/100, 0 findings. + - `node dist/cli.js scan . --out .tmp/self-scan --format markdown,json,html,sarif,annotations --min-score 0` passed: score 100/100, 0 findings. + - `.tmp/self-scan/report.json` parsed with Node and `.tmp/self-scan/report.html` was generated. + - `playwright screenshot --viewport-size=1280,900 .tmp/self-scan/report.html .tmp/self-scan/report-desktop.png` passed. + - `playwright screenshot --viewport-size=390,844 .tmp/self-scan/report.html .tmp/self-scan/report-mobile.png` passed. +- Self-review result and remaining P2/P3 risks, if any + - P0/P1: none remaining. The only P1 found was lint scanning local Playwright MCP output despite `.gitignore`; fixed by excluding `.playwright-mcp` in `scripts/lint.mjs`. + - P2: `.tmp/self-scan` screenshots and reports are generated evidence only and remain ignored/uncommitted. + - P2: `scripts/ralph/task.md` contains a pre-existing task prompt update outside this story and is intentionally not included in the US-007 commit. +- Learnings for future iterations: + - Pattern discovered: keep browser-tool artifact directories aligned between `.gitignore` and `scripts/lint.mjs`. + - Gotcha encountered: PowerShell treats JavaScript template backticks as escapes inside `node -e`; use plain string concatenation for one-line JSON parse checks. + - Useful context for the next iteration: US-008 should rerun final `npm run check`, `npm run smoke`, and self-scan, then inspect the accumulated diff before the final local commit. 
+--- diff --git a/src/report/html.ts b/src/report/html.ts index 860e5a4..6ea77e6 100644 --- a/src/report/html.ts +++ b/src/report/html.ts @@ -103,7 +103,7 @@ export function formatHtml(report: Report): string { main { width: calc(100% - 24px); max-width: 1120px; padding: 24px 0 40px; } .hero, .facts-grid { grid-template-columns: 1fr; } .hero > *, .facts-grid > * { min-width: 0; } - h1 { font-size: 32px; line-height: 1; max-width: 8ch; } + h1 { font-size: 32px; line-height: 1; max-width: 12ch; } p { max-width: 26ch; } .title, .score, .finding, .empty, .facts { padding: 18px; } .score { min-height: 190px; } From 5dbf431629d171b472efe94432ffc37401479750 Mon Sep 17 00:00:00 2001 From: Aolinge <153434584+aolingge@users.noreply.github.com> Date: Tue, 28 Apr 2026 18:09:22 +0800 Subject: [PATCH 3/5] feat: US-008 - Final review and local commit --- scripts/ralph/prd.json | 4 ++-- scripts/ralph/progress.txt | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/scripts/ralph/prd.json b/scripts/ralph/prd.json index 35b0451..4e98ad9 100644 --- a/scripts/ralph/prd.json +++ b/scripts/ralph/prd.json @@ -139,8 +139,8 @@ "Typecheck passes" ], "priority": 8, - "passes": false, - "notes": "Reuse decision: use standard git local commit workflow and existing npm verification commands. Keep release readiness as documentation and dry-run evidence only; publishing and remote operations remain out of scope unless the user explicitly asks later." + "passes": true, + "notes": "Reuse decision: use standard git local commit workflow and existing npm verification commands. Keep release readiness as documentation and dry-run evidence only; publishing and remote operations remain out of scope unless the user explicitly asks later. Final verification passed with npm run check, npm run smoke, and repository self-scan score 100 with 0 findings." 
} ] } diff --git a/scripts/ralph/progress.txt b/scripts/ralph/progress.txt index a958e95..03374fb 100644 --- a/scripts/ralph/progress.txt +++ b/scripts/ralph/progress.txt @@ -275,3 +275,27 @@ Started: 2026-04-28T12:51:53 - Gotcha encountered: PowerShell treats JavaScript template backticks as escapes inside `node -e`; use plain string concatenation for one-line JSON parse checks. - Useful context for the next iteration: US-008 should rerun final `npm run check`, `npm run smoke`, and self-scan, then inspect the accumulated diff before the final local commit. --- +## 2026-04-28 18:08:47 +08:00 - US-008 +- What was implemented + - Completed the final review of accumulated product-quality changes against all PRD stories. + - Confirmed the final tracked diff only includes expected local repository changes for release readiness docs, lint/report polish, and Ralph state. + - Kept publishing, GitHub release creation, remote push, user-data deletion, and secret use out of scope. +- Files changed + - `scripts/ralph/prd.json` + - `scripts/ralph/progress.txt` +- Verification run + - `npm run check` passed: lint ok, typecheck ok, 23 tests passed, build passed. + - `npm run smoke` passed: clean fixture score 100/100, 0 findings. + - `node dist/cli.js scan . --out .tmp/self-scan --format markdown,json,html,sarif,annotations --min-score 0` passed: score 100/100, 0 findings. + - Parsed `.tmp/self-scan/report.json` successfully: score 100, grade A, 0 findings. + - Confirmed `.tmp/self-scan/report.html` exists as generated static HTML evidence. +- Self-review result and remaining P2/P3 risks, if any + - P0/P1: none found. No final fixes were needed after review. + - P2: ignored generated verification outputs remain in `.tmp/`, `dist/`, `.playwright-mcp/`, `node_modules/`, and fixture-local `.tmp/`; they are intentionally uncommitted. + - P2: `scripts/ralph/task.md` contains an unrelated Ralph prompt update for a later expanded task and is intentionally left uncommitted. 
+ - Local commit hash: pending until the US-008 local commit is created. +- Learnings for future iterations: + - Pattern discovered: final review should compare `git diff main..HEAD` plus current working tree status so ignored evidence and unrelated Ralph prompt updates do not enter story commits. + - Gotcha encountered: a commit cannot contain its own final hash without changing that hash; record the final hash in the terminal/final response after commit. + - Useful context for the next iteration: all PRD stories are now marked passing after verification; no push, publish, or release was performed. +--- From c00f1582e3f8fca637ed31969a9d79e3d817f29e Mon Sep 17 00:00:00 2001 From: Aolinge <153434584+aolingge@users.noreply.github.com> Date: Tue, 28 Apr 2026 19:43:45 +0800 Subject: [PATCH 4/5] feat: expand agent reliability kit product surface --- README.md | 52 ++- assets/cli-demo.svg | 17 + docs/ai-cost-guard.md | 44 +++ docs/commercial-support.md | 36 +++ ...gent-reliability-kit-vs-generic-linters.md | 22 ++ .../agent-secret-guard-vs-gitleaks.md | 28 ++ docs/launch/demo-gif-script.md | 30 ++ docs/launch/distribution-checklist.md | 78 +++++ docs/n8n-safety-backup.md | 27 ++ docs/private-mcp-registry.md | 52 +++ docs/roadmap-consolidation.md | 33 ++ docs/team-layer.md | 46 +++ package.json | 3 + scripts/ralph/product-expansion-prd.json | 37 +++ scripts/ralph/progress.txt | 41 +++ scripts/ralph/task.md | 4 +- src/cli.ts | 173 +++++++++- src/core/files.ts | 2 +- src/core/redaction.ts | 46 +++ src/core/scan.ts | 4 +- src/cost/costReport.ts | 206 ++++++++++++ src/mcp/registry.ts | 303 ++++++++++++++++++ src/n8n/backup.ts | 70 ++++ src/scanners/n8n.ts | 131 ++++++++ src/team/teamAudit.ts | 239 ++++++++++++++ tests/cli.test.ts | 72 +++++ tests/fixtures/clean-node/SECURITY.md | 3 + tests/fixtures/mcp-registry/.mcp.json | 17 + tests/fixtures/n8n-risk/README.md | 3 + tests/fixtures/n8n-risk/workflows/risky.json | 35 ++ tests/scan.test.ts | 10 + 31 files changed, 1851 
insertions(+), 13 deletions(-) create mode 100644 assets/cli-demo.svg create mode 100644 docs/ai-cost-guard.md create mode 100644 docs/commercial-support.md create mode 100644 docs/comparisons/agent-reliability-kit-vs-generic-linters.md create mode 100644 docs/comparisons/agent-secret-guard-vs-gitleaks.md create mode 100644 docs/launch/demo-gif-script.md create mode 100644 docs/launch/distribution-checklist.md create mode 100644 docs/n8n-safety-backup.md create mode 100644 docs/private-mcp-registry.md create mode 100644 docs/roadmap-consolidation.md create mode 100644 docs/team-layer.md create mode 100644 scripts/ralph/product-expansion-prd.json create mode 100644 src/core/redaction.ts create mode 100644 src/cost/costReport.ts create mode 100644 src/mcp/registry.ts create mode 100644 src/n8n/backup.ts create mode 100644 src/scanners/n8n.ts create mode 100644 src/team/teamAudit.ts create mode 100644 tests/fixtures/clean-node/SECURITY.md create mode 100644 tests/fixtures/mcp-registry/.mcp.json create mode 100644 tests/fixtures/n8n-risk/README.md create mode 100644 tests/fixtures/n8n-risk/workflows/risky.json diff --git a/README.md b/README.md index b960984..7c316ce 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,9 @@ Verify, harden, and ship AI-agent-assisted codebases in one command. [![License: MIT](https://img.shields.io/badge/license-MIT-181714.svg)](LICENSE) [![Node](https://img.shields.io/badge/node-%3E%3D20-315f9f.svg)](package.json) -Agent Reliability Kit scans a repository the way a careful maintainer would before letting AI coding agents work there: agent instructions, verification commands, README quality, secret hygiene, GitHub Actions safety, MCP/tooling risk, and release readiness. 
+Agent Reliability Kit scans a repository the way a careful maintainer would before letting AI coding agents work there: agent instructions, verification commands, README quality, secret hygiene, GitHub Actions safety, MCP/tooling risk, n8n workflow exports, team policy, and release readiness. + +The flagship path is simple: keep `agent-secret-guard` as the sharp security wedge, and use `agent-reliability-kit` as the one command center for agent-era repository reliability. ## Quick Start @@ -30,6 +32,16 @@ After npm publication: npx agent-reliability-kit scan . ``` +Optional focused checks: + +```bash +ark team-audit . --out .agent-reliability/team +ark mcp-registry . --registry .agent-reliability/mcp-registry.json +ark n8n-scan . --out .agent-reliability/n8n +ark n8n-backup . --backup-dir .agent-reliability/n8n-backup +ark cost-report . --trace .agent-reliability/traces --budget-usd 10 +``` + The scan writes: - `.agent-reliability/report.md` @@ -52,6 +64,10 @@ AI coding agents fail most often on the unglamorous parts: missing repo rules, u | Secrets | token-like values, tracked `.env` files, redacted evidence | | GitHub Actions | validation commands, explicit permissions, risky triggers, pipe-to-shell | | AI tooling | MCP command configs and prompt-injection-like instruction files | +| MCP registry | private allowlist, trust score, approved commands/URLs, risk owner | +| n8n | public webhooks, command nodes, risky code nodes, workflow secrets, redacted backups | +| Team layer | scan history, policy gates, audit report, dry-run Slack payload | +| Cost guard | local trace token/cost summary and budget alerts | ## CLI @@ -59,6 +75,11 @@ AI coding agents fail most often on the unglamorous parts: missing repo rules, u agent-reliability-kit scan [path] agent-reliability-kit doctor [path] agent-reliability-kit init [path] +agent-reliability-kit team-audit [path] +agent-reliability-kit mcp-registry [path] +agent-reliability-kit n8n-scan [path] +agent-reliability-kit 
n8n-backup [path] +agent-reliability-kit cost-report [path] ``` Examples: @@ -68,6 +89,10 @@ ark scan . --min-score 85 ark scan . --format sarif --stdout > agent-reliability.sarif ark doctor . ark init . +ark team-audit . +ark mcp-registry . +ark n8n-scan . +ark cost-report . --budget-usd 10 ``` Machine-readable stdout stays clean for CI: @@ -82,6 +107,22 @@ ark scan . --format sarif --stdout > agent-reliability.sarif The HTML report is designed for maintainers, contributors, and launch pages. It gives a score, severity counts, repository signals, and next actions for each finding. +## Product Modules + +- [Team audit layer](docs/team-layer.md): scan history, policy checks, audit report, and local Slack payload. +- [Private MCP registry](docs/private-mcp-registry.md): team allowlist with trust score, approved commands/URLs, permissions, owner, and reason. +- [n8n safety and backup](docs/n8n-safety-backup.md): risky workflow scanning and redacted Git-friendly backups. +- [AI cost guard](docs/ai-cost-guard.md): local trace cost summaries and budget alerts. +- [Commercial support path](docs/commercial-support.md): open-source boundary and future paid team features. +- [Consolidation roadmap](docs/roadmap-consolidation.md): how small tools roll into the flagship CLI. + +![CLI demo](assets/cli-demo.svg) + +## Comparisons + +- [agent-secret-guard vs gitleaks](docs/comparisons/agent-secret-guard-vs-gitleaks.md) +- [Agent Reliability Kit vs generic linters](docs/comparisons/agent-reliability-kit-vs-generic-linters.md) + ## Launch Kit The repository includes a pre-release launch kit so maintainers can prepare a public launch without inventing copy or sharing private data at the last minute. 
@@ -92,6 +133,8 @@ The repository includes a pre-release launch kit so maintainers can prepare a pu - [Press kit](docs/launch/press-kit.md) - [Community responses](docs/launch/community-responses.md) - [Channel rules](docs/launch/channel-rules.md) +- [Distribution checklist](docs/launch/distribution-checklist.md) +- [Demo GIF script](docs/launch/demo-gif-script.md) - [Product Hunt draft](docs/launch/product-hunt.md) - [DEV article draft](docs/launch/devto-article.md) @@ -133,9 +176,10 @@ assets/ ## Roadmap - v0.1: CLI scan, doctor, init, Markdown/JSON/HTML/SARIF reports. -- v0.2: richer language detection, monorepo command graph, and config file support. -- v0.3: GitHub Action wrapper and dogfood gallery. -- v0.4: compatibility matrix for Codex, Claude Code, Cursor, Gemini CLI, and OpenCode. +- v0.2: team audit, private MCP registry, n8n safety/backup, and local cost guard. +- v0.3: GitHub Action wrapper, dogfood gallery, and `agent-secret-guard` rule-pack consolidation. +- v0.4: hosted team dashboard prototype, org policy packs, and private MCP approval workflow. +- v0.5: `pr verify`, `trace run`, and compatibility matrix for Codex, Claude Code, Cursor, Gemini CLI, and OpenCode. ## Security diff --git a/assets/cli-demo.svg b/assets/cli-demo.svg new file mode 100644 index 0000000..b41a1c4 --- /dev/null +++ b/assets/cli-demo.svg @@ -0,0 +1,17 @@ + + Agent Reliability Kit CLI demo + Terminal preview showing scan, MCP registry, n8n safety, and AI cost guard commands. + + + + + + $ ark scan . --min-score 85 + Score 92/100 - reports written to .agent-reliability + $ ark mcp-registry . + MCP registry audit FAIL: 2 findings + $ ark n8n-scan workflows --format text --stdout + CRIT n8n workflow can execute commands + $ ark cost-report . 
--budget-usd 10 + AI Cost Guard: 48 calls, $7.2400, no alerts + diff --git a/docs/ai-cost-guard.md b/docs/ai-cost-guard.md new file mode 100644 index 0000000..21b6e6f --- /dev/null +++ b/docs/ai-cost-guard.md @@ -0,0 +1,44 @@ +# AI Cost Guard + +`cost-report` summarizes local JSON or JSONL trace files from coding-agent runs. + +```bash +ark cost-report . \ + --trace .agent-reliability/traces \ + --budget-usd 10 \ + --out .agent-reliability/cost +``` + +It writes: + +- `.agent-reliability/cost/cost-report.md` +- `.agent-reliability/cost/cost-report.json` + +## Supported Event Shapes + +JSONL: + +```json +{"provider":"openai","model":"gpt-5.2","inputTokens":1000,"outputTokens":500,"costUsd":0.25} +{"provider":"anthropic","model":"claude-sonnet","usage":{"prompt_tokens":2000,"completion_tokens":1000},"cost_usd":0.75} +``` + +JSON: + +```json +{ + "events": [ + { + "provider": "openai", + "model": "gpt-5.2", + "inputTokens": 1000, + "outputTokens": 500, + "costUsd": 0.25 + } + ] +} +``` + +## Scope + +This is a local cost guard, not a provider billing source of truth. It is designed for agent run traces, budget alerts, and "which model burned the most tokens?" debugging. diff --git a/docs/commercial-support.md b/docs/commercial-support.md new file mode 100644 index 0000000..613e195 --- /dev/null +++ b/docs/commercial-support.md @@ -0,0 +1,36 @@ +# Commercial Support + +Agent Reliability Kit is open source and local-first. The natural paid layer is for teams that need shared policy, history, allowlists, and audit proof. 
+ +## Open Source + +- local CLI +- Markdown/JSON/HTML/SARIF reports +- n8n safety checks +- MCP registry audit +- local scan history +- dry-run Slack payload generation +- redacted n8n backups +- local AI cost reports + +## Future Paid Team Layer + +- hosted dashboard across repositories +- org-wide policy packs +- private MCP registry and approval workflow +- scan history and trend charts +- Slack/email/webhook alerts +- audit exports for security reviews +- team allowlists for tools, models, commands, and workflow templates + +## Support Positioning + +Commercial support can start before a hosted SaaS exists: + +- paid setup for teams adopting AI coding agents +- repository hardening audit +- MCP registry design +- n8n workflow safety review +- private policy pack maintenance + +The CLI should remain useful without a paid account. diff --git a/docs/comparisons/agent-reliability-kit-vs-generic-linters.md b/docs/comparisons/agent-reliability-kit-vs-generic-linters.md new file mode 100644 index 0000000..4677ef6 --- /dev/null +++ b/docs/comparisons/agent-reliability-kit-vs-generic-linters.md @@ -0,0 +1,22 @@ +# Agent Reliability Kit vs Generic Linters + +Generic linters check code style, syntax, and language-specific rules. + +Agent Reliability Kit checks whether a repository is safe and understandable enough for AI-assisted development. + +| Surface | Generic linter | Agent Reliability Kit | +| --- | --- | --- | +| Code syntax/style | Strong | Not the focus | +| Agent instructions | No | Yes | +| Verification commands | No | Yes | +| README replayability | No | Yes | +| GitHub Actions risk | Sometimes | Yes | +| Secret-like agent config | Sometimes | Yes | +| MCP allowlist | No | Yes | +| n8n workflow safety | No | Yes | +| Team audit report | No | Yes | +| Local cost report | No | Yes | + +## Message For Launch + +This is not another JavaScript linter. 
It is a release-readiness and agent-readiness scanner for teams using Codex, Claude Code, Cursor, Gemini CLI, MCP, and local automation. diff --git a/docs/comparisons/agent-secret-guard-vs-gitleaks.md b/docs/comparisons/agent-secret-guard-vs-gitleaks.md new file mode 100644 index 0000000..167b031 --- /dev/null +++ b/docs/comparisons/agent-secret-guard-vs-gitleaks.md @@ -0,0 +1,28 @@ +# agent-secret-guard vs gitleaks + +Use both when possible. + +`gitleaks` is a strong general-purpose secret scanner. `agent-secret-guard` and Agent Reliability Kit focus on the agent-era surfaces around those secrets. + +| Need | gitleaks | Agent Reliability Kit / agent-secret-guard | +| --- | --- | --- | +| Generic secret detection | Strong | Focused, lightweight | +| AI-agent config context | Limited | Built in | +| MCP config risk | Limited | Built in | +| Local automation notes | Limited | Built in | +| GitHub Actions permission hints | Limited | Built in | +| Redacted reports for agent handoff | Limited | Built in | +| Team policy and audit package | External | Local MVP | + +## Positioning + +If a team already runs `gitleaks`, Agent Reliability Kit should sit next to it: + +```bash +gitleaks detect +ark scan . +ark mcp-registry . +ark team-audit . +``` + +The product angle is not "replace every scanner." It is "catch the AI-agent and MCP operational risks that generic scanners do not explain well." diff --git a/docs/launch/demo-gif-script.md b/docs/launch/demo-gif-script.md new file mode 100644 index 0000000..d97e6d7 --- /dev/null +++ b/docs/launch/demo-gif-script.md @@ -0,0 +1,30 @@ +# Demo GIF Script + +Record this flow after `npm run build`. + +Use a clean terminal with no private paths, no real repository names, and no credentials visible. 
+ +```bash +npm run build +ark scan tests/fixtures/n8n-risk --format text --stdout --min-score 0 +ark mcp-registry tests/fixtures/mcp-registry --out .tmp/demo-mcp +ark cost-report tests/fixtures/cost-trace --budget-usd 0.50 --out .tmp/demo-cost +``` + +Storyboard: + +1. Start on the README hero. +2. Run `ark scan tests/fixtures/n8n-risk --format text --stdout --min-score 0`. +3. Show the critical n8n command execution finding. +4. Run `ark mcp-registry tests/fixtures/mcp-registry --out .tmp/demo-mcp`. +5. Show disabled/unallowlisted MCP server findings. +6. Run `ark cost-report tests/fixtures/cost-trace --budget-usd 0.50 --out .tmp/demo-cost`. +7. Show the budget alert and generated report paths. + +Recommended caption: + +```text +One local CLI for agent-era repo risk: secrets, MCP allowlists, n8n workflow safety, team audit, and AI cost guard. +``` + +Never record real configs, browser profiles, Slack webhooks, private repo paths, or token values. diff --git a/docs/launch/distribution-checklist.md b/docs/launch/distribution-checklist.md new file mode 100644 index 0000000..a283d4f --- /dev/null +++ b/docs/launch/distribution-checklist.md @@ -0,0 +1,78 @@ +# Distribution Checklist + +Use this only after local verification passes. + +## Must Be Ready + +- `npm run check` passes +- `npm run smoke` passes +- README quick start works from a clean clone +- demo command shows a real finding in under 30 seconds +- no real secrets in screenshots, GIFs, reports, fixtures, or issues +- release notes mention local-only safety boundaries + +## Launch Channels + +### Show HN + +Title: + +```text +Show HN: Agent Reliability Kit - local checks for AI-agent-ready repos +``` + +Post: + +```text +I built a local-first CLI for teams using Codex, Claude Code, Cursor, MCP, and n8n. + +It scans repo instructions, verification commands, README replayability, secret hygiene, GitHub Actions risk, MCP allowlists, n8n workflow exports, and local AI cost traces. 
+ +The goal is not to replace language linters. It is to answer: "Is this repo safe and clear enough to hand to an AI coding agent?" + +No account, no telemetry, no cloud required. +``` + +### Reddit + +Target communities: + +- `r/ClaudeAI` +- `r/cursor` +- `r/selfhosted` +- `r/devops` +- `r/n8n` +- `r/opensource` + +Angle: + +```text +I made a local scanner for AI-agent repo risk: MCP allowlists, n8n workflow safety, GitHub Actions permissions, secret-like values, and scan history. +``` + +### MCP Directories / Awesome Lists + +Submit the MCP registry angle: + +```text +Agent Reliability Kit helps teams review MCP config files against a private allowlist with trust score, permissions, approved commands/URLs, and risk owner metadata. +``` + +### Product Hunt + +Use after GitHub README, docs homepage, and demo asset are polished. + +Tagline: + +```text +Local reliability checks for AI-agent-assisted codebases. +``` + +## Follow-Up Metrics + +- GitHub stars +- npm downloads +- CLI runs from issue comments +- opened issues with real repo samples +- discussions asking for team policy/dashboard +- requests for GitHub Action or hosted reporting diff --git a/docs/n8n-safety-backup.md b/docs/n8n-safety-backup.md new file mode 100644 index 0000000..b7f87b5 --- /dev/null +++ b/docs/n8n-safety-backup.md @@ -0,0 +1,27 @@ +# n8n Safety And Backup + +Agent Reliability Kit now checks n8n workflow exports in the default `scan` command and also exposes n8n-focused commands. + +```bash +ark n8n-scan . --out .agent-reliability/n8n --format markdown,json,html +ark n8n-backup . --backup-dir .agent-reliability/n8n-backup +``` + +## Safety Checks + +- public webhook nodes without explicit authentication +- command execution nodes +- code/function nodes that use risky runtime APIs +- token-like values in workflow JSON + +## Backup Behavior + +`n8n-backup` writes formatted workflow JSON into a Git-friendly directory and redacts token-like values first. 
+ +The command writes: + +- redacted workflow JSON files +- `README.md` +- `backup-report.json` + +It does not call the n8n API. Export workflows locally first, then run the backup command over the exported folder or repository. diff --git a/docs/private-mcp-registry.md b/docs/private-mcp-registry.md new file mode 100644 index 0000000..c071015 --- /dev/null +++ b/docs/private-mcp-registry.md @@ -0,0 +1,52 @@ +# Private MCP Registry + +`mcp-registry` checks committed or passed MCP config files against a private allowlist. + +```bash +ark mcp-registry . \ + --registry .agent-reliability/mcp-registry.json \ + --config .mcp.json +``` + +It writes: + +- `.agent-reliability/mcp-registry-report.md` +- `.agent-reliability/mcp-registry-report.json` + +## Registry Format + +```json +{ + "servers": [ + { + "name": "filesystem", + "approved": true, + "trustScore": 90, + "permissions": ["filesystem"], + "allowedCommands": ["node"], + "riskOwner": "platform", + "riskReason": "Read-only local examples." + }, + { + "name": "browser", + "approved": false, + "trustScore": 45, + "permissions": ["browser", "network"], + "allowedUrls": ["https://approved.example/mcp"] + } + ] +} +``` + +## Checks + +- missing private registry +- MCP server not allowlisted +- disabled server still used in config +- low trust score +- command not in `allowedCommands` +- remote URL not in `allowedUrls` +- risky permissions without `riskOwner` and `riskReason` +- token-like values pasted into MCP config + +This is the base for a hosted team registry later. diff --git a/docs/roadmap-consolidation.md b/docs/roadmap-consolidation.md new file mode 100644 index 0000000..e1b594d --- /dev/null +++ b/docs/roadmap-consolidation.md @@ -0,0 +1,33 @@ +# Product Consolidation Roadmap + +Agent Reliability Kit is the umbrella product. + +The smaller tools should either become subcommands, rule packs, or SEO entry points that point back here. 
+ +## Current Umbrella Commands + +```text +ark scan +ark doctor +ark init +ark team-audit +ark mcp-registry +ark n8n-scan +ark n8n-backup +ark cost-report +``` + +## Migration Targets + +| Existing repo | Target in Agent Reliability Kit | +| --- | --- | +| `agent-secret-guard` | `ark scan` secret rules, later `ark secrets scan` | +| `mcp-config-doctor` | `ark mcp-registry` and future `ark mcp doctor` | +| `ai-pr-risk-labeler` | future `ark pr verify` | +| `agent-run-trace-pack` | `ark cost-report` and future `ark trace run` | +| `agent-hardening-kit` | umbrella positioning and docs merged into `ark scan` | +| n8n tools | `ark n8n-scan` and `ark n8n-backup` | + +## Rule + +Do not create another small repo until the flagship README, docs, npm package, demo, and launch channels are strong. diff --git a/docs/team-layer.md b/docs/team-layer.md new file mode 100644 index 0000000..1e9cca1 --- /dev/null +++ b/docs/team-layer.md @@ -0,0 +1,46 @@ +# Team Audit Layer + +`team-audit` is the local MVP for the future paid team layer. It does not require a server and it does not send network requests. + +```bash +ark team-audit . --out .agent-reliability/team +``` + +It writes: + +- `.agent-reliability/team/team-audit.md` +- `.agent-reliability/team/team-audit.json` +- `.agent-reliability/team/history/*.json` +- `.agent-reliability/team/slack-payload.json` + +## Policy + +Create `.agent-reliability/team-policy.json`: + +```json +{ + "minScore": 85, + "maxCritical": 0, + "maxHigh": 0, + "requiredFiles": ["AGENTS.md", "SECURITY.md", "README.md"], + "requireMcpRegistry": true, + "slackChannel": "#agent-reliability" +} +``` + +## Slack Alerts + +The command writes a Slack payload file only. It never calls a webhook. + +Teams can wire the generated `slack-payload.json` into their own approved notification workflow. 
+ +## Commercial Path + +This local report maps cleanly to a paid team product: + +- scan history across repositories +- org-level policy packs +- private registry enforcement +- audit exports for security reviews +- Slack/email/webhook integrations +- team dashboards and trend charts diff --git a/package.json b/package.json index 44d70ed..e3df9c1 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,9 @@ "security", "ci", "mcp", + "mcp-security", + "n8n", + "llm-cost", "sarif" ], "author": "Aolinge", diff --git a/scripts/ralph/product-expansion-prd.json b/scripts/ralph/product-expansion-prd.json new file mode 100644 index 0000000..e3628c6 --- /dev/null +++ b/scripts/ralph/product-expansion-prd.json @@ -0,0 +1,37 @@ +{ + "project": "Agent Reliability Kit Product Expansion", + "branchName": "local/product-expansion", + "description": "Turn agent-reliability-kit into the flagship AI-agent reliability entry point, with agent-secret-guard as the security wedge and local MVPs for team audit, private MCP registry, n8n safety/backup, AI cost guard, and launch distribution materials.", + "userStories": [ + { + "id": "PX-001", + "title": "Local team audit MVP", + "description": "As a team lead, I want local scan history, policy checks, audit exports, and a dry-run Slack payload so I can evaluate team readiness without a hosted service.", + "passes": true + }, + { + "id": "PX-002", + "title": "Private MCP registry and allowlist", + "description": "As a maintainer, I want MCP config files checked against approved servers, permissions, trust score, commands, URLs, owners, and reasons.", + "passes": true + }, + { + "id": "PX-003", + "title": "n8n safety and redacted backup", + "description": "As a self-hosted automation user, I want dangerous n8n workflow exports detected and redacted backups written to a Git-friendly directory.", + "passes": true + }, + { + "id": "PX-004", + "title": "AI cost guard", + "description": "As an agent user, I want local JSON/JSONL traces 
summarized by provider/model with budget alerts.", + "passes": true + }, + { + "id": "PX-005", + "title": "Distribution and commercial packaging", + "description": "As an open-source maintainer, I want README, docs, comparison pages, launch copy, and commercial support notes that make the product easy to understand and promote.", + "passes": true + } + ] +} diff --git a/scripts/ralph/progress.txt b/scripts/ralph/progress.txt index 03374fb..46bb6ad 100644 --- a/scripts/ralph/progress.txt +++ b/scripts/ralph/progress.txt @@ -1,6 +1,47 @@ # Ralph Progress Log Started: 2026-04-28T12:51:53 --- +## 2026-04-28 18:22:30 +08:00 - PRODUCT-EXPANSION +- What was implemented + - Added local team audit MVP with scan history snapshots, policy checks, audit Markdown/JSON, and dry-run Slack payload generation. The command writes local files only and never calls Slack. + - Added private MCP registry audit with approved servers, trust score, permissions, approved commands/URLs, risk owner/reason, and token-like config checks. + - Added n8n workflow safety scanning to the default repository scan plus `n8n-scan` and redacted `n8n-backup` commands. + - Added local AI cost guard that parses JSON/JSONL trace events, summarizes provider/model token cost, and warns over budget. + - Updated README, product docs, comparison pages, commercial support positioning, consolidation roadmap, launch distribution checklist, demo GIF script, and a CLI demo SVG. + - Added focused fixtures and tests for team audit, MCP registry, n8n safety/backup, and cost guard. 
+- Files changed + - `README.md` + - `package.json` + - `scripts/ralph/task.md` + - `scripts/ralph/product-expansion-prd.json` + - `src/cli.ts` + - `src/core/files.ts` + - `src/core/redaction.ts` + - `src/core/scan.ts` + - `src/cost/costReport.ts` + - `src/mcp/registry.ts` + - `src/n8n/backup.ts` + - `src/scanners/n8n.ts` + - `src/team/teamAudit.ts` + - `tests/cli.test.ts` + - `tests/scan.test.ts` + - new docs under `docs/` + - new fixtures under `tests/fixtures/` +- Verification run + - `npm run check` passed: lint ok, typecheck ok, 28 tests passed, build passed. + - `npm run smoke` passed. + - `node dist/cli.js team-audit tests/fixtures/clean-node --out .tmp/team-smoke` passed. + - `node dist/cli.js mcp-registry tests/fixtures/mcp-registry --out .tmp/mcp-smoke` returned expected exit 1 with intentional fixture findings. + - `node dist/cli.js n8n-scan tests/fixtures/n8n-risk --out .tmp/n8n-smoke --format markdown,json --min-score 0` returned expected exit 1 with intentional critical fixture findings. + - `node dist/cli.js n8n-backup tests/fixtures/n8n-risk --backup-dir .tmp/n8n-backup-smoke` passed and wrote a redacted backup. + - `node dist/cli.js cost-report tests/fixtures/cost-trace --budget-usd 0.50 --out .tmp/cost-smoke` returned expected exit 1 with an intentional budget alert. +- Boundaries kept + - No npm publish, GitHub release, remote push, real Slack webhook call, paid account action, or real secret use. + - Generated `.tmp` smoke outputs were removed after verification. +- Remaining P2 risks + - The hosted dashboard, real Slack delivery, and SaaS billing layer are intentionally not implemented yet; this iteration creates the local MVP and launch/commercial path. + - Nested subcommands such as `ark mcp doctor` and `ark pr verify` are captured in the consolidation roadmap; this iteration uses flat CLI commands to keep the parser small and tested. 
+--- ## Codebase Patterns - Keep the root `AGENTS.md` authoritative for repository rules; fixture-level `AGENTS.md` and `CLAUDE.md` files under `tests/fixtures/` are scanner inputs, not active project instructions. diff --git a/scripts/ralph/task.md b/scripts/ralph/task.md index b0a6532..1057ce8 100644 --- a/scripts/ralph/task.md +++ b/scripts/ralph/task.md @@ -1,7 +1,7 @@ # Long Task Request -Created: 2026-04-28T12:51:54 +Created: 2026-04-28T17:50:41 ```text -Polish agent-reliability-kit into a product-quality open-source repository. Keep it local-only and safe: do not publish, do not delete user data, do not use secrets. Review and improve CLI behavior, tests, README, static product page, generated HTML report, community files, CI, examples, and release readiness. Run npm run check, npm run smoke, self scan, and visual/static validation where practical. Persist progress in scripts/ralph/progress.txt and commit only local repo changes. +把 agent-reliability-kit 做成 AI-agent 可靠性总入口,并以 agent-secret-guard 为安全卖点补齐商业化与分发能力。范围:1) 增加付费团队层的本地可用 MVP:扫描历史、team policy、audit report、Slack webhook payload/配置示例、私有 allowlist 配置与校验;2) 增加私有 MCP registry/allowlist 扫描:批准 server、权限、trust score、风险原因;3) 增加 n8n safety + backup:扫描 workflow JSON 中公开 webhook、危险 code/executeCommand 节点、secret-like 值,并支持备份到本地 git-friendly 目录;4) 增加 AI cost guard 与 trace 结合的本地 MVP:解析 trace/cost fixture、预算告警、模型/provider 汇总;5) 完善真正分发材料:README hero/quickstart、对比页、Show HN/Reddit/MCP directory/awesome list 文案、GIF/demo 脚本、商业支持说明;6) 把小工具矩阵收敛为 ark 子命令或清晰路线图。禁止发布 npm、创建 GitHub release、push 远端、写入真实密钥。每轮完成后跑 npm run check 和最相关 smoke。 ``` diff --git a/src/cli.ts b/src/cli.ts index 837ab23..7593528 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -2,23 +2,52 @@ import path from "node:path"; import process from "node:process"; import { fileURLToPath } from "node:url"; +import { buildCostReport, formatCostMarkdown, writeCostReport } from "./cost/costReport.js"; import { scanRepository, VERSION } from "./core/scan.js"; +import { 
scoreFindings } from "./core/scoring.js"; import { initProject } from "./init/initProject.js"; +import { runMcpRegistryAudit, writeMcpRegistryReport } from "./mcp/registry.js"; +import { backupN8nWorkflows } from "./n8n/backup.js"; import { renderReport, writeReports } from "./report/write.js"; -import type { ReportFormat, ScanOptions } from "./types.js"; +import { runTeamAudit } from "./team/teamAudit.js"; +import type { Report, ReportFormat, ScanOptions } from "./types.js"; const VALID_FORMATS = new Set(["text", "markdown", "json", "html", "sarif", "annotations"]); -const COMMANDS = new Set(["scan", "doctor", "init", "help", "version"]); -const OPTION_NAMES = new Set(["--out", "--format", "--min-score", "--stdout", "--force", "--help", "-h", "--version", "-v"]); +const COMMANDS = new Set(["scan", "doctor", "init", "team-audit", "mcp-registry", "n8n-scan", "n8n-backup", "cost-report", "help", "version"]); +const OPTION_NAMES = new Set([ + "--out", + "--format", + "--min-score", + "--stdout", + "--force", + "--policy", + "--slack-payload", + "--registry", + "--config", + "--backup-dir", + "--trace", + "--budget-usd", + "--help", + "-h", + "--version", + "-v" +]); interface ParsedArgs { - command: "scan" | "doctor" | "init" | "help" | "version"; + command: "scan" | "doctor" | "init" | "team-audit" | "mcp-registry" | "n8n-scan" | "n8n-backup" | "cost-report" | "help" | "version"; path: string; outDir: string; formats: ReportFormat[]; minScore: number; stdout: boolean; force: boolean; + policyPath?: string; + slackPayloadPath?: string; + registryPath?: string; + configPath?: string; + backupDir?: string; + tracePath?: string; + budgetUsd?: number; usedOptions: Set; } @@ -66,6 +95,34 @@ function parseArgs(argv: string[]): ParsedArgs { } else if (item === "--force") { args.usedOptions.add(item); args.force = true; + } else if (item === "--policy") { + args.usedOptions.add(item); + args.policyPath = takeOptionValue(argv, index, item); + index += 1; + } else if (item === 
"--slack-payload") { + args.usedOptions.add(item); + args.slackPayloadPath = takeOptionValue(argv, index, item); + index += 1; + } else if (item === "--registry") { + args.usedOptions.add(item); + args.registryPath = takeOptionValue(argv, index, item); + index += 1; + } else if (item === "--config") { + args.usedOptions.add(item); + args.configPath = takeOptionValue(argv, index, item); + index += 1; + } else if (item === "--backup-dir") { + args.usedOptions.add(item); + args.backupDir = takeOptionValue(argv, index, item); + index += 1; + } else if (item === "--trace") { + args.usedOptions.add(item); + args.tracePath = takeOptionValue(argv, index, item); + index += 1; + } else if (item === "--budget-usd") { + args.usedOptions.add(item); + args.budgetUsd = Number(takeOptionValue(argv, index, item, true)); + index += 1; } else if (item === "--help" || item === "-h") { args.command = "help"; } else if (item === "--version" || item === "-v") { @@ -80,6 +137,9 @@ function parseArgs(argv: string[]): ParsedArgs { if (!Number.isFinite(args.minScore) || args.minScore < 0 || args.minScore > 100) { throw new Error("--min-score must be a number from 0 to 100"); } + if (args.budgetUsd !== undefined && (!Number.isFinite(args.budgetUsd) || args.budgetUsd < 0)) { + throw new Error("--budget-usd must be a non-negative number"); + } validateCommandOptions(args); @@ -103,6 +163,11 @@ function validateCommandOptions(args: ParsedArgs): void { scan: new Set(["--out", "--format", "--min-score", "--stdout"]), doctor: new Set(), init: new Set(["--force"]), + "team-audit": new Set(["--policy", "--out", "--slack-payload"]), + "mcp-registry": new Set(["--registry", "--config", "--out"]), + "n8n-scan": new Set(["--out", "--format", "--min-score", "--stdout"]), + "n8n-backup": new Set(["--backup-dir"]), + "cost-report": new Set(["--trace", "--budget-usd", "--out"]), help: new Set(), version: new Set() }[args.command]; @@ -132,6 +197,11 @@ Usage: agent-reliability-kit scan [path] [--out DIR] 
[--format LIST] [--min-score N] [--stdout] agent-reliability-kit doctor [path] agent-reliability-kit init [path] [--force] + agent-reliability-kit team-audit [path] [--policy FILE] [--out DIR] [--slack-payload FILE] + agent-reliability-kit mcp-registry [path] [--registry FILE] [--config FILE] [--out DIR] + agent-reliability-kit n8n-scan [path] [--out DIR] [--format LIST] [--stdout] + agent-reliability-kit n8n-backup [path] [--backup-dir DIR] + agent-reliability-kit cost-report [path] [--trace FILE_OR_DIR] [--budget-usd N] [--out DIR] agent-reliability-kit --help agent-reliability-kit --version @@ -139,11 +209,20 @@ Aliases: ark scan . ark doctor . ark init . + ark team-audit . + ark mcp-registry . + ark n8n-scan . + ark cost-report . --budget-usd 10 Commands: scan Write local reliability reports and print a concise summary doctor Print the highest-priority fixes without writing report files init Add safe starter community and CI files without overwriting by default + team-audit Write scan history, policy audit, audit report, and dry-run Slack payload + mcp-registry Check MCP configs against a private registry/allowlist + n8n-scan Run the n8n safety scanner and write n8n-only reports + n8n-backup Write redacted, Git-friendly backups of n8n workflow JSON + cost-report Summarize local AI trace token/cost events and budget alerts Options: --out DIR scan only; default .agent-reliability inside the requested repository @@ -151,6 +230,13 @@ Options: --min-score N scan only; fail when score is below N, default 80, range 0-100 --stdout scan only; print the first requested format to stdout instead of text --force init only; overwrite existing starter files + --policy FILE team-audit only; default .agent-reliability/team-policy.json + --slack-payload FILE team-audit only; write a Slack payload without sending it + --registry FILE mcp-registry only; default .agent-reliability/mcp-registry.json + --config FILE mcp-registry only; explicit MCP config file + --backup-dir DIR 
n8n-backup only; default .agent-reliability/n8n-backup + --trace FILE_OR_DIR cost-report only; default .agent-reliability/traces + --budget-usd N cost-report only; warn when parsed cost exceeds this budget -h, --help show help -v, --version print version @@ -158,7 +244,7 @@ Formats: text, markdown, json, html, sarif, annotations Safety: - Local-only by default. scan writes under the requested repository unless --out explicitly points elsewhere. + Local-only by default. Commands write reports and dry-run payloads only; they never send Slack webhooks, publish packages, or push releases. `); } @@ -188,6 +274,29 @@ function runDoctor(root: string, io: CliIo): number { return report.summary.critical > 0 ? 1 : 0; } +function runN8nScan(options: ScanOptions, io: CliIo): number { + const report = scanRepository(options.root); + const findings = report.findings.filter((finding) => finding.scanner === "n8n-safety"); + const scored = scoreFindings(findings); + const n8nReport: Report = { + ...report, + findings, + score: scored.score, + grade: scored.grade, + summary: scored.summary + }; + const outDir = path.resolve(options.root, options.outDir); + const written = writeReports(n8nReport, outDir, options.formats); + const printable = options.stdout ? options.formats[0] ?? "text" : "text"; + io.stdout(renderReport(n8nReport, printable)); + if (!options.stdout && written.length > 0) { + io.stdout(""); + io.stdout("Written n8n reports:"); + for (const file of written) io.stdout(`- ${path.relative(io.cwd, file).replaceAll("\\", "/")}`); + } + return n8nReport.score >= options.minScore && n8nReport.summary.critical === 0 ? 
0 : 1; +} + function runInit(root: string, force: boolean, io: CliIo): number { const result = initProject(root, force); io.stdout("Created:"); @@ -222,6 +331,60 @@ export function runCli(argv: string[], io: CliIo = { stdout: console.log, stderr } if (args.command === "doctor") return runDoctor(root, io); if (args.command === "init") return runInit(root, args.force, io); + if (args.command === "team-audit") { + const audit = runTeamAudit({ + root, + policyPath: args.policyPath, + outDir: args.outDir, + slackPayloadPath: args.slackPayloadPath + }); + io.stdout(`Team audit ${audit.status.toUpperCase()}: score ${audit.current.score}/100, ${audit.current.critical} critical, ${audit.current.high} high`); + io.stdout(`Reports written under ${path.relative(io.cwd, path.resolve(root, args.outDir)).replaceAll("\\", "/")}`); + return audit.status === "pass" ? 0 : 1; + } + if (args.command === "mcp-registry") { + const report = runMcpRegistryAudit({ + root, + registryPath: args.registryPath, + configPath: args.configPath, + outDir: args.outDir + }); + const written = writeMcpRegistryReport(report, path.resolve(root, args.outDir)); + io.stdout(`MCP registry audit ${report.status.toUpperCase()}: ${report.findings.length} findings`); + for (const file of written) io.stdout(`- ${path.relative(io.cwd, file).replaceAll("\\", "/")}`); + return report.status === "pass" ? 0 : 1; + } + if (args.command === "n8n-scan") { + return runN8nScan({ + root, + outDir: args.outDir, + formats: args.formats, + minScore: args.minScore, + stdout: args.stdout + }, io); + } + if (args.command === "n8n-backup") { + const backup = backupN8nWorkflows({ + root, + backupDir: args.backupDir ?? 
".agent-reliability/n8n-backup" + }); + io.stdout(`Backed up ${backup.files.length} n8n workflow file(s) to ${backup.backupDir}`); + return 0; + } + if (args.command === "cost-report") { + const report = buildCostReport({ + root, + tracePath: args.tracePath, + budgetUsd: args.budgetUsd, + outDir: args.outDir + }); + const written = writeCostReport(report, path.resolve(root, args.outDir)); + io.stdout(formatCostMarkdown(report)); + io.stdout(""); + io.stdout("Written cost reports:"); + for (const file of written) io.stdout(`- ${path.relative(io.cwd, file).replaceAll("\\", "/")}`); + return report.status === "pass" ? 0 : 1; + } return 0; } catch (error) { io.stderr(`agent-reliability-kit: ${(error as Error).message}`); diff --git a/src/core/files.ts b/src/core/files.ts index f53af83..3c00b35 100644 --- a/src/core/files.ts +++ b/src/core/files.ts @@ -10,6 +10,7 @@ const SKIP_DIRS = new Set([ "dist", "build", "coverage", + ".tmp", ".next", ".turbo", ".venv", @@ -118,4 +119,3 @@ export function findFiles(files: RepoFile[], matcher: RegExp): RepoFile[] { export function lineNumber(text: string, index: number): number { return text.slice(0, index).split("\n").length; } - diff --git a/src/core/redaction.ts b/src/core/redaction.ts new file mode 100644 index 0000000..a15f44a --- /dev/null +++ b/src/core/redaction.ts @@ -0,0 +1,46 @@ +export interface SecretMatch { + label: string; + value: string; + index: number; +} + +const SECRET_PATTERNS = [ + { label: "GitHub token", pattern: /gh[pousr]_[A-Za-z0-9_]{20,}/g }, + { label: "OpenAI-style key", pattern: /sk-[A-Za-z0-9_-]{20,}/g }, + { label: "AWS access key", pattern: /AKIA[0-9A-Z]{16}/g }, + { label: "Generic secret assignment", pattern: /\b(?:api[_-]?key|secret|token|password)\s*[:=]\s*["']?[A-Za-z0-9_./+=-]{18,}/gi } +]; + +export function findSecretLikeValues(text: string): SecretMatch[] { + const matches: SecretMatch[] = []; + for (const secret of SECRET_PATTERNS) { + for (const match of text.matchAll(secret.pattern)) { 
+ const value = match[0]; + if (isClearlyFake(value)) continue; + matches.push({ + label: secret.label, + value, + index: match.index ?? 0 + }); + } + } + return matches.sort((left, right) => left.index - right.index); +} + +export function redactSecretLikeText(text: string): string { + let output = text; + const uniqueValues = [...new Set(findSecretLikeValues(text).map((match) => match.value))]; + for (const value of uniqueValues) { + output = output.replaceAll(value, redactValue(value)); + } + return output; +} + +export function redactValue(value: string): string { + if (value.length <= 8) return "[redacted]"; + return `${value.slice(0, 4)}...[redacted]...${value.slice(-4)}`; +} + +export function isClearlyFake(value: string): boolean { + return /(example|fake|dummy|placeholder|test-only|scannerDetectionOnly|xxxx|0000|1234)/i.test(value); +} diff --git a/src/core/scan.ts b/src/core/scan.ts index 4ee39bb..10592d0 100644 --- a/src/core/scan.ts +++ b/src/core/scan.ts @@ -6,6 +6,7 @@ import { scanAgentInstructions } from "../scanners/agentInstructions.js"; import { scanAiAgentRisk } from "../scanners/aiAgentRisk.js"; import { scanCommands } from "../scanners/commands.js"; import { scanGithubActions } from "../scanners/githubActions.js"; +import { scanN8nWorkflows } from "../scanners/n8n.js"; import { scanReadme } from "../scanners/readme.js"; import { scanReleaseReadiness } from "../scanners/releaseReadiness.js"; import { scanSecrets } from "../scanners/secrets.js"; @@ -20,7 +21,8 @@ const scanners = [ scanReleaseReadiness, scanSecrets, scanGithubActions, - scanAiAgentRisk + scanAiAgentRisk, + scanN8nWorkflows ]; export function scanRepository(inputRoot: string): Report { diff --git a/src/cost/costReport.ts b/src/cost/costReport.ts new file mode 100644 index 0000000..a8b33b5 --- /dev/null +++ b/src/cost/costReport.ts @@ -0,0 +1,206 @@ +import fs from "node:fs"; +import path from "node:path"; +import { resolveRoot } from "../core/files.js"; + +export interface 
CostReportOptions { + root: string; + tracePath?: string; + budgetUsd?: number; + outDir: string; +} + +export interface CostEvent { + provider: string; + model: string; + inputTokens: number; + outputTokens: number; + totalTokens: number; + costUsd: number; +} + +export interface CostBucket { + provider: string; + model: string; + calls: number; + inputTokens: number; + outputTokens: number; + totalTokens: number; + costUsd: number; +} + +export interface CostReport { + generatedAt: string; + root: string; + traceFiles: string[]; + budgetUsd?: number; + total: Omit; + byModel: CostBucket[]; + alerts: string[]; + status: "pass" | "warn"; +} + +export function buildCostReport(options: CostReportOptions): CostReport { + const root = resolveRoot(options.root); + const traceFiles = resolveTraceFiles(root, options.tracePath); + const events = traceFiles.flatMap((file) => readCostEvents(file)); + const byModel = summarizeByModel(events); + const total = byModel.reduce>((acc, bucket) => ({ + calls: acc.calls + bucket.calls, + inputTokens: acc.inputTokens + bucket.inputTokens, + outputTokens: acc.outputTokens + bucket.outputTokens, + totalTokens: acc.totalTokens + bucket.totalTokens, + costUsd: acc.costUsd + bucket.costUsd + }), { calls: 0, inputTokens: 0, outputTokens: 0, totalTokens: 0, costUsd: 0 }); + const alerts: string[] = []; + + if (traceFiles.length === 0) alerts.push("No trace files found. 
Pass --trace or write JSON/JSONL traces under .agent-reliability/traces."); + if (events.length === 0 && traceFiles.length > 0) alerts.push("Trace files were found, but no token or cost events could be parsed."); + if (options.budgetUsd !== undefined && total.costUsd > options.budgetUsd) { + alerts.push(`Cost ${formatMoney(total.costUsd)} is above budget ${formatMoney(options.budgetUsd)}.`); + } + + return { + generatedAt: new Date().toISOString(), + root, + traceFiles: traceFiles.map((file) => path.relative(root, file).replaceAll("\\", "/")), + budgetUsd: options.budgetUsd, + total, + byModel, + alerts, + status: alerts.length > 0 ? "warn" : "pass" + }; +} + +export function writeCostReport(report: CostReport, outDir: string): string[] { + fs.mkdirSync(outDir, { recursive: true }); + const jsonPath = path.join(outDir, "cost-report.json"); + const mdPath = path.join(outDir, "cost-report.md"); + fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2), "utf8"); + fs.writeFileSync(mdPath, formatCostMarkdown(report), "utf8"); + return [jsonPath, mdPath]; +} + +export function formatCostMarkdown(report: CostReport): string { + const lines = [ + "# AI Cost Guard Report", + "", + `Generated: ${report.generatedAt}`, + "", + `Status: **${report.status.toUpperCase()}**`, + "", + `Calls: ${report.total.calls}`, + `Input tokens: ${report.total.inputTokens}`, + `Output tokens: ${report.total.outputTokens}`, + `Total tokens: ${report.total.totalTokens}`, + `Cost: ${formatMoney(report.total.costUsd)}`, + report.budgetUsd === undefined ? 
"Budget: not set" : `Budget: ${formatMoney(report.budgetUsd)}`, + "", + "## By Model", + "", + "| Provider | Model | Calls | Tokens | Cost |", + "| --- | --- | ---: | ---: | ---: |" + ]; + + for (const bucket of report.byModel) { + lines.push(`| ${bucket.provider} | ${bucket.model} | ${bucket.calls} | ${bucket.totalTokens} | ${formatMoney(bucket.costUsd)} |`); + } + if (report.byModel.length === 0) lines.push("| none | none | 0 | 0 | $0.0000 |"); + + lines.push("", "## Alerts", ""); + if (report.alerts.length === 0) { + lines.push("No cost alerts."); + } else { + for (const alert of report.alerts) lines.push(`- ${alert}`); + } + return lines.join("\n"); +} + +function resolveTraceFiles(root: string, tracePath?: string): string[] { + const start = tracePath ? resolveInputPath(root, tracePath) : path.join(root, ".agent-reliability", "traces"); + if (!fs.existsSync(start)) return []; + if (fs.statSync(start).isFile()) return [start]; + const files: string[] = []; + for (const entry of fs.readdirSync(start, { withFileTypes: true })) { + const absolutePath = path.join(start, entry.name); + if (entry.isDirectory()) { + files.push(...resolveTraceFiles(root, absolutePath)); + } else if (entry.isFile() && /\.(json|jsonl)$/i.test(entry.name)) { + files.push(absolutePath); + } + } + return files; +} + +function readCostEvents(file: string): CostEvent[] { + const text = fs.readFileSync(file, "utf8"); + const records = file.endsWith(".jsonl") + ? 
text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean).map(parseJson) + : [parseJson(text)]; + return records.flatMap((record) => normalizeRecord(record)).filter((event): event is CostEvent => event !== null); +} + +function normalizeRecord(record: unknown): CostEvent[] { + if (Array.isArray(record)) return record.flatMap(normalizeRecord); + if (!record || typeof record !== "object") return []; + const item = record as Record; + if (Array.isArray(item.events)) return item.events.flatMap(normalizeRecord); + if (Array.isArray(item.calls)) return item.calls.flatMap(normalizeRecord); + + const usage = typeof item.usage === "object" && item.usage !== null ? item.usage as Record : {}; + const provider = stringField(item.provider) ?? stringField(item.modelProvider) ?? "unknown"; + const model = stringField(item.model) ?? stringField(item.modelName) ?? "unknown"; + const inputTokens = numberField(item.inputTokens) ?? numberField(item.input_tokens) ?? numberField(item.prompt_tokens) ?? numberField(usage.inputTokens) ?? numberField(usage.prompt_tokens) ?? 0; + const outputTokens = numberField(item.outputTokens) ?? numberField(item.output_tokens) ?? numberField(item.completion_tokens) ?? numberField(usage.outputTokens) ?? numberField(usage.completion_tokens) ?? 0; + const totalTokens = numberField(item.totalTokens) ?? numberField(item.total_tokens) ?? numberField(usage.totalTokens) ?? inputTokens + outputTokens; + const costUsd = numberField(item.costUsd) ?? numberField(item.cost_usd) ?? numberField(item.usd) ?? 0; + + if (inputTokens + outputTokens + totalTokens + costUsd === 0) return []; + return [{ provider, model, inputTokens, outputTokens, totalTokens, costUsd }]; +} + +function summarizeByModel(events: CostEvent[]): CostBucket[] { + const buckets = new Map(); + for (const event of events) { + const key = `${event.provider}\u0000${event.model}`; + const current = buckets.get(key) ?? 
{ + provider: event.provider, + model: event.model, + calls: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + costUsd: 0 + }; + current.calls += 1; + current.inputTokens += event.inputTokens; + current.outputTokens += event.outputTokens; + current.totalTokens += event.totalTokens; + current.costUsd += event.costUsd; + buckets.set(key, current); + } + return [...buckets.values()].sort((left, right) => right.costUsd - left.costUsd); +} + +function parseJson(text: string): unknown { + try { + return JSON.parse(text) as unknown; + } catch { + return null; + } +} + +function numberField(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) ? value : undefined; +} + +function stringField(value: unknown): string | undefined { + return typeof value === "string" && value.trim() !== "" ? value : undefined; +} + +function resolveInputPath(root: string, input: string): string { + return path.isAbsolute(input) ? input : path.join(root, input); +} + +function formatMoney(value: number): string { + return `$${value.toFixed(4)}`; +} diff --git a/src/mcp/registry.ts b/src/mcp/registry.ts new file mode 100644 index 0000000..f15e083 --- /dev/null +++ b/src/mcp/registry.ts @@ -0,0 +1,303 @@ +import fs from "node:fs"; +import path from "node:path"; +import { resolveRoot } from "../core/files.js"; +import { findSecretLikeValues } from "../core/redaction.js"; +import type { Finding } from "../types.js"; + +export interface McpRegistryOptions { + root: string; + registryPath?: string; + configPath?: string; + outDir: string; +} + +export interface McpRegistryReport { + generatedAt: string; + root: string; + registryPath: string; + configPaths: string[]; + approvedServers: string[]; + checkedServers: string[]; + findings: Finding[]; + status: "pass" | "fail"; +} + +interface RegistryServer { + name: string; + approved?: boolean; + trustScore?: number; + permissions?: string[]; + allowedCommands?: string[]; + allowedUrls?: string[]; + 
riskOwner?: string; + riskReason?: string; +} + +interface RegistryFile { + servers?: unknown; +} + +interface McpServerConfig { + name: string; + command?: string; + url?: string; + env?: unknown; +} + +const DEFAULT_CONFIG_PATHS = [ + ".mcp.json", + "mcp.json", + ".cursor/mcp.json", + ".vscode/mcp.json", + ".claude/mcp.json", + "claude_desktop_config.json" +]; + +export function runMcpRegistryAudit(options: McpRegistryOptions): McpRegistryReport { + const root = resolveRoot(options.root); + const registryPath = resolveInputPath(root, options.registryPath ?? ".agent-reliability/mcp-registry.json"); + const registry = readRegistry(registryPath); + const configPaths = resolveConfigPaths(root, options.configPath); + const findings: Finding[] = []; + const checkedServers: string[] = []; + + if (!fs.existsSync(registryPath)) { + findings.push({ + id: "mcp.registry.missing", + title: "Private MCP registry is missing", + severity: "high", + scanner: "mcp-registry", + file: path.relative(root, registryPath).replaceAll("\\", "/"), + why: "Teams need an explicit allowlist before agents can safely use file, shell, browser, network, or SaaS tools.", + next: "Create .agent-reliability/mcp-registry.json with approved servers, permissions, trust score, and risk owner." + }); + } + + if (configPaths.length === 0) { + findings.push({ + id: "mcp.registry.no-config", + title: "No MCP config found to check", + severity: "info", + scanner: "mcp-registry", + why: "The registry can only protect MCP configs that are committed or passed explicitly.", + next: "Pass --config path/to/mcp.json or commit a redacted repo-local MCP config template." 
+      });
+  }
+
+  const registryByName = new Map<string, RegistryServer>(registry.map((server) => [server.name, server]));
+  for (const configFile of configPaths) {
+    const text = fs.readFileSync(configFile, "utf8");
+    for (const secret of findSecretLikeValues(text)) {
+      findings.push({
+        id: "mcp.config.secret-like-value",
+        title: `MCP config contains possible ${secret.label}`,
+        severity: "critical",
+        scanner: "mcp-registry",
+        file: path.relative(root, configFile).replaceAll("\\", "/"),
+        evidence: "[redacted]",
+        why: "MCP configs are frequently pasted into issue reports and agent setup docs.",
+        next: "Move credentials to environment variables or a secret manager, rotate real values, and keep only placeholders in config examples."
+      });
+    }
+
+    for (const server of readMcpServers(text)) {
+      checkedServers.push(server.name);
+      const registryEntry = registryByName.get(server.name);
+      const relativeConfig = path.relative(root, configFile).replaceAll("\\", "/");
+      if (!registryEntry) {
+        findings.push({
+          id: "mcp.server.not-allowlisted",
+          title: "MCP server is not in the private allowlist",
+          severity: "high",
+          scanner: "mcp-registry",
+          file: relativeConfig,
+          evidence: server.name,
+          why: "Unreviewed MCP servers can expose files, commands, browser state, network calls, or SaaS data to agents.",
+          next: "Add the server to the private registry with an owner, trust score, allowed command or URL, and permission scope before use."
+        });
+        continue;
+      }
+
+      if (registryEntry.approved === false) {
+        findings.push({
+          id: "mcp.server.disabled",
+          title: "MCP server is present but not approved",
+          severity: "high",
+          scanner: "mcp-registry",
+          file: relativeConfig,
+          evidence: server.name,
+          why: "A disabled registry entry means the team has explicitly not approved the server for agent use.",
+          next: "Remove the server from config or update the registry after a documented review."
+        });
+      }
+
+      if ((registryEntry.trustScore ??
100) < 70) { + findings.push({ + id: "mcp.server.low-trust", + title: "MCP server trust score is below policy target", + severity: "medium", + scanner: "mcp-registry", + file: relativeConfig, + evidence: `${server.name}: ${registryEntry.trustScore}`, + why: "Low-trust tools should not be available to coding agents without supervision.", + next: "Raise the trust score with a review, limit permissions, or require human approval." + }); + } + + if (server.command && registryEntry.allowedCommands && !registryEntry.allowedCommands.includes(server.command)) { + findings.push({ + id: "mcp.command.not-approved", + title: "MCP command does not match the registry allowlist", + severity: "high", + scanner: "mcp-registry", + file: relativeConfig, + evidence: `${server.name}: ${server.command}`, + why: "A changed command can swap an approved MCP server for arbitrary local execution.", + next: "Update the config to the approved command or review and add the new command to the registry." + }); + } + + if (server.url && registryEntry.allowedUrls && !registryEntry.allowedUrls.some((allowed) => server.url?.startsWith(allowed))) { + findings.push({ + id: "mcp.url.not-approved", + title: "MCP URL does not match the registry allowlist", + severity: "high", + scanner: "mcp-registry", + file: relativeConfig, + evidence: `${server.name}: ${server.url}`, + why: "Remote MCP URLs can redirect agent data to an unreviewed service.", + next: "Use an approved URL prefix or add a reviewed remote endpoint to the registry." + }); + } + + const riskyPermissions = (registryEntry.permissions ?? 
[]).filter((permission) => /file|shell|browser|network|secret|credential/i.test(permission)); + if (riskyPermissions.length > 0 && (!registryEntry.riskOwner || !registryEntry.riskReason)) { + findings.push({ + id: "mcp.permission.owner-missing", + title: "Risky MCP permissions need an owner and reason", + severity: "medium", + scanner: "mcp-registry", + file: path.relative(root, registryPath).replaceAll("\\", "/"), + evidence: `${server.name}: ${riskyPermissions.join(", ")}`, + why: "High-power MCP tools need a named owner so teams know who accepts the risk.", + next: "Add riskOwner and riskReason to the registry entry." + }); + } + } + } + + return { + generatedAt: new Date().toISOString(), + root, + registryPath: path.relative(root, registryPath).replaceAll("\\", "/"), + configPaths: configPaths.map((config) => path.relative(root, config).replaceAll("\\", "/")), + approvedServers: registry.filter((server) => server.approved !== false).map((server) => server.name), + checkedServers: [...new Set(checkedServers)], + findings, + status: findings.some((finding) => finding.severity === "critical" || finding.severity === "high") ? "fail" : "pass" + }; +} + +export function writeMcpRegistryReport(report: McpRegistryReport, outDir: string): string[] { + fs.mkdirSync(outDir, { recursive: true }); + const jsonPath = path.join(outDir, "mcp-registry-report.json"); + const mdPath = path.join(outDir, "mcp-registry-report.md"); + fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2), "utf8"); + fs.writeFileSync(mdPath, formatMcpRegistryMarkdown(report), "utf8"); + return [jsonPath, mdPath]; +} + +export function formatMcpRegistryMarkdown(report: McpRegistryReport): string { + const lines = [ + "# MCP Registry Audit", + "", + `Generated: ${report.generatedAt}`, + "", + `Status: **${report.status.toUpperCase()}**`, + "", + `Registry: \`${report.registryPath}\``, + `Configs: ${report.configPaths.length > 0 ? 
report.configPaths.map((item) => `\`${item}\``).join(", ") : "none"}`, + `Approved servers: ${report.approvedServers.length > 0 ? report.approvedServers.map((item) => `\`${item}\``).join(", ") : "none"}`, + `Checked servers: ${report.checkedServers.length > 0 ? report.checkedServers.map((item) => `\`${item}\``).join(", ") : "none"}`, + "" + ]; + if (report.findings.length === 0) { + lines.push("## Findings", "", "No MCP registry findings."); + return lines.join("\n"); + } + lines.push("## Findings", ""); + for (const finding of report.findings) { + lines.push(`### ${finding.severity.toUpperCase()} ${finding.title}`); + lines.push(""); + lines.push(`- Rule: \`${finding.id}\``); + if (finding.file) lines.push(`- File: \`${finding.file}\``); + if (finding.evidence) lines.push(`- Evidence: \`${finding.evidence}\``); + lines.push(`- Why: ${finding.why}`); + lines.push(`- Next: ${finding.next}`); + lines.push(""); + } + return lines.join("\n"); +} + +function resolveInputPath(root: string, input: string): string { + return path.isAbsolute(input) ? input : path.join(root, input); +} + +function resolveConfigPaths(root: string, configPath?: string): string[] { + if (configPath) { + const absolutePath = resolveInputPath(root, configPath); + return fs.existsSync(absolutePath) ? 
[absolutePath] : [];
+  }
+  return DEFAULT_CONFIG_PATHS.map((relativePath) => path.join(root, relativePath)).filter((file) => fs.existsSync(file));
+}
+
+function readRegistry(registryPath: string): RegistryServer[] {
+  if (!fs.existsSync(registryPath)) return [];
+  try {
+    const parsed = JSON.parse(fs.readFileSync(registryPath, "utf8")) as RegistryFile;
+    if (Array.isArray(parsed.servers)) return parsed.servers.map(normalizeRegistryServer).filter(Boolean) as RegistryServer[];
+  } catch {
+    return [];
+  }
+  return [];
+}
+
+function normalizeRegistryServer(value: unknown): RegistryServer | null {
+  if (!value || typeof value !== "object") return null;
+  const item = value as Record<string, unknown>;
+  if (typeof item.name !== "string" || item.name.trim() === "") return null;
+  return {
+    name: item.name,
+    approved: typeof item.approved === "boolean" ? item.approved : undefined,
+    trustScore: typeof item.trustScore === "number" ? item.trustScore : undefined,
+    permissions: arrayOfStrings(item.permissions),
+    allowedCommands: arrayOfStrings(item.allowedCommands),
+    allowedUrls: arrayOfStrings(item.allowedUrls),
+    riskOwner: typeof item.riskOwner === "string" ? item.riskOwner : undefined,
+    riskReason: typeof item.riskReason === "string" ? item.riskReason : undefined
+  };
+}
+
+function readMcpServers(text: string): McpServerConfig[] {
+  try {
+    const parsed = JSON.parse(text) as Record<string, unknown>;
+    const block = parsed.mcpServers ?? parsed.servers;
+    if (!block || typeof block !== "object" || Array.isArray(block)) return [];
+    return Object.entries(block as Record<string, unknown>).map(([name, value]) => {
+      const config = typeof value === "object" && value !== null ? value as Record<string, unknown> : {};
+      return {
+        name,
+        command: typeof config.command === "string" ? config.command : undefined,
+        url: typeof config.url === "string" ?
config.url : undefined, + env: config.env + }; + }); + } catch { + return []; + } +} + +function arrayOfStrings(value: unknown): string[] | undefined { + if (!Array.isArray(value)) return undefined; + return value.filter((item): item is string => typeof item === "string"); +} diff --git a/src/n8n/backup.ts b/src/n8n/backup.ts new file mode 100644 index 0000000..393294c --- /dev/null +++ b/src/n8n/backup.ts @@ -0,0 +1,70 @@ +import fs from "node:fs"; +import path from "node:path"; +import { listRepoFiles, readTextFile, resolveRoot } from "../core/files.js"; +import { redactSecretLikeText } from "../core/redaction.js"; +import { looksLikeN8nWorkflow } from "../scanners/n8n.js"; + +export interface N8nBackupOptions { + root: string; + backupDir: string; +} + +export interface N8nBackupReport { + generatedAt: string; + root: string; + backupDir: string; + files: Array<{ + source: string; + backup: string; + redacted: boolean; + }>; +} + +export function backupN8nWorkflows(options: N8nBackupOptions): N8nBackupReport { + const root = resolveRoot(options.root); + const backupDir = path.isAbsolute(options.backupDir) ? options.backupDir : path.join(root, options.backupDir); + const files = []; + + fs.mkdirSync(backupDir, { recursive: true }); + for (const file of listRepoFiles(root)) { + const text = readTextFile(file); + if (!text || !looksLikeN8nWorkflow(text, file.relativePath)) continue; + const redactedText = redactSecretLikeText(text); + const parsed = parseJson(redactedText); + const target = path.join(backupDir, file.relativePath.replaceAll("/", "__")); + fs.writeFileSync(target, parsed ? 
`${JSON.stringify(parsed, null, 2)}\n` : redactedText, "utf8"); + files.push({ + source: file.relativePath, + backup: path.relative(root, target).replaceAll("\\", "/"), + redacted: redactedText !== text + }); + } + + fs.writeFileSync(path.join(backupDir, "README.md"), [ + "# n8n Workflow Backup", + "", + "This directory is generated by Agent Reliability Kit.", + "", + "- Workflow JSON is formatted for Git review.", + "- Token-like values are redacted before writing.", + "- Review every public webhook, code node, and command execution node before sharing.", + "" + ].join("\n"), "utf8"); + + const report: N8nBackupReport = { + generatedAt: new Date().toISOString(), + root, + backupDir: path.relative(root, backupDir).replaceAll("\\", "/"), + files + }; + fs.writeFileSync(path.join(backupDir, "backup-report.json"), JSON.stringify(report, null, 2), "utf8"); + return report; +} + +function parseJson(text: string): unknown | null { + try { + return JSON.parse(text) as unknown; + } catch { + return null; + } +} diff --git a/src/scanners/n8n.ts b/src/scanners/n8n.ts new file mode 100644 index 0000000..d6b906c --- /dev/null +++ b/src/scanners/n8n.ts @@ -0,0 +1,131 @@ +import { findFiles, lineNumber, readTextFile } from "../core/files.js"; +import { findSecretLikeValues } from "../core/redaction.js"; +import type { Finding, ScanContext, ScannerResult } from "../types.js"; + +interface N8nNode { + name?: unknown; + type?: unknown; + parameters?: unknown; +} + +interface N8nWorkflow { + nodes?: unknown; +} + +export function scanN8nWorkflows(context: ScanContext): ScannerResult { + const findings: Finding[] = []; + const workflowFiles: string[] = []; + + for (const file of findFiles(context.files, /\.json$/i)) { + const text = readTextFile(file); + if (!text || !looksLikeN8nWorkflow(text, file.relativePath)) continue; + workflowFiles.push(file.relativePath); + + for (const secret of findSecretLikeValues(text)) { + findings.push({ + id: "n8n.secret-like-value", + title: `n8n 
workflow contains possible ${secret.label}`, + severity: "critical", + scanner: "n8n-safety", + file: file.relativePath, + line: lineNumber(text, secret.index), + evidence: "[redacted]", + why: "n8n workflow exports are often copied into Git, support tickets, and templates, so embedded credentials can leak quickly.", + next: "Move the value to n8n credentials or environment variables, rotate it if real, and keep only redacted workflow backups in Git." + }); + } + + const parsed = parseJson(text); + if (!parsed) continue; + const nodes = getWorkflowNodes(parsed); + for (const node of nodes) { + const nodeName = typeof node.name === "string" ? node.name : "unnamed node"; + const nodeType = typeof node.type === "string" ? node.type : ""; + const parametersText = JSON.stringify(node.parameters ?? {}); + const nodeLine = lineNumber(text, findNodeIndex(text, nodeName, nodeType)); + + if (/webhook/i.test(nodeType) && !hasWebhookAuthentication(node.parameters)) { + findings.push({ + id: "n8n.public-webhook", + title: "n8n webhook node has no explicit authentication", + severity: "high", + scanner: "n8n-safety", + file: file.relativePath, + line: nodeLine, + evidence: nodeName, + why: "Public n8n webhooks can trigger automations from the internet when authentication is missing or implicit.", + next: "Require authentication, restrict the webhook path, and document who may call it." + }); + } + + if (/executeCommand|ssh/i.test(nodeType)) { + findings.push({ + id: "n8n.command-execution-node", + title: "n8n workflow can execute commands", + severity: "critical", + scanner: "n8n-safety", + file: file.relativePath, + line: nodeLine, + evidence: nodeName, + why: "Command execution nodes can turn a workflow import into host-level code execution.", + next: "Remove the node, isolate it on a locked-down worker, or require a documented human approval step." 
+        });
+      }
+
+      if (/code|function/i.test(nodeType) && /(eval\(|child_process|process\.env|require\(["']fs["']|fetch\()/i.test(parametersText)) {
+        findings.push({
+          id: "n8n.risky-code-node",
+          title: "n8n code node uses risky runtime APIs",
+          severity: "high",
+          scanner: "n8n-safety",
+          file: file.relativePath,
+          line: nodeLine,
+          evidence: nodeName,
+          why: "Code nodes that read environment variables, evaluate strings, or call network/file APIs are hard to review in shared workflow templates.",
+          next: "Move risky code into a reviewed service, add tests, and keep the workflow node as a narrow API call."
+        });
+      }
+    }
+  }
+
+  return {
+    findings,
+    facts: {
+      n8nWorkflowFiles: workflowFiles
+    }
+  };
+}
+
+export function looksLikeN8nWorkflow(text: string, relativePath = ""): boolean {
+  if (/(^|\/)(n8n|workflows?)\//i.test(relativePath)) return true;
+  return /"nodes"\s*:\s*\[/.test(text) && /"n8n-nodes-base\./.test(text);
+}
+
+function parseJson(text: string): unknown | null {
+  try {
+    return JSON.parse(text) as unknown;
+  } catch {
+    return null;
+  }
+}
+
+function getWorkflowNodes(value: unknown): N8nNode[] {
+  const workflow = value as N8nWorkflow;
+  if (!Array.isArray(workflow.nodes)) return [];
+  return workflow.nodes.filter((node): node is N8nNode => typeof node === "object" && node !== null);
+}
+
+function hasWebhookAuthentication(parameters: unknown): boolean {
+  if (!parameters || typeof parameters !== "object") return false;
+  const params = parameters as Record<string, unknown>;
+  const auth = params.authentication ?? params.httpMethod;
+  if (typeof auth === "string" && /none|noAuth/i.test(auth)) return false;
+  return typeof params.authentication === "string" && params.authentication.trim() !== "";
+}
+
+function findNodeIndex(text: string, nodeName: string, nodeType: string): number {
+  const byName = text.indexOf(nodeName);
+  if (byName >= 0) return byName;
+  const byType = text.indexOf(nodeType);
+  return byType >= 0 ?
byType : 0; +} diff --git a/src/team/teamAudit.ts b/src/team/teamAudit.ts new file mode 100644 index 0000000..855c107 --- /dev/null +++ b/src/team/teamAudit.ts @@ -0,0 +1,239 @@ +import fs from "node:fs"; +import path from "node:path"; +import { scanRepository } from "../core/scan.js"; +import type { Report } from "../types.js"; + +export interface TeamAuditOptions { + root: string; + policyPath?: string; + outDir: string; + slackPayloadPath?: string; +} + +interface TeamPolicy { + minScore: number; + maxCritical: number; + maxHigh: number; + requiredFiles: string[]; + requireMcpRegistry: boolean; + slackChannel?: string; +} + +interface HistoryItem { + generatedAt: string; + score: number; + critical: number; + high: number; +} + +export interface TeamAuditReport { + generatedAt: string; + root: string; + status: "pass" | "fail"; + policyPath: string; + policy: TeamPolicy; + current: HistoryItem; + history: HistoryItem[]; + checks: Array<{ + name: string; + passed: boolean; + detail: string; + }>; + slackPayload: SlackPayload; +} + +interface SlackPayload { + text: string; + channel?: string; + blocks: Array<{ + type: "section"; + text: { + type: "mrkdwn"; + text: string; + }; + }>; +} + +const DEFAULT_POLICY: TeamPolicy = { + minScore: 85, + maxCritical: 0, + maxHigh: 0, + requiredFiles: ["AGENTS.md", "SECURITY.md", "README.md"], + requireMcpRegistry: false +}; + +export function runTeamAudit(options: TeamAuditOptions): TeamAuditReport { + const root = path.resolve(options.root); + const outDir = path.resolve(root, options.outDir); + const policyPath = path.resolve(root, options.policyPath ?? 
".agent-reliability/team-policy.json"); + const policy = readPolicy(policyPath); + const report = scanRepository(root); + + fs.mkdirSync(outDir, { recursive: true }); + const historyDir = path.join(outDir, "history"); + fs.mkdirSync(historyDir, { recursive: true }); + const historyFile = path.join(historyDir, `${safeTimestamp(report.generatedAt)}.json`); + fs.writeFileSync(historyFile, JSON.stringify(report, null, 2), "utf8"); + + const current = toHistoryItem(report); + const history = readHistory(historyDir); + const checks = [ + { + name: "Minimum reliability score", + passed: report.score >= policy.minScore, + detail: `${report.score}/100 >= ${policy.minScore}/100` + }, + { + name: "Critical finding budget", + passed: report.summary.critical <= policy.maxCritical, + detail: `${report.summary.critical} critical <= ${policy.maxCritical}` + }, + { + name: "High finding budget", + passed: report.summary.high <= policy.maxHigh, + detail: `${report.summary.high} high <= ${policy.maxHigh}` + }, + ...policy.requiredFiles.map((relativePath) => ({ + name: `Required file ${relativePath}`, + passed: fs.existsSync(path.join(root, relativePath)), + detail: fs.existsSync(path.join(root, relativePath)) ? "present" : "missing" + })), + { + name: "Private MCP registry", + passed: !policy.requireMcpRegistry || fs.existsSync(path.join(root, ".agent-reliability", "mcp-registry.json")), + detail: policy.requireMcpRegistry ? ".agent-reliability/mcp-registry.json required" : "not required by policy" + } + ]; + const status = checks.every((check) => check.passed) ? 
"pass" : "fail"; + const slackPayload = buildSlackPayload(status, policy, current, checks); + const audit: TeamAuditReport = { + generatedAt: new Date().toISOString(), + root, + status, + policyPath: path.relative(root, policyPath).replaceAll("\\", "/"), + policy, + current, + history, + checks, + slackPayload + }; + + fs.writeFileSync(path.join(outDir, "team-audit.json"), JSON.stringify(audit, null, 2), "utf8"); + fs.writeFileSync(path.join(outDir, "team-audit.md"), formatTeamAuditMarkdown(audit), "utf8"); + const slackPath = path.resolve(root, options.slackPayloadPath ?? path.join(options.outDir, "slack-payload.json")); + fs.mkdirSync(path.dirname(slackPath), { recursive: true }); + fs.writeFileSync(slackPath, JSON.stringify(slackPayload, null, 2), "utf8"); + return audit; +} + +export function formatTeamAuditMarkdown(report: TeamAuditReport): string { + const lines = [ + "# Team Audit Report", + "", + `Generated: ${report.generatedAt}`, + "", + `Status: **${report.status.toUpperCase()}**`, + `Policy: \`${report.policyPath}\``, + "", + "## Current Scan", + "", + `- Score: ${report.current.score}/100`, + `- Critical: ${report.current.critical}`, + `- High: ${report.current.high}`, + "", + "## Policy Checks", + "", + "| Check | Status | Detail |", + "| --- | --- | --- |" + ]; + for (const check of report.checks) { + lines.push(`| ${check.name} | ${check.passed ? "pass" : "fail"} | ${check.detail} |`); + } + lines.push("", "## Scan History", ""); + if (report.history.length === 0) { + lines.push("No scan history yet."); + } else { + lines.push("| Generated | Score | Critical | High |", "| --- | ---: | ---: | ---: |"); + for (const item of report.history.slice(-10)) { + lines.push(`| ${item.generatedAt} | ${item.score} | ${item.critical} | ${item.high} |`); + } + } + lines.push("", "## Slack Payload", "", "A dry-run Slack payload was written locally. 
No webhook is called by this command.");
+  return lines.join("\n");
+}
+
+function readPolicy(policyPath: string): TeamPolicy {
+  if (!fs.existsSync(policyPath)) return DEFAULT_POLICY;
+  try {
+    const parsed = JSON.parse(fs.readFileSync(policyPath, "utf8")) as Partial<TeamPolicy>;
+    return {
+      minScore: numberOr(parsed.minScore, DEFAULT_POLICY.minScore),
+      maxCritical: numberOr(parsed.maxCritical, DEFAULT_POLICY.maxCritical),
+      maxHigh: numberOr(parsed.maxHigh, DEFAULT_POLICY.maxHigh),
+      requiredFiles: Array.isArray(parsed.requiredFiles) ? parsed.requiredFiles.map(String) : DEFAULT_POLICY.requiredFiles,
+      requireMcpRegistry: typeof parsed.requireMcpRegistry === "boolean" ? parsed.requireMcpRegistry : DEFAULT_POLICY.requireMcpRegistry,
+      slackChannel: typeof parsed.slackChannel === "string" ? parsed.slackChannel : undefined
+    };
+  } catch {
+    return DEFAULT_POLICY;
+  }
+}
+
+function readHistory(historyDir: string): HistoryItem[] {
+  if (!fs.existsSync(historyDir)) return [];
+  return fs.readdirSync(historyDir)
+    .filter((file) => file.endsWith(".json"))
+    .map((file) => path.join(historyDir, file))
+    .map(readHistoryFile)
+    .filter((item): item is HistoryItem => item !== null)
+    .sort((left, right) => left.generatedAt.localeCompare(right.generatedAt));
+}
+
+function readHistoryFile(file: string): HistoryItem | null {
+  try {
+    return toHistoryItem(JSON.parse(fs.readFileSync(file, "utf8")) as Report);
+  } catch {
+    return null;
+  }
+}
+
+function toHistoryItem(report: Report): HistoryItem {
+  return {
+    generatedAt: report.generatedAt,
+    score: report.score,
+    critical: report.summary.critical,
+    high: report.summary.high
+  };
+}
+
+function buildSlackPayload(status: "pass" | "fail", policy: TeamPolicy, current: HistoryItem, checks: TeamAuditReport["checks"]): SlackPayload {
+  const failed = checks.filter((check) => !check.passed);
+  return {
+    text: `Agent Reliability team audit ${status.toUpperCase()}: score ${current.score}/100, ${current.critical} critical, ${current.high}
high.`, + channel: policy.slackChannel, + blocks: [ + { + type: "section", + text: { + type: "mrkdwn", + text: `*Agent Reliability team audit:* ${status.toUpperCase()}\nScore: ${current.score}/100\nCritical: ${current.critical}\nHigh: ${current.high}` + } + }, + { + type: "section", + text: { + type: "mrkdwn", + text: failed.length === 0 ? "No failed policy checks." : `Failed checks:\n${failed.map((check) => `- ${check.name}: ${check.detail}`).join("\n")}` + } + } + ] + }; +} + +function numberOr(value: unknown, fallback: number): number { + return typeof value === "number" && Number.isFinite(value) ? value : fallback; +} + +function safeTimestamp(value: string): string { + return value.replace(/[:.]/g, "-"); +} diff --git a/tests/cli.test.ts b/tests/cli.test.ts index b0a7d1a..8542b8c 100644 --- a/tests/cli.test.ts +++ b/tests/cli.test.ts @@ -100,4 +100,76 @@ describe("runCli", () => { expect(() => JSON.parse(output)).not.toThrow(); expect(output).not.toContain("Written reports:"); }); + + it("writes team audit history and a dry-run Slack payload", () => { + const repo = createFixtureCopy("clean-node"); + fs.mkdirSync(path.join(repo, ".agent-reliability"), { recursive: true }); + fs.writeFileSync(path.join(repo, ".agent-reliability", "team-policy.json"), JSON.stringify({ + minScore: 80, + maxCritical: 0, + maxHigh: 0, + requiredFiles: ["README.md", "AGENTS.md"], + requireMcpRegistry: false, + slackChannel: "#agent-reliability" + }, null, 2), "utf8"); + const capture = createCapture(); + const code = runCli(["team-audit", repo, "--out", ".agent-reliability/team"], capture.io); + + expect(code).toBe(0); + expect(fs.existsSync(path.join(repo, ".agent-reliability", "team", "team-audit.json"))).toBe(true); + expect(fs.existsSync(path.join(repo, ".agent-reliability", "team", "slack-payload.json"))).toBe(true); + expect(fs.readdirSync(path.join(repo, ".agent-reliability", "team", "history")).some((file) => file.endsWith(".json"))).toBe(true); + }); + + it("checks MCP 
configs against a private registry", () => { + const repo = createFixtureCopy("mcp-registry"); + const capture = createCapture(); + const code = runCli(["mcp-registry", repo, "--out", ".agent-reliability/mcp-audit"], capture.io); + const report = JSON.parse(fs.readFileSync(path.join(repo, ".agent-reliability", "mcp-audit", "mcp-registry-report.json"), "utf8")) as { + status: string; + findings: Array<{ id: string }>; + }; + + expect(code).toBe(1); + expect(report.status).toBe("fail"); + expect(report.findings.map((finding) => finding.id)).toContain("mcp.server.disabled"); + expect(report.findings.map((finding) => finding.id)).toContain("mcp.server.not-allowlisted"); + expect(report.findings.map((finding) => finding.id)).toContain("mcp.url.not-approved"); + }); + + it("writes n8n-only reports and redacted workflow backups", () => { + const repo = createFixtureCopy("n8n-risk"); + const scanCapture = createCapture(); + const scanCode = runCli(["n8n-scan", repo, "--out", ".agent-reliability/n8n", "--format", "json", "--stdout", "--min-score", "0"], scanCapture.io); + const scanReport = JSON.parse(scanCapture.stdout.join("\n")) as { findings: Array<{ scanner: string; id: string }> }; + + expect(scanCode).toBe(1); + expect(scanReport.findings.every((finding) => finding.scanner === "n8n-safety")).toBe(true); + expect(scanReport.findings.map((finding) => finding.id)).toContain("n8n.command-execution-node"); + + const backupCapture = createCapture(); + const backupCode = runCli(["n8n-backup", repo, "--backup-dir", ".agent-reliability/n8n-backup"], backupCapture.io); + const backupFile = path.join(repo, ".agent-reliability", "n8n-backup", "workflows__risky.json"); + + expect(backupCode).toBe(0); + expect(fs.existsSync(backupFile)).toBe(true); + expect(fs.readFileSync(backupFile, "utf8")).toContain("[redacted]"); + }); + + it("summarizes AI trace costs and fails over budget", () => { + const repo = createFixtureCopy("cost-trace"); + const capture = createCapture(); + const 
code = runCli(["cost-report", repo, "--budget-usd", "0.50", "--out", ".agent-reliability/cost"], capture.io); + const report = JSON.parse(fs.readFileSync(path.join(repo, ".agent-reliability", "cost", "cost-report.json"), "utf8")) as { + status: string; + total: { costUsd: number; totalTokens: number }; + byModel: Array<{ provider: string }>; + }; + + expect(code).toBe(1); + expect(report.status).toBe("warn"); + expect(report.total.costUsd).toBe(1); + expect(report.total.totalTokens).toBe(4500); + expect(report.byModel.map((bucket) => bucket.provider)).toContain("openai"); + }); }); diff --git a/tests/fixtures/clean-node/SECURITY.md b/tests/fixtures/clean-node/SECURITY.md new file mode 100644 index 0000000..48a8df5 --- /dev/null +++ b/tests/fixtures/clean-node/SECURITY.md @@ -0,0 +1,3 @@ +# Security Policy + +Do not include real secrets in issues, fixtures, examples, or reports. diff --git a/tests/fixtures/mcp-registry/.mcp.json b/tests/fixtures/mcp-registry/.mcp.json new file mode 100644 index 0000000..c8517c3 --- /dev/null +++ b/tests/fixtures/mcp-registry/.mcp.json @@ -0,0 +1,17 @@ +{ + "mcpServers": { + "filesystem": { + "command": "node", + "args": ["server.js"], + "env": { + "ROOT": "." + } + }, + "browser": { + "url": "https://unapproved.example/mcp" + }, + "unknown-remote": { + "url": "https://unknown.example/mcp" + } + } +} diff --git a/tests/fixtures/n8n-risk/README.md b/tests/fixtures/n8n-risk/README.md new file mode 100644 index 0000000..244c21a --- /dev/null +++ b/tests/fixtures/n8n-risk/README.md @@ -0,0 +1,3 @@ +# n8n Risk Fixture + +Synthetic workflow fixture for n8n safety scanner tests. 
diff --git a/tests/fixtures/n8n-risk/workflows/risky.json b/tests/fixtures/n8n-risk/workflows/risky.json new file mode 100644 index 0000000..5607baa --- /dev/null +++ b/tests/fixtures/n8n-risk/workflows/risky.json @@ -0,0 +1,35 @@ +{ + "name": "Risky n8n workflow fixture", + "nodes": [ + { + "name": "Public intake", + "type": "n8n-nodes-base.webhook", + "parameters": { + "path": "public-intake", + "authentication": "none" + } + }, + { + "name": "Run deployment command", + "type": "n8n-nodes-base.executeCommand", + "parameters": { + "command": "deploy.sh" + } + }, + { + "name": "Risky code", + "type": "n8n-nodes-base.code", + "parameters": { + "jsCode": "const token = process.env.DEPLOY_TOKEN; eval($json.body);" + } + }, + { + "name": "Synthetic secret note", + "type": "n8n-nodes-base.set", + "parameters": { + "notes": "token=SCANNER_ONLY_VALUE_ZYXWVUTSRQPONMLK" + } + } + ], + "connections": {} +} diff --git a/tests/scan.test.ts b/tests/scan.test.ts index 428c483..bcf0716 100644 --- a/tests/scan.test.ts +++ b/tests/scan.test.ts @@ -55,6 +55,16 @@ describe("scanRepository", () => { expect(ids).toContain("ci.no-validation-command"); }); + it("detects risky n8n workflow exports", () => { + const report = scanRepository(path.join(fixtures, "n8n-risk")); + const ids = report.findings.map((finding) => finding.id); + expect(ids).toContain("n8n.public-webhook"); + expect(ids).toContain("n8n.command-execution-node"); + expect(ids).toContain("n8n.risky-code-node"); + expect(ids).toContain("n8n.secret-like-value"); + expect(report.facts.n8nWorkflowFiles).toEqual(["workflows/risky.json"]); + }); + it("detects inline unsafe GitHub Actions syntax", () => { const temp = fs.mkdtempSync(path.join(os.tmpdir(), "ark-inline-action-")); fs.mkdirSync(path.join(temp, ".github", "workflows"), { recursive: true }); From 390f924da2fee72e6946109d33dfcf848654c0d9 Mon Sep 17 00:00:00 2001 From: Aolinge <153434584+aolingge@users.noreply.github.com> Date: Tue, 28 Apr 2026 19:47:01 +0800 Subject: 
[PATCH 5/5] test: include fixture inputs for CI --- .../.agent-reliability/traces/run.jsonl | 2 ++ .../.agent-reliability/mcp-registry.json | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/fixtures/cost-trace/.agent-reliability/traces/run.jsonl create mode 100644 tests/fixtures/mcp-registry/.agent-reliability/mcp-registry.json diff --git a/tests/fixtures/cost-trace/.agent-reliability/traces/run.jsonl b/tests/fixtures/cost-trace/.agent-reliability/traces/run.jsonl new file mode 100644 index 0000000..0549bcb --- /dev/null +++ b/tests/fixtures/cost-trace/.agent-reliability/traces/run.jsonl @@ -0,0 +1,2 @@ +{"provider":"openai","model":"gpt-5.2","inputTokens":1000,"outputTokens":500,"costUsd":0.25} +{"provider":"anthropic","model":"claude-sonnet","usage":{"prompt_tokens":2000,"completion_tokens":1000},"cost_usd":0.75} diff --git a/tests/fixtures/mcp-registry/.agent-reliability/mcp-registry.json b/tests/fixtures/mcp-registry/.agent-reliability/mcp-registry.json new file mode 100644 index 0000000..977ce7f --- /dev/null +++ b/tests/fixtures/mcp-registry/.agent-reliability/mcp-registry.json @@ -0,0 +1,20 @@ +{ + "servers": [ + { + "name": "filesystem", + "approved": true, + "trustScore": 90, + "permissions": ["filesystem"], + "allowedCommands": ["node"], + "riskOwner": "platform", + "riskReason": "Read-only fixture server for local examples." + }, + { + "name": "browser", + "approved": false, + "trustScore": 45, + "permissions": ["browser", "network"], + "allowedUrls": ["https://approved.example/mcp"] + } + ] +}