From 6288984b5547396bfed4cc26bf20588a0eef41c7 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 05:02:34 +0000 Subject: [PATCH 1/9] feat: config wizard + reference architectures + CI + outreach + 4 new skills + i18n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - docs/wizard/ — interactive static config-builder (HTML+JS, GitHub Pages friendly) - docs/reference-architectures/ — 4 full blueprints (homelab, solo-dev, small-agency, road-warrior) - docs/outreach/ — launch drafts (tweet, HN, Reddit, upstream-Nous PR, long blog) - .github/workflows/ci.yml + validate_skills.py — markdown-link-check, yamllint, skill frontmatter linter, prettier advisory - skills/ — +4 (daily-inbox-triage, hermes-weekly, spam-trap, meeting-prep) — total 13 - README-zh.md, README-ja.md — localized entry summaries - templates/config/*.yaml — quoted ${VAR} inside flow mappings (valid YAML) - README: skills 9→13, language links, wizard/ref-arch/outreach rows, CI badge - CHANGELOG + ROADMAP updated Co-Authored-By: Rob --- .github/markdown-link-check.json | 22 + .github/scripts/validate_skills.py | 105 +++++ .github/workflows/ci.yml | 54 +++ .github/yamllint.yml | 26 + CHANGELOG.md | 14 + README-ja.md | 37 ++ README-zh.md | 37 ++ README.md | 12 +- ROADMAP.md | 14 +- docs/outreach/README.md | 11 + docs/outreach/blog-post-long.md | 121 +++++ docs/outreach/hacker-news-post.md | 31 ++ docs/outreach/launch-tweet-thread.md | 84 ++++ docs/outreach/nous-upstream-pr-body.md | 57 +++ docs/outreach/reddit-localllama.md | 41 ++ docs/reference-architectures/README.md | 12 + docs/reference-architectures/homelab.md | 149 ++++++ docs/reference-architectures/road-warrior.md | 153 ++++++ docs/reference-architectures/small-agency.md | 111 +++++ .../reference-architectures/solo-developer.md | 93 ++++ docs/wizard/README.md | 28 ++ docs/wizard/index.html | 446 ++++++++++++++++++ 
skills/dev/meeting-prep/SKILL.md | 97 ++++ skills/dev/release-notes/SKILL.md | 2 +- skills/ops/daily-inbox-triage/SKILL.md | 97 ++++ skills/ops/hermes-weekly/SKILL.md | 89 ++++ skills/security/spam-trap/SKILL.md | 84 ++++ templates/config/cost-optimized.yaml | 2 +- templates/config/production.yaml | 15 +- templates/config/security-hardened.yaml | 8 +- templates/config/telegram-bot.yaml | 2 +- 31 files changed, 2035 insertions(+), 19 deletions(-) create mode 100644 .github/markdown-link-check.json create mode 100644 .github/scripts/validate_skills.py create mode 100644 .github/workflows/ci.yml create mode 100644 .github/yamllint.yml create mode 100644 README-ja.md create mode 100644 README-zh.md create mode 100644 docs/outreach/README.md create mode 100644 docs/outreach/blog-post-long.md create mode 100644 docs/outreach/hacker-news-post.md create mode 100644 docs/outreach/launch-tweet-thread.md create mode 100644 docs/outreach/nous-upstream-pr-body.md create mode 100644 docs/outreach/reddit-localllama.md create mode 100644 docs/reference-architectures/README.md create mode 100644 docs/reference-architectures/homelab.md create mode 100644 docs/reference-architectures/road-warrior.md create mode 100644 docs/reference-architectures/small-agency.md create mode 100644 docs/reference-architectures/solo-developer.md create mode 100644 docs/wizard/README.md create mode 100644 docs/wizard/index.html create mode 100644 skills/dev/meeting-prep/SKILL.md create mode 100644 skills/ops/daily-inbox-triage/SKILL.md create mode 100644 skills/ops/hermes-weekly/SKILL.md create mode 100644 skills/security/spam-trap/SKILL.md diff --git a/.github/markdown-link-check.json b/.github/markdown-link-check.json new file mode 100644 index 0000000..c22827d --- /dev/null +++ b/.github/markdown-link-check.json @@ -0,0 +1,22 @@ +{ + "ignorePatterns": [ + { "pattern": "^https://t.me/" }, + { "pattern": "^https://example.com/" }, + { "pattern": "^https://install.hermes.nous.ai" }, + { "pattern": 
"^https://langfuse.yourdomain.com" }, + { "pattern": "^https://hermes.yourdomain.com" }, + { "pattern": "^https://hooks.yourdomain.com" }, + { "pattern": "^http://localhost" }, + { "pattern": "^http://127.0.0.1" } + ], + "httpHeaders": [ + { + "urls": ["https://github.com/", "https://raw.githubusercontent.com/"], + "headers": { "User-Agent": "markdown-link-check" } + } + ], + "retryOn429": true, + "retryCount": 3, + "fallbackRetryDelay": "30s", + "aliveStatusCodes": [200, 206, 429, 403] +} diff --git a/.github/scripts/validate_skills.py b/.github/scripts/validate_skills.py new file mode 100644 index 0000000..f34f55d --- /dev/null +++ b/.github/scripts/validate_skills.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +"""Validate every skills/**/SKILL.md has correct frontmatter. + +Run via: python .github/scripts/validate_skills.py +Exit code 0 = all pass; 1 = any fail. Each failing file is listed with reason. +""" +from __future__ import annotations + +import pathlib +import re +import sys + +import yaml + +REQUIRED_KEYS = {"name", "description", "when_to_use", "toolsets"} +ALLOWED_TOOLSETS = { + "terminal", + "file", + "github", + "delegate_task", + "classify", + "telegram", + "web", + "browser", + "email", + "discord", + "slack", + "memory", +} + +FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---", re.DOTALL) + + +def extract_frontmatter(p: pathlib.Path) -> dict | None: + text = p.read_text(encoding="utf-8") + m = FRONTMATTER_RE.match(text) + if not m: + return None + try: + return yaml.safe_load(m.group(1)) or {} + except yaml.YAMLError as e: + print(f" yaml parse error: {e}") + return None + + +def validate(p: pathlib.Path) -> list[str]: + errs: list[str] = [] + fm = extract_frontmatter(p) + if fm is None: + return ["missing or unparseable frontmatter"] + + missing = REQUIRED_KEYS - set(fm.keys()) + if missing: + errs.append(f"missing required keys: {sorted(missing)}") + + toolsets = fm.get("toolsets", []) + if not isinstance(toolsets, list): + errs.append("toolsets 
must be a list") + else: + unknown = [t for t in toolsets if t not in ALLOWED_TOOLSETS] + if unknown: + errs.append(f"unknown toolsets: {unknown} (allowed: {sorted(ALLOWED_TOOLSETS)})") + + when = fm.get("when_to_use", []) + if not isinstance(when, list) or not when: + errs.append("when_to_use must be a non-empty list of triggers") + + desc = fm.get("description", "") + if not isinstance(desc, str) or len(desc) < 10: + errs.append("description must be a >=10-char string") + + return errs + + +def main() -> int: + root = pathlib.Path(__file__).resolve().parents[2] / "skills" + if not root.is_dir(): + print(f"::error::no skills/ dir at {root}") + return 1 + + skills = sorted(root.rglob("SKILL.md")) + if not skills: + print(f"::warning::no SKILL.md files found under {root}") + return 0 + + total_fails = 0 + for p in skills: + rel = p.relative_to(root.parent) + errs = validate(p) + if errs: + total_fails += 1 + print(f"::error file={rel}::{'; '.join(errs)}") + else: + print(f"ok {rel}") + + if total_fails: + print(f"\n{total_fails}/{len(skills)} skill(s) failed validation", file=sys.stderr) + return 1 + + print(f"\nAll {len(skills)} skill(s) passed.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5364cb1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,54 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + markdown-links: + name: markdown-link-check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Check markdown links + uses: gaurav-nelson/github-action-markdown-link-check@v1 + with: + config-file: '.github/markdown-link-check.json' + folder-path: '.' 
+ use-quiet-mode: 'yes' + check-modified-files-only: 'yes' + base-branch: 'main' + + yaml-lint: + name: yamllint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install yamllint + run: pip install yamllint + - name: Run yamllint + run: yamllint -c .github/yamllint.yml templates/ benchmarks/ skills/ + + skill-frontmatter: + name: skill frontmatter + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install deps + run: pip install pyyaml + - name: Validate every SKILL.md frontmatter + run: python .github/scripts/validate_skills.py + + prettier-check: + name: prettier (soft) + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - name: Prettier check (advisory only) + run: npx -y prettier --check "**/*.md" || echo "::warning::prettier formatting drift (non-blocking)" diff --git a/.github/yamllint.yml b/.github/yamllint.yml new file mode 100644 index 0000000..4fdce10 --- /dev/null +++ b/.github/yamllint.yml @@ -0,0 +1,26 @@ +extends: default + +# Goal: catch real syntax errors in template YAMLs, not formatting nits. +# Templates often align values / use flow-mapping shorthand for cron/approval rules; +# those are legit, so we relax those rules. + +rules: + line-length: disable + document-start: disable + comments: disable + comments-indentation: disable + colons: disable + commas: disable + indentation: disable + truthy: + allowed-values: ['true', 'false', 'on', 'off', 'yes', 'no'] + check-keys: false + braces: + max-spaces-inside: 2 + brackets: + max-spaces-inside: 2 + +ignore: | + node_modules/ + .venv/ + templates/compose/.env.langfuse.example diff --git a/CHANGELOG.md b/CHANGELOG.md index c3c0d9f..211225b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ Dated list of meaningful guide updates. Roughly [Keep a Changelog](https://keepachangelog.com) flavored. 
+## 2026-04-17 — Wizard + Reference Architectures + CI + +### Added +- **`docs/wizard/index.html`** — interactive static config wizard; 8 questions → ready-to-drop `config.yaml`, runs entirely in the browser (GitHub Pages friendly) +- **`docs/reference-architectures/`** — 4 full blueprints: Homelab, Solo Developer, Small Agency, Road Warrior +- **`docs/outreach/`** — launch-ready drafts: launch tweet thread, Hacker News post, r/LocalLLaMA post, upstream PR body to `NousResearch/hermes-agent`, long-form blog post +- **4 new skills**: `ops/daily-inbox-triage`, `ops/hermes-weekly`, `security/spam-trap`, `dev/meeting-prep` (total skills: 13) +- **CI** — `.github/workflows/ci.yml`: markdown-link-check, yamllint, skill-frontmatter validator (`validate_skills.py`), prettier advisory +- **Localized READMEs** — [`README-zh.md`](./README-zh.md), [`README-ja.md`](./README-ja.md) (entry-level summaries) + +### Changed +- README: skills badge 9→13, language links, repo map rows for wizard + reference architectures + outreach, CI badge +- `templates/config/*.yaml` — quoted `${VAR}` env-var substitutions inside flow mappings so every template is valid YAML + ## 2026-04-17 — Installable Artifacts ### Added diff --git a/README-ja.md b/README-ja.md new file mode 100644 index 0000000..2c269fc --- /dev/null +++ b/README-ja.md @@ -0,0 +1,37 @@ +# Hermes 最適化ガイド(日本語ショート版) + +> [英語版はこちら](./README.md) · このページは入口の要約。本文の章は英語のまま。 + +[NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent)(v0.10.0+)向けの実戦ガイド + インストール可能な成果物(Skills・設定テンプレ・インフラスクリプト)。 + +## ワンコマンドで起動 + +```bash +# 新しい Debian 12 / Ubuntu 24.04 VPS で実行 +curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/main/scripts/vps-bootstrap.sh | bash +``` + +もしくは [docs/quickstart.md](./docs/quickstart.md)(5 分で Telegram Bot)を参照。 + +## 主なコンテンツ + +- **21 章の本文**(`part1`〜`part21`) — LightRAG、Telegram、MCP、セキュリティ、可観測性、リモートサンドボックス +- **13 個のインストール可能 Skill**(`skills/`) — 監査、バックアップ、依存スキャン、コストレポート、Telegram 
トリアージ、PR レビュー など +- **5 つのプロダクション設定テンプレ**(`templates/config/`) — minimum / telegram-bot / production / cost-optimized / security-hardened +- **インフラ一式**(`templates/compose/`, `templates/caddy/`, `templates/systemd/`, `scripts/`) — Langfuse セルフホスト、Caddy リバースプロキシ、systemd 強化、VPS ブートストラップ +- **Mermaid アーキテクチャ図**(`diagrams/`) +- **再現可能なベンチマーク**(`benchmarks/`) — 12 モデル × 5 タスク、手法込み +- **エコシステム目録**([`ECOSYSTEM.md`](./ECOSYSTEM.md)) — MCP サーバ、コーディングエージェント、ダッシュボード拡張 +- **対話式設定ウィザード**([`docs/wizard/`](./docs/wizard/)) — ブラウザ内で `config.yaml` を生成 + +## 読む順番の目安 + +1. 最速で Telegram Bot を動かしたい → [docs/quickstart.md](./docs/quickstart.md) +2. アーキテクチャを把握したい → [diagrams/architecture.md](./diagrams/architecture.md) +3. コストを下げたい → [part20-observability.md](./part20-observability.md) の "Cost-routing playbook" +4. 本番運用したい → [docs/reference-architectures/](./docs/reference-architectures/) から近いものを選ぶ +5. 公開エンドポイント → [part19-security-playbook.md](./part19-security-playbook.md) を必ず読む + +## ライセンス・貢献 + +MIT。Issue / PR 歓迎。[CONTRIBUTING.md](./CONTRIBUTING.md) を参照。 diff --git a/README-zh.md b/README-zh.md new file mode 100644 index 0000000..753553d --- /dev/null +++ b/README-zh.md @@ -0,0 +1,37 @@ +# Hermes 优化指南(中文简版) + +> [English 完整版](./README.md) · 本页是入口摘要,章节正文仍为英文。 + +实用指南 + 可安装制品(Skills、配置模板、基础设施脚本),针对 [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent)(v0.10.0+)。 + +## 一键起步 + +```bash +# 新建 Debian 12 / Ubuntu 24.04 VPS 上运行 +curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/main/scripts/vps-bootstrap.sh | bash +``` + +或阅读 [docs/quickstart.md](./docs/quickstart.md)(5 分钟 Telegram 机器人)。 + +## 内容一览 + +- **21 章正文(英文)**(见 `part1` 到 `part21`) — LightRAG、Telegram、MCP、安全、可观测性、远程沙箱 +- **13 个可安装 Skill**(`skills/`) — 审计、备份、依赖扫描、成本报告、Telegram 分类、PR 审查 等 +- **5 套生产配置模板**(`templates/config/`) — minimum / telegram-bot / production / cost-optimized / security-hardened +- **基础设施**(`templates/compose/`, `templates/caddy/`, `templates/systemd/`, `scripts/`) — 
Langfuse 自托管、Caddy 反代、systemd 硬化、VPS 引导脚本 +- **Mermaid 架构图**(`diagrams/`) +- **可复现基准测试**(`benchmarks/`) — 12 个模型 × 5 个任务,含方法论 +- **生态目录**([`ECOSYSTEM.md`](./ECOSYSTEM.md)) — MCP 服务器、编码代理、仪表板插件 +- **交互式配置向导**([`docs/wizard/`](./docs/wizard/)) — 浏览器内生成 `config.yaml` + +## 推荐阅读顺序 + +1. 想最快跑通 Telegram 机器人 → [docs/quickstart.md](./docs/quickstart.md) +2. 想了解架构 → [diagrams/architecture.md](./diagrams/architecture.md) +3. 想省钱 → [part20-observability.md](./part20-observability.md) 的 "Cost-routing playbook" +4. 想上生产 → [docs/reference-architectures/](./docs/reference-architectures/) 选一个最接近的 +5. 用户面公开部署 → [part19-security-playbook.md](./part19-security-playbook.md) 必看 + +## 许可与贡献 + +MIT。欢迎 Issue / PR,详见 [CONTRIBUTING.md](./CONTRIBUTING.md)。 diff --git a/README.md b/README.md index 9d0d11c..2caacc2 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,14 @@ [![Hermes](https://img.shields.io/badge/Hermes-v0.10.0%20%28main%29-9146FF)](https://github.com/NousResearch/hermes-agent) [![Last updated](https://img.shields.io/badge/Last%20updated-2026--04--17-brightgreen)](./CHANGELOG.md) [![Parts](https://img.shields.io/badge/parts-21-blue)](#table-of-contents) -[![Skills](https://img.shields.io/badge/installable%20skills-9-blue)](./skills/) +[![Skills](https://img.shields.io/badge/installable%20skills-13-blue)](./skills/) [![Configs](https://img.shields.io/badge/config%20templates-5-blue)](./templates/config/) +[![CI](https://github.com/OnlyTerp/hermes-optimization-guide/actions/workflows/ci.yml/badge.svg)](./.github/workflows/ci.yml) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](./CONTRIBUTING.md) -> **Tested on Hermes Agent v0.10.0 (v2026.4.16)** with post-release tracking for `main` · **21 parts, 9 installable skills, 5 opinionated configs, one-command VPS bootstrap** · Battle-tested on a live production deployment +> **Tested on Hermes Agent v0.10.0 (v2026.4.16)** with post-release tracking for `main` · **21 parts, 13 installable skills, 5 opinionated 
configs, 4 reference architectures, one-command VPS bootstrap** · Battle-tested on a live production deployment +> +> Other languages: [中文](./README-zh.md) · [日本語](./README-ja.md) ### The End-to-End Hermes Guide — docs + runnable artifacts Every part you need to go from fresh install to a production Hermes deployment that talks on 16 platforms, orchestrates Claude Code / Codex / Gemini CLI, plugs into any MCP server, traces every call in Langfuse, and runs heavy work on disposable Modal/Daytona sandboxes — without burning $100/day on Opus tokens. @@ -37,7 +40,7 @@ Prefer a 5-minute local-only setup? → **[docs/quickstart.md](./docs/quickstart | Folder | What's in it | |---|---| -| [`skills/`](./skills) | **9 installable `SKILL.md`** files. `ln -s` into `~/.hermes/skills/` and they're live. | +| [`skills/`](./skills) | **13 installable `SKILL.md`** files. `ln -s` into `~/.hermes/skills/` and they're live. | | [`templates/config/`](./templates/config) | **5 opinionated `config.yaml`** — minimum, telegram-bot, production, cost-optimized, security-hardened. | | [`templates/compose/`](./templates/compose) | Self-hosted Langfuse v3 stack (ClickHouse + MinIO + Redis). | | [`templates/caddy/`](./templates/caddy) | Caddyfile reference (reverse proxy + auto TLS + HSTS). | @@ -46,6 +49,9 @@ Prefer a 5-minute local-only setup? → **[docs/quickstart.md](./docs/quickstart | [`scripts/vps-bootstrap.sh`](./scripts/vps-bootstrap.sh) | One-command fresh VPS → production Hermes. | | [`diagrams/`](./diagrams) | 6 Mermaid diagrams (architecture, MCP flow, delegation, sandbox sync, observability, security layers). | | [`benchmarks/`](./benchmarks) | Reproducible cost + latency table across 12 models × 5 tasks. | +| [`docs/wizard/`](./docs/wizard) | **Interactive config wizard** — 8 questions → ready-to-drop `config.yaml`. Runs in your browser. | +| [`docs/reference-architectures/`](./docs/reference-architectures) | **4 blueprints** — Homelab, Solo Dev, Small Agency, Road Warrior. 
Full parts list + cost + install. | +| [`docs/outreach/`](./docs/outreach) | Launch tweet, HN post, upstream-PR body drafts (for people linking to this guide). | | [`docs/quickstart.md`](./docs/quickstart.md) | 5-minute zero-to-Telegram-bot. | | [`ECOSYSTEM.md`](./ECOSYSTEM.md) | Curated directory of MCP servers, coding agents, dashboard plugins. | | [`ROADMAP.md`](./ROADMAP.md) · [`CHANGELOG.md`](./CHANGELOG.md) · [`CONTRIBUTING.md`](./CONTRIBUTING.md) | The usual suspects. | diff --git a/ROADMAP.md b/ROADMAP.md index a7d7e7c..e652c5b 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -4,20 +4,17 @@ What's landing next. PRs welcome. ## In progress -- [ ] **Interactive config wizard** — a static page that asks 8 questions and emits a `config.yaml` + systemd unit. Hosted via GitHub Pages. - [ ] **GitHub Pages docs site** — Astro Starlight with full-text search across all parts + skills. - [ ] **Asciinema cast** — 60-second "zero to working Telegram bot" recording embedded in the README. - [ ] **Langfuse dashboard JSON** — importable ready-made dashboard for Hermes traces. +- [ ] **Upstream PR** to `NousResearch/hermes-agent` README — add Community Guides section (draft in `docs/outreach/nous-upstream-pr-body.md`). 
## Queued - [ ] **Skill templates** — `hermes skills new ` scaffolding generator -- [ ] **Reference architectures** — homelab / single-user SaaS / small-team / agency, each with every file needed -- [ ] **Integration tests** — GitHub Actions job that lints every SKILL.md frontmatter + validates YAML configs -- [ ] **Cross-link checker** — CI check that fails if any `[...](./...)` link 404s -- [ ] **Translations** — Chinese + Japanese (large Hermes user base in both communities per v0.9 release notes) -- [ ] **"Hermes Weekly"** — markdown-first week-in-review section auto-generated from Hermes-agent merged PRs +- [ ] **Cross-link checker** — CI check that fails if any `[...](./...)` link 404s (partial: markdown-link-check on modified files is live) - [ ] **Security CVE feed** — `.github/workflows/cve-watch.yml` that monitors OSV for relevant advisories +- [ ] **Dashboard screenshots pass** — embed actual screens in parts 12 / 17 / 20 ## Under consideration @@ -27,6 +24,11 @@ What's landing next. PRs welcome. ## Done (recent) +- ✅ 2026-04-17 — Interactive config wizard (`docs/wizard/`) +- ✅ 2026-04-17 — 4 reference architectures (homelab / solo-dev / small-agency / road-warrior) +- ✅ 2026-04-17 — CI (markdown-link-check + yamllint + skill frontmatter validator) +- ✅ 2026-04-17 — Chinese + Japanese README entry pages +- ✅ 2026-04-17 — Outreach drafts (tweet, HN, Reddit, upstream PR, blog post) - ✅ 2026-04-17 — Installable skill library + templates + bootstrap script - ✅ 2026-04-17 — MCP / coding-agent / security / observability / sandbox parts (17–21) - ✅ 2026-04-16 — v0.9 + v0.10 refresh (parts 12–16) diff --git a/docs/outreach/README.md b/docs/outreach/README.md new file mode 100644 index 0000000..c6fd569 --- /dev/null +++ b/docs/outreach/README.md @@ -0,0 +1,11 @@ +# Outreach Drafts + +Copy-ready materials for announcing / linking / promoting the guide. Keep these **out** of `README.md` — they're for posting, not for the repo front. 
+ +- [`launch-tweet-thread.md`](./launch-tweet-thread.md) — X/Twitter launch thread, 8 tweets +- [`hacker-news-post.md`](./hacker-news-post.md) — HN "Show HN" post + self-comment +- [`reddit-localllama.md`](./reddit-localllama.md) — r/LocalLLaMA post +- [`nous-upstream-pr-body.md`](./nous-upstream-pr-body.md) — body for a PR to `NousResearch/hermes-agent` README +- [`blog-post-long.md`](./blog-post-long.md) — long-form blog post / Substack draft + +All drafts are **suggestions**. Fork the tone to yours before posting. diff --git a/docs/outreach/blog-post-long.md b/docs/outreach/blog-post-long.md new file mode 100644 index 0000000..0063731 --- /dev/null +++ b/docs/outreach/blog-post-long.md @@ -0,0 +1,121 @@ +# Blog Post — "I got tired of AI agent guides that don't ship code" + +**Format:** ~1500 words, dev.to / Substack / personal blog +**Tone:** first person, opinionated, receipts-forward + +--- + +## I got tired of AI agent guides that don't ship code + +I use [Hermes](https://github.com/NousResearch/hermes-agent) every day. Telegram is my CLI, my code editor, my inbox triager, my PR reviewer. After a year of running it across a homelab, a Hetzner VPS, and a roster of client boxes, I noticed something frustrating about the existing guides. + +**They don't ship anything.** + +They explain the architecture. They compare model pricing. They link to Anthropic's prompt caching docs. Then they end — and you still have to write your own `config.yaml`, your own cron skills, your own systemd hardening, your own security playbook, your own observability stack, your own routing logic. + +If you've ever tried to stand up an agent framework for real, you know the middle 60% — between "quickstart works" and "this is running in production" — is where every guide I've ever read gives up. + +So I wrote the opposite. + +--- + +## What "ships code" means + +The [Hermes Optimization Guide](https://github.com/OnlyTerp/hermes-optimization-guide) has 21 chapters of documentation. 
That's the part that looks like every other guide. + +But it also has, in the same repo: + +- **13 installable `SKILL.md` files.** Not examples. Not snippets. Actual files with YAML frontmatter, procedure sections, and security notes. You drop them into `~/.hermes/skills/` and they work. +- **5 opinionated production configs.** `minimum`, `telegram-bot`, `production`, `cost-optimized`, `security-hardened`. One `cp` to `~/.hermes/config.yaml` and you have a working deployment. Every non-obvious field is commented. +- **A VPS bootstrap script.** Fresh Hetzner CX22 to hardened production Hermes in one `curl | bash`. Caddy + UFW + fail2ban + systemd + skill symlinks + unattended-upgrades. +- **Reproducible benchmarks.** 12 flagship models × 5 canonical tasks, with the methodology, the dates, the exact reproduction command. Not vibes. +- **4 reference architectures.** Homelab, Solo Developer, Small Agency, Road Warrior. Each with a full parts list, cost line-items, install commands, and scaling ceilings. +- **A static config wizard.** 8 questions → a ready-to-drop `config.yaml`. Runs in your browser. Nothing uploaded. + +It feels obvious, written out like this. But look around. Nobody's doing it. + +--- + +## Why the guides keep stopping at "documentation" + +My theory: writing docs is *cheap*. Writing docs that ship working artifacts is *expensive* and *awkward*. + +You have to actually deploy Hermes. Actually test the Caddy config on a real VPS. Actually run the nightly backup and have it fail at 3am and figure out why. Actually get prompt injection'd and write the lesson up. + +Most guides stop before that because their authors never did the work, or did it and never wrote it up because "it's just config, nobody cares". + +**People care.** The most-starred AI repos aren't the ones with the best prose. They're the ones where you `git clone && npm install && npm start` and something real happens. 
The guide version of that is: reader forks the repo, copies five files, and has a working agent 10 minutes later. + +--- + +## The routing playbook that drops cost ~90% + +This one gets its own section because it's the part readers care most about. + +The default advice on cost is "use cheaper models". But you can't just set `default: gpt-5.4-mini` — for certain tasks (nuanced reasoning, long-context analysis, hard coding) it will silently hurt quality and you'll blame the framework. + +Here's what actually works, derived from our benchmarks: + +1. **Triage** (~60% of traffic for a personal bot): Gemini 2.5 Flash. Cheap, fast, 1M context. Routes to the right skill or punts to the right model. +2. **Classification** (tagging, routing, spam-trap): Cerebras Llama 70B on a free tier. Effectively zero cost. +3. **Default coding:** Kimi K2.5. Cheapest competent coder, good for 80% of changes. +4. **Hard coding / architecture:** Anthropic Sonnet 4.5. Opt-in (say "use sonnet" or mark the skill with `model: anthropic/claude-sonnet-4-5`). +5. **Long-context research:** Gemini 2.5 Pro. 1M context + reasoning. + +With prompt caching on (Anthropic, OpenAI), `prefer_cached: true` as a default, and Fast Mode *off* unless you explicitly need it — the typical user month drops from $150 to $20–40. + +The full playbook is in Part 20 of the guide, and the benchmarks are in `benchmarks/`. + +--- + +## The security playbook nobody wanted to write + +On April 15, 2026, researchers disclosed "Comment and Control" — a prompt-injection attack that hit Claude Code, Gemini CLI, and Copilot Agent simultaneously. If you run a coding agent that reads GitHub PR bodies or issue comments, you were affected until you patched. 
+ +The fact that this hit *three vendor agents* on the same day, with the same vector, is the single most important thing to internalize about this era: **your agent is only as safe as the least-trusted input it processes.** + +So Part 19 of the guide is the 7-layer defensive playbook: + +1. **Provenance labels.** Every input carries a trust level. Nothing from email / public Telegram / PR bodies is ever treated as instruction unless the user confirms. +2. **Approval gates on the write side.** Reads free, writes approved. +3. **Secret isolation.** API keys live in env files with 0600 perms, redacted in logs, never written to memory. +4. **Webhook signatures.** Stripe-style HMAC verification, rejected at the gateway. +5. **SSRF denylist.** 169.254.169.254 and friends. +6. **MCP trust levels.** Sampling disabled by default for every server; explicit opt-in per server. +7. **Quarantine profile.** Public-facing bot runs as a separate Hermes profile with no MCPs, no memory, no approval chain. + +If you run agents in production, read Part 19 before reading anything else. + +--- + +## The real answer to "why this exists" + +I wrote this because I was the intended reader. I needed it, couldn't find it, built it. + +The open secret is that almost everyone writing in AI right now is in the same spot. The field is moving too fast for the field to document itself. Every framework has a 30-page docs site from 6 months ago and a Discord full of people asking the same 20 questions. + +A guide that's *actually maintained* (CHANGELOG is live, every release gets a 72h refresh pass) and *actually runnable* (skills install, configs work, scripts execute) is — and this is the strange part — still rare. + +The fix isn't to write better prose. It's to commit working code to the repo next to the docs. + +--- + +## If you run Hermes + +- Fork [the guide](https://github.com/OnlyTerp/hermes-optimization-guide), steal the parts you need, contribute back what's missing. 
+- If a skill or config is wrong for your setup, open an issue. I'd genuinely rather hear "this broke" than have 1000 silent forks. +- Star the repo if it saved you time — GitHub's discovery model is still basically popularity-as-recommendation. + +## If you run any other agent framework + +- The *pattern* transfers directly. Installable skill files, opinionated configs, bootstrap script, 4 reference architectures, reproducible benchmarks, security playbook. +- If you port this to your framework and want me to link it, open a PR to the `ECOSYSTEM.md` file or to Community Guides. + +## If you're writing AI documentation + +- Stop stopping at "documentation". Ship what readers need to *run*. +- One skill file, one config, one bootstrap script — all easier to write than you think, all more valuable than the next blog post. + +--- + +*Find the guide at https://github.com/OnlyTerp/hermes-optimization-guide. MIT license, contributions welcome.* diff --git a/docs/outreach/hacker-news-post.md b/docs/outreach/hacker-news-post.md new file mode 100644 index 0000000..408b1a9 --- /dev/null +++ b/docs/outreach/hacker-news-post.md @@ -0,0 +1,31 @@ +# Hacker News — Show HN Draft + +**Title:** Show HN: Hermes Optimization Guide – runnable skills, configs, and VPS bootstrap + +**URL:** `https://github.com/OnlyTerp/hermes-optimization-guide` + +**Text:** (leave empty — HN prefers URL-only "Show HN" posts when the linked page speaks for itself; first self-comment below carries the context) + +--- + +## First self-comment (post immediately after) + +Author here. Context on what this is and why: + +Hermes (Nous Research, ~94K GH stars) is the agent framework I've been using for a year. Most of the existing community guides explain the architecture but don't give you anything to run — you read 15 parts, still have to write your own `config.yaml`, your own cron skills, your own systemd hardening. 
+ +This guide is the other direction: 21 parts of actual documentation *plus* + +- **13 installable `SKILL.md` files** (audit-mcp, rotate-secrets, nightly-backup, weekly-dep-audit, cost-report, telegram-triage, pr-review, release-notes, audit-approval-bypass, plus daily-inbox-triage, hermes-weekly, spam-trap, and meeting-prep) — drop them into `~/.hermes/skills/` or symlink them in +- **5 opinionated configs** for the 5 real personas (minimum / telegram-bot / production / cost-optimized / security-hardened) — every non-obvious field commented +- **A VPS bootstrap script** — fresh Debian/Ubuntu to production Hermes with Caddy + UFW + fail2ban + systemd hardening in ~10 min, one `curl | bash` +- **Docker compose for self-hosted Langfuse** — the single most-asked-for observability setup +- **4 reference architectures** — Homelab, Solo Dev, Small Agency, Road Warrior (phone-drives-cloud-sandbox pattern from the new remote-sandbox PR) +- **Reproducible cost benchmarks** — 12 flagship models × 5 canonical tasks (triage / summarize / codefix / deepreason / bulk-extract), methodology included, rerun-able with `hermes evals run` +- **ECOSYSTEM.md** — 40+ curated MCP servers / coding agents / dashboard plugins + +The part I wanted to share specifically for HN: the **cost routing playbook** (Part 20) — five rules that drop typical agent spend ~90% (Gemini Flash for triage, Cerebras Llama for classification, Kimi K2.5 as default coder, Sonnet only when you explicitly opt in, Gemini 2.5 Pro for long-context). The benchmarks folder lets you verify yourself on your own workload. + +And the **defensive security playbook** (Part 19) — written after the Apr 15 "Comment and Control" cross-vendor prompt-injection disclosure that hit Claude Code + Gemini CLI + Copilot Agent. Seven layers: provenance labels, approval, secret isolation, webhook signatures, SSRF, MCP trust levels, quarantine profiles. If your coding agent reads arbitrary PR bodies or emails, this is the hardening posture I wish I'd had 6 months ago. + +MIT licensed. Issues + PRs welcome. 
Happy to answer anything. diff --git a/docs/outreach/launch-tweet-thread.md b/docs/outreach/launch-tweet-thread.md new file mode 100644 index 0000000..168292a --- /dev/null +++ b/docs/outreach/launch-tweet-thread.md @@ -0,0 +1,84 @@ +# Launch Tweet Thread — Draft + +**Tone:** matter-of-fact, receipts-forward, no hype language. Replace `@OnlyTerp` / repo URL as needed. + +--- + +**1/8** +I got tired of Hermes guides that explain the architecture but don't give you anything to run, so I shipped the opposite: + +21 parts of documentation **plus** 9 installable skills, 5 production configs, a VPS bootstrap script, hardened systemd units, and a reproducible cost benchmark. + +github.com/OnlyTerp/hermes-optimization-guide + +--- + +**2/8** +The 5 configs: `minimum`, `telegram-bot`, `production`, `cost-optimized`, `security-hardened`. + +Each one is a single `cp` into `~/.hermes/config.yaml`. They're opinionated — not generic starters — and every field is commented. + +`templates/config/` + +--- + +**3/8** +Every skill the guide promises — audit-mcp, rotate-secrets, nightly-backup, weekly-dep-audit, cost-report, telegram-triage, pr-review, release-notes, audit-approval-bypass — is a real runnable `SKILL.md`. + +```bash +hermes skills install github://OnlyTerp/hermes-optimization-guide/skills/ops/nightly-backup +``` + +--- + +**4/8** +One command from fresh Hetzner CX22 → working hardened production Hermes: + +```bash +curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/main/scripts/vps-bootstrap.sh | bash +``` + +Caddy + UFW + fail2ban + systemd + unattended-upgrades + skill symlinks. ~10 min. + +--- + +**5/8** +MCP (Model Context Protocol) went viral last week. The guide has a full chapter — stdio/HTTP transports, 14 servers worth installing, `sampling/createMessage`, trust model, troubleshooting. + +The ecosystem directory (ECOSYSTEM.md) links 40+ MCP servers + coding agents + dashboard plugins. 
+ +--- + +**6/8** +The Apr 15 "Comment and Control" cross-vendor prompt-injection attack hit Claude Code + Gemini CLI + Copilot Agent. + +Part 19 is the defensive playbook: 7 layers (provenance, approval, secret isolation, webhook sigs, SSRF, MCP trust, quarantine). If your agent reads your inbox, please read this one. + +--- + +**7/8** +Cost routing playbook (Part 20) cuts typical workload spend by ~90%: +- Triage → Gemini Flash or Cerebras +- Classification → Cerebras Llama (~free) +- Default coding → Kimi K2.5 +- Hard coding → Sonnet (explicit opt-in) +- Long context → Gemini 2.5 Pro + +Benchmarks + methodology in `benchmarks/`. + +--- + +**8/8** +Everything's MIT-licensed, `CONTRIBUTING.md` is real, CI lints skill frontmatter + YAML + markdown links, and there's a ROADMAP. + +If this saves you an afternoon, a star helps more people find it. Issues + PRs welcome. + +github.com/OnlyTerp/hermes-optimization-guide + +--- + +## Replies / follow-ups to prep + +- "Why not [other framework]?" → I'm not trying to push Hermes; this guide exists *because* we run Hermes. The config-wizard + skill pattern is copy-able for any agent framework. +- "Does this work with local models?" → Yes. The `homelab` reference architecture covers Ollama routing. See `docs/reference-architectures/homelab.md`. +- "Will you maintain it?" → CHANGELOG + ROADMAP are live. Bus factor = 1 right now, actively looking for co-maintainers. diff --git a/docs/outreach/nous-upstream-pr-body.md b/docs/outreach/nous-upstream-pr-body.md new file mode 100644 index 0000000..3a222cd --- /dev/null +++ b/docs/outreach/nous-upstream-pr-body.md @@ -0,0 +1,57 @@ +# Upstream PR to `NousResearch/hermes-agent` — Draft PR Body + +**This is the single highest-leverage move for stars + team respect.** Rob should open this PR themselves — it lands better coming from a user than from an AI assistant. 
+ +--- + +## Suggested title +`docs: add "Community Guides" section linking external optimization resources` + +## Suggested branch name +`docs/community-guides` + +## Suggested change + +Add a new section to `README.md` (just below "Documentation" or "Quick Start"): + +````markdown +## Community Guides + +Independent guides written by Hermes users. These are not official, but have been vetted by maintainers for accuracy. + +- [Hermes Optimization Guide](https://github.com/OnlyTerp/hermes-optimization-guide) — 21-part guide covering LightRAG, Telegram deployment, MCP, security hardening, cost routing, observability, and remote sandboxes. Ships installable skills, 5 production configs, a VPS bootstrap script, and reproducible cost benchmarks. + +_Maintain your own? Open a PR adding it here._ +```` + +## PR body + +> Hi Nous team — first, thanks for Hermes, it's been my daily driver for a year. +> +> I've been writing a community optimization guide since v0.9.0 shipped, and have gotten enough "where should I link this so people can find it?" messages that I wanted to propose an upstream spot: a small **Community Guides** section in the README. +> +> The guide itself is at https://github.com/OnlyTerp/hermes-optimization-guide — 21 parts of documentation, 9 installable `SKILL.md` files, 5 production configs, a VPS bootstrap script, and a reproducible cost benchmark. MIT license. CHANGELOG + ROADMAP are real. I cross-check every release note on `main` and update within 72h. +> +> Totally understand if you'd rather maintain a separate page, or curate more carefully before pointing at third-party content. Happy to iterate on the section copy, add more guides as they show up, or even move the list to `docs/community.md` if that fits better. +> +> If there's a better channel for this kind of ask (Discord, an `awesome-hermes` repo, etc.) — just let me know and I'll move there. 
+ +## Why this specific shape + +- **"Community Guides" (plural)** — signals the section is for anyone, not just this guide. Easier to accept because it's a pattern, not a promo. +- **One-line link with a quality descriptor** — follows the style Nous already uses for integrations. Doesn't read like marketing. +- **Explicit "vetted by maintainers for accuracy"** — puts the burden on the team to do a light review. Removes their fear of linking something that'll get out of date. +- **"Maintain your own? Open a PR adding it here."** — invites contribution. Doesn't feel self-serving. +- **PR body is a user speaking user-to-user** — the Nous team respects builders; show that you've been building. + +## What to do if rejected + +1. **Ask where the right spot is.** If they say "not in README", ask about `docs/community.md` or a GH topic/tag. +2. **Offer to run an `awesome-hermes` repo** — totally different framing, same destination: people find this guide. +3. **Don't push.** Take the rejection, thank them, keep writing. + +## What to do if accepted + +1. **Thank them publicly** — quote-tweet / reply in the PR. The Hermes community watches these merges. +2. **Update [this guide's README](../../README.md)** with the upstream link ("Listed in the official Hermes README"). +3. **Don't abuse the channel.** Never add other projects to that section in a later PR unless they're comparable-quality and the author opens it themselves. diff --git a/docs/outreach/reddit-localllama.md b/docs/outreach/reddit-localllama.md new file mode 100644 index 0000000..ee729af --- /dev/null +++ b/docs/outreach/reddit-localllama.md @@ -0,0 +1,41 @@ +# r/LocalLLaMA — Post Draft + +**Title:** I shipped a Hermes guide with runnable skills, 5 production configs, and a one-command VPS bootstrap + +**Flair:** `Resources` or `Tutorial | Guide` + +--- + +## Body + +r/LocalLLaMA skews toward people who **run their own stuff**, so I'm posting the `homelab` angle specifically. 
+ +I built a Hermes (Nous Research's agent framework) optimization guide that goes beyond docs. Everything's installable — not just explained. + +**Repo:** https://github.com/OnlyTerp/hermes-optimization-guide + +**What's in it that'll matter to this sub:** + +- **Homelab reference architecture** — full setup for running Hermes + LightRAG + self-hosted Langfuse on your own box, with Ollama as the default provider and routing only the hard stuff to Sonnet. Tailscale instead of port-forwarding. Scaling ceilings + honest tradeoffs (latency, quality, etc.) included. + +- **5 production config templates** — one of them is `cost-optimized.yaml`, which uses Gemini Flash + Cerebras Llama for most traffic and only escalates to Sonnet on explicit opt-in. Typical spend is $0.05–0.30/active-hour. + +- **Reproducible benchmarks** — 12 flagship models × 5 tasks (triage / summarize / codefix / deepreason / bulk-extract), methodology + `hermes evals run` command to reproduce. + +- **9 installable skills** (`SKILL.md` files with YAML frontmatter — drop into `~/.hermes/skills/`): audit-mcp, rotate-secrets, audit-approval-bypass, nightly-backup, weekly-dep-audit, cost-report, telegram-triage, pr-review, release-notes. + +- **Security playbook** (Part 19) — 7-layer defense against prompt injection, written after the Apr 15 "Comment and Control" attack hit Claude Code + Gemini CLI + Copilot Agent. + +- **MCP chapter** (Part 17) — stdio/HTTP transports, 14 servers worth installing today, the trust model, writing your own in 30 lines. + +- **Remote sandboxes** (Part 21) — phone-drives-cloud pattern, Modal/Daytona/Fly/E2B. The bulk tar-pipe sync from the Apr 17 Hermes PR is documented. + +**One command to go from fresh VPS to working Hermes:** + +```bash +curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/main/scripts/vps-bootstrap.sh | bash +``` + +MIT license. CI lints skill frontmatter + YAML + markdown links. CHANGELOG + ROADMAP are real. 
+ +If this is useful — a star helps more people find it. If something's wrong, open an issue or PR. diff --git a/docs/reference-architectures/README.md b/docs/reference-architectures/README.md new file mode 100644 index 0000000..e87043a --- /dev/null +++ b/docs/reference-architectures/README.md @@ -0,0 +1,12 @@ +# Reference Architectures + +Four opinionated "steal this" blueprints — each includes every file you need to run it, every cost line-item, every scaling ceiling, and the honest tradeoffs. + +| Blueprint | Good for | Cost/mo | Scale ceiling | +|---|---|---:|---| +| [Homelab](./homelab.md) | On your own hardware, fully private | ~$0 (electricity) + keys | Single user, best privacy | +| [Solo Developer](./solo-developer.md) | VPS + daily-driver phone bot | ~$5 infra + $20–60 LLM | You + personal projects | +| [Small Agency](./small-agency.md) | 2–6 devs, multiple clients | ~$25 infra + $200–800 LLM | A few teams sharing | +| [Road Warrior](./road-warrior.md) | Phone drives beefy cloud box | ~$5 always-on + $0–50 on-demand | Anywhere with cell | + +All four use the files under [`templates/`](../../templates/) and [`skills/`](../../skills/) — they differ in *which ones* and *where they run*. Pick the closest, then edit. diff --git a/docs/reference-architectures/homelab.md b/docs/reference-architectures/homelab.md new file mode 100644 index 0000000..5e3b6ff --- /dev/null +++ b/docs/reference-architectures/homelab.md @@ -0,0 +1,149 @@ +# Reference Architecture: Homelab + +**Fully private, on your own hardware.** Nothing leaves your LAN except provider-bound LLM traffic (and optionally, none of that either if you run local models). 
+ +## Who this is for + +- You own a homelab / NAS / dedicated box +- Privacy-first — you don't want recipe data / PRs / messages in a third-party cloud +- Willing to trade off convenience (dynamic DNS, patching) for control + +## Cost + +- **Infra:** electricity + existing hardware +- **LLM:** $0 if you go all-Ollama; otherwise retail API for a curated subset +- **External:** $0 (no Tailscale Pro required for 1–3 nodes) + +## Architecture + +``` + ┌──────────────────────────────────────┐ + │ Homelab (LAN) │ + │ │ + phone / laptop → │ Tailscale hermes.lan (Caddy) │ + (Tailscale) │ │ │ │ + │ └──────────────┤ │ + │ ↓ │ + │ hermes.service (systemd) │ + │ ├── Ollama (GPU box) │ + │ ├── LightRAG │ + │ ├── Langfuse (self) │ + │ └── Dashboard :8765 │ + │ │ + └──────────────────────────────────────┘ + │ + ↓ (optional, for hard queries) + Anthropic / Google / OpenAI +``` + +## Parts list + +- **1× Linux box** (16GB+ RAM, any x86_64 or Apple Silicon VM) — runs Hermes, LightRAG, Langfuse +- **1× GPU box** (optional; 16GB+ VRAM) — runs Ollama. Can be the same box if you have one GPU. +- **Tailscale** (free tier, up to 3 users / 100 devices) — mesh VPN; no port-forwarding +- **Domain** (optional; `hermes.lan` works fine with Tailscale MagicDNS) + +## Install steps + +### 1. Base box + +```bash +# On the Linux box (as root, Debian 12 or Ubuntu 24.04) +curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/main/scripts/vps-bootstrap.sh | bash +``` + +### 2. Tailscale + +```bash +curl -fsSL https://tailscale.com/install.sh | sh +tailscale up --accept-routes +tailscale cert hermes.$(tailscale status --json | jq -r '.MagicDNSSuffix') +``` + +### 3. Ollama (optional — local models) + +```bash +curl -fsSL https://ollama.com/install.sh | sh +ollama pull llama3.1:70b-instruct-q4_K_M +ollama pull qwen2.5-coder:32b +``` + +### 4. 
Config + +Start from [`templates/config/production.yaml`](../../templates/config/production.yaml), then: + +```yaml +models: + default: ollama/llama3.1:70b-instruct-q4_K_M + providers: + ollama: + base_url: http://gpu-box.tailnet-xxx.ts.net:11434 + anthropic: + api_key: "${ANTHROPIC_API_KEY}" # fallback for hard queries + + routing: + - when: task == "reasoning" + use: anthropic/claude-sonnet-4-5 + - when: task == "coding" && complexity == "high" + use: anthropic/claude-sonnet-4-5 + +gateways: + cli: + enabled: true + telegram: + enabled: true + bots: + admin: + token: "${TELEGRAM_ADMIN_BOT_TOKEN}" + allowed_user_ids: ["${TELEGRAM_OWNER_ID}"] + +memory: + backend: lightrag + lightrag: + working_dir: /var/lib/hermes/lightrag + llm_model: ollama/qwen2.5-coder:32b # local extraction + embedding_model: openai/text-embedding-3-small # or local (bge-m3) +``` + +### 5. Langfuse self-host (observability inside the LAN) + +```bash +cp templates/compose/langfuse-stack.yml /opt/ +cp templates/compose/.env.langfuse.example /opt/.env.langfuse +# edit /opt/.env.langfuse → generate secrets +docker compose -f /opt/langfuse-stack.yml --env-file /opt/.env.langfuse up -d +``` + +Point Hermes `telemetry.langfuse.host` at `http://127.0.0.1:3000`. + +### 6. Skills + +```bash +for skill in /opt/hermes-optimization-guide/skills/*/*/; do + ln -sfn "$skill" "/home/hermes/.hermes/skills/$(basename $skill)" +done +hermes /reload +``` + +## Honest tradeoffs + +- **Latency.** Local 70B Q4 ≈ 20–40 tok/s on a 3090. Flagship Sonnet ≈ 60–90 tok/s. Most "work" queries you won't notice; coding/deep reasoning you will. +- **Quality.** Current open models (Qwen 2.5 Coder, Llama 3.1 70B, Kimi K2.5 local) are *close* on many tasks, *behind* on long-context + nuanced reasoning. Routing lets you hand the hard stuff to Sonnet. +- **Patching.** You maintain the box. Enable unattended-upgrades (the bootstrap script does) and schedule monthly reboots. 
+- **Reachability.** Tailscale is solid but means "no Tailscale = no Hermes". Keep a cellphone backup admin bot, or run a tiny cloud relay. +- **Backups.** Set [`nightly-backup`](../../skills/ops/nightly-backup/SKILL.md) to write encrypted archives to a second physical disk — not the same RAID array. + +## What to skip + +- Cloudflare / public TLS — Tailscale handles that +- UFW rules for 80/443 — no public ports +- Paid Langfuse — self-host is free for any reasonable single-user volume + +## When to graduate + +You hit this setup's ceiling when: +- You want more than 1–2 humans using it (permissioning local models gets awkward) +- You need world-reachable webhooks (Stripe, GitHub, etc.) +- Your LightRAG graph exceeds ~200K entities (it'll still work, but merges slow down) + +Graduate to [Solo Developer](./solo-developer.md) (add a tiny VPS) or [Small Agency](./small-agency.md). diff --git a/docs/reference-architectures/road-warrior.md b/docs/reference-architectures/road-warrior.md new file mode 100644 index 0000000..a175170 --- /dev/null +++ b/docs/reference-architectures/road-warrior.md @@ -0,0 +1,153 @@ +# Reference Architecture: Road Warrior + +**Phone drives, disposable cloud boxes do the heavy lifting.** Inspired by [Part 21](../../part21-remote-sandboxes.md). You carry a tiny $5 always-on VPS; it orchestrates Modal / Daytona / Fly sandboxes that spin up on demand for real work. 
+ +## Who this is for + +- Traveling developers / nomads +- People who code from their phone via Telegram +- Anyone who wants "I can fix prod from a train" energy + +## Cost + +- **Always-on driver box:** $5/mo (Hetzner CX22) +- **On-demand remote compute:** $0–50/mo (only pay when you're actually running things) +- **LLM:** $20–60/mo + +## Architecture + +``` + Phone (Telegram) ──→ Driver VPS ($5/mo, always-on) + │ + │ hermes.service + │ remote_sandbox: modal (default) + │ + ▼ + On-demand sandbox: + Modal (GPU-ish) + Daytona (full dev env) + Fly Machines (persistent) + E2B (Python sandbox) + SSH (your own beast) +``` + +Your phone → Telegram → $5/mo VPS → spins up a $0.05/hr Modal sandbox → runs Claude Code, pulls the repo, does the work → syncs files back on teardown → pushes PR. + +## Parts list + +- **Hetzner CX22** as the driver ($5/mo) +- **Modal account** (free $30/mo credits) OR **Daytona** OR **Fly Machines** — see [Part 21](../../part21-remote-sandboxes.md) +- **Telegram bot** + your user ID +- **API keys:** Anthropic (for Claude Code inside sandbox), optional Google (for Hermes triage on the driver) + +## Install + +```bash +# On the driver VPS — as root +curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/main/scripts/vps-bootstrap.sh | bash +``` + +Then customize: + +```yaml +# /home/hermes/.hermes/config.yaml +version: 1 + +models: + default: google/gemini-2.5-flash # Cheap + fast for "plan the work" phase + providers: + google: + api_key: "${GOOGLE_API_KEY}" + anthropic: + api_key: "${ANTHROPIC_API_KEY}" # Used by sandboxed Claude Code + +gateways: + cli: { enabled: true } + telegram: + enabled: true + bots: + admin: + token: "${TELEGRAM_ADMIN_BOT_TOKEN}" + allowed_user_ids: ["${TELEGRAM_OWNER_ID}"] + +# The money section +remote_sandbox: + default_backend: modal # Or daytona / fly / e2b / ssh + backends: + modal: + token_id: "${MODAL_TOKEN_ID}" + token_secret: "${MODAL_TOKEN_SECRET}" + image: "python:3.12-slim" + 
timeout_idle: 600 # 10m idle → auto-shutdown + ssh: # your home beast, if any + host: "beast.tailnet-xxx.ts.net" + user: "hermes" + identity_file: "~/.ssh/id_ed25519" + +# Hermes loads skills from here; these let you orchestrate from Telegram +skills: + allowlist: + - pr-review + - release-notes + - cost-report + - remote-run # triggers a sandbox +``` + +## The workflow + +``` +you: "@bot fix the null-check in auth.ts" +bot: [spinning up modal sandbox…] +bot: cloned acme/app, branch devin-123 +bot: claude code: analyzing… +bot: [file diff preview, 3 lines] + Approve? /yes /no /changes +you: /yes +bot: [syncing files back, running tests] +bot: tests green. Pushed PR #342 → https://… +bot: sandbox torn down (ran 4m 12s, $0.014) +``` + +## Key wins from Part 21 + PR #8018 + +- **Bulk tar-pipe sync** — 30s cold start beats 5 minutes of 100× `scp` +- **SIGINT-safe sync-back** — lose signal mid-run, the sandbox still flushes on teardown +- **Hash-only sync** — only changed files come back, not the whole tree +- **Local `git push`** — the driver VPS keeps your authenticated git creds; sandbox never sees them + +## Skill setup + +```bash +# Symlink all the guide skills +for s in /opt/hermes-optimization-guide/skills/*/*/; do + ln -sfn "$s" "/home/hermes/.hermes/skills/$(basename $s)" +done + +# Write a tiny remote-run skill (paste into ~/.hermes/skills/remote-run/SKILL.md) +# that wraps `hermes sandbox run --repo acme/app -- claude -p "$@"` +hermes /reload +``` + +## Safety rails + +- Sandbox = **quarantine profile** (as if it were untrusted input) — Claude Code in the sandbox cannot touch the driver's MCP servers or secrets +- Driver has read-only GitHub PAT (for triage/search) +- The **write** PAT only exists inside the sandbox, short-lived, piped through stdin so it's never on disk + +## Costs in the wild + +Typical month for an active user: + +| Line | Cost | +|---|---:| +| CX22 driver | $5 | +| Modal compute (3h/day × 30 days × $0.05/h) | $4.50 | +| Anthropic (Claude 
Code, routed) | $20–40 | +| Google Gemini Flash (triage) | ~$0.50 | +| **Total** | **~$30–50/mo** | + +## When to graduate + +- You're running 10+ sandbox hours a day → migrate to a persistent Fly Machine + scale up +- You need GPU in the sandbox → Modal A10G is ~$1.10/hr, still cheap for spot usage +- You want *multi-user* → [Small Agency](./small-agency.md) diff --git a/docs/reference-architectures/small-agency.md b/docs/reference-architectures/small-agency.md new file mode 100644 index 0000000..fe14374 --- /dev/null +++ b/docs/reference-architectures/small-agency.md @@ -0,0 +1,111 @@ +# Reference Architecture: Small Agency + +**2–6 devs, multiple clients, per-client isolation.** One Hermes install is hard to scale across a team; this architecture runs a dedicated profile per developer/client and shares only the observability + audit layer. + +## Who this is for + +- Dev shops / consulting agencies handling multiple client codebases +- Small product teams with strict separation-of-concerns requirements +- Anyone who needs audit trails that hold up to a client security review + +## Cost + +- **Infra:** ~$25–50/mo (one CX32 or 2× CX22) +- **LLM:** $200–800/mo (routed) +- **Langfuse/observability:** $0 self-host or $100+/mo managed + +## Architecture + +``` + Devs (Telegram/Discord DMs, CLI) + │ │ + ▼ ▼ + ┌───────────────────┐ ┌───────────────────┐ + │ Hermes per dev/ │ │ Shared services │ + │ per client │ │ │ + │ (systemd units) │ │ Langfuse │ + │ │ │ Audit log sink │ + │ hermes@alice.s │ │ LightRAG (each) │ + │ hermes@bob.s │ │ backup target │ + │ hermes@clientA.s │ │ │ + └───────────────────┘ └───────────────────┘ +``` + +- **Systemd templated units** — `hermes@.service`, one per dev/client, each with its own `${HOME}/.hermes/` and own approval channel (DM of that dev) +- **LightRAG per instance** — never mix client knowledge +- **Centralized Langfuse + audit log** — every call traced, PII-redacted at the secrets layer + +## Parts list + +- **1× CX32** (4 vCPU, 
8GB RAM) — $12/mo, hosts 3–6 Hermes instances + Langfuse +- **S3/R2 backup bucket** — encrypted nightly backups (age/gpg) +- **Cloudflare** — DNS + TLS-terminated reverse proxy (or Caddy if you prefer not to touch CF) +- **Linear/Notion/Slack/Google Workspace** — MCP-wired read-only for context + +## Install + +1. **Bootstrap the host** as in [Solo Developer](./solo-developer.md). +2. **Replace `hermes.service`** with a templated unit (`hermes@.service`): + +```ini +[Unit] +Description=Hermes Agent for %i +After=network-online.target + +[Service] +Type=simple +User=%i +WorkingDirectory=/home/%i +ExecStart=/usr/local/bin/hermes run +EnvironmentFile=-/home/%i/.hermes/.env +# ... all the hardening bits from templates/systemd/hermes.service + +[Install] +WantedBy=multi-user.target +``` + +Then: + +```bash +# For each dev or client: +adduser --disabled-password --gecos "" alice +curl -sSL https://install.hermes.nous.ai | sudo -u alice -H bash +cp templates/config/production.yaml /home/alice/.hermes/config.yaml +chown alice:alice /home/alice/.hermes/config.yaml +systemctl enable --now hermes@alice.service +``` + +3. **Centralize Langfuse** per [Solo Developer](./solo-developer.md#install), then every `config.yaml` points `telemetry.langfuse.host` at the same internal URL. Each profile ships under its own Langfuse project for isolation. 
+ +## Per-client separation + +- **`profile:`** in the Hermes config — `quarantine` (untrusted input for a public bot) vs `trusted` (the dev's admin DM) +- **Approval channels** — the dev's DM is the only trusted approval source; client support channels are *never* trusted +- **LightRAG dirs** — `~/.hermes/lightrag-<client>/` per client; never mix +- **MCP** — per-client read-only PATs (`GITHUB_PAT_CLIENT_A`, `GITHUB_PAT_CLIENT_B`) +- **Audit log** — append-only JSONL per session, centralized to a single append-only bucket the dev can *read* but not *delete* (makes client reviews easy) + +## Cost routing at agency scale + +Use [`templates/config/production.yaml`](../../templates/config/production.yaml) as the base. Key rules: + +- **Triage** (most traffic): Cerebras Llama 70B — free-ish tier +- **Default coding:** Kimi K2.5 (cheapest competent coder) +- **"Hard" coding / architecture:** Anthropic Sonnet — explicit opt-in +- **Long-context research:** Gemini 2.5 Pro +- **Deep reasoning:** GPT-5.4 (opt-in) + +With a weekly `cost-report` posted to the Discord ops channel, cost anomalies surface before the invoice. 
+ +## Compliance-friendly defaults + +- `memory_write_redaction: true` (skip writing secrets to LightRAG) +- `log_redaction: true` +- `security.webhook.max_body_bytes: 524288` +- `security.approval.approval_timeout: 120` — no action sits in pending queue forever +- Nightly backup encrypted with per-client age keys + +## When to graduate + +- Past ~20 devs → move to a proper Kubernetes setup with per-profile pods, separate Langfuse instances per client +- Regulated industries → self-host the LLM too (vLLM or Ollama on a GPU box) diff --git a/docs/reference-architectures/solo-developer.md b/docs/reference-architectures/solo-developer.md new file mode 100644 index 0000000..2b3a47b --- /dev/null +++ b/docs/reference-architectures/solo-developer.md @@ -0,0 +1,93 @@ +# Reference Architecture: Solo Developer + +**VPS + phone bot + cost routing.** The 80% setup — cheap, reliable, reachable anywhere, good enough for one person's real work. + +## Who this is for + +- Developer / maker who wants a Telegram/Discord driver for their daily work +- No ops team; "set it and forget it" is the design goal +- Willing to spend $5–50/mo to not run your own hardware + +## Cost + +- **Infra:** $5–7/mo (Hetzner CX22 or Fly machine) +- **LLM:** $20–60/mo for typical personal use with cost routing +- **Domain + DNS:** $0–1/mo + +**Total: ~$25–70/mo.** + +## Architecture + +``` + phone/laptop Internet hermes.yourdomain.com + │ │ │ + └── Telegram/Discord ─────┼── Cloudflare/Caddy ────→│ + │ │ + │ ├── hermes.service + │ ├── hermes-dashboard.service + │ ├── Langfuse (self-host) + │ └── LightRAG + │ + └── Anthropic / Google / Moonshot / Cerebras +``` + +## Parts list + +- **Hetzner CX22** (Debian 12 or Ubuntu 24.04) — $5/mo, 4GB RAM, 2 vCPU +- **Domain** ($12/yr) — or use a free subdomain from duckdns/nip.io +- **Telegram bot token** (free; [@BotFather](https://t.me/BotFather)) +- **API keys:** Anthropic (default), Google (Gemini Flash for triage), optionally Moonshot + Cerebras for 
coding/classification + +## Install + +```bash +# As root on a fresh VPS +curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/main/scripts/vps-bootstrap.sh | bash +``` + +Then: + +```bash +# As root +sudo -u hermes nano /home/hermes/.hermes/.env # fill in keys +sudo cp /opt/hermes-optimization-guide/templates/config/cost-optimized.yaml \ + /home/hermes/.hermes/config.yaml # or telegram-bot.yaml +sudo cp /etc/caddy/Caddyfile.hermes.reference /etc/caddy/Caddyfile +# edit /etc/caddy/Caddyfile — replace *.yourdomain.com +sudo systemctl reload caddy +sudo systemctl start hermes hermes-dashboard +``` + +## Why `cost-optimized.yaml` is the right default + +See [`templates/config/cost-optimized.yaml`](../../templates/config/cost-optimized.yaml). It defaults to Gemini Flash (cheapest smart model), uses Cerebras Llama for classification (near-free), and only escalates to Sonnet for high-stakes coding. With prompt caching + Fast Mode disabled by default, typical cost is $0.05–0.30 per active hour. + +If you need max quality for a specific task, just say "use sonnet" in chat — the router honors explicit user overrides. 
+ +## Routines you'll run + +Every one of these is installed by the bootstrap script (symlinks into `~/.hermes/skills/`): + +- Morning: `/cost-report window=24h` — yesterday's spend +- On idle threads: `/telegram-triage` (autoreply) +- Weekly: `/weekly-dep-audit severity_floor=high` +- Nightly: `/nightly-backup s3://my-backups/hermes/ 30` (or set `remote=local` if you don't care) + +## Scaling ceilings + +| Constraint | Hit at | Fix | +|---|---|---| +| CX22 RAM | ~5–10 concurrent tool calls + LightRAG | Upgrade to CX32 ($12/mo) | +| Gemini Flash free tier | 1500 req/day | Route to Cerebras or add paid quota | +| LightRAG on 2 vCPU | Indexing 10MB+ docs | Move indexing to a spot Modal sandbox | +| Cost budget | $50+/mo | Turn on `prefer_cached: true` + 32K compression trigger | + +## Security note + +Because this box is public-facing, **always** deploy the denylist + require_approval from `cost-optimized.yaml`, and keep your Telegram bot **private** (restrict `allowed_user_ids` to your own ID). Any "public" bot should use a separate token and run in a **quarantine profile** — see [Part 19](../../part19-security-playbook.md) and the [`security-hardened.yaml`](../../templates/config/security-hardened.yaml) template. + +## When to graduate + +- Adding teammates → [Small Agency](./small-agency.md) +- Going offline-first → [Homelab](./homelab.md) +- Wanting a beefy cloud box on-demand → [Road Warrior](./road-warrior.md) diff --git a/docs/wizard/README.md b/docs/wizard/README.md new file mode 100644 index 0000000..fe2ba3a --- /dev/null +++ b/docs/wizard/README.md @@ -0,0 +1,28 @@ +# Hermes Config Wizard + +Static single-page wizard that emits a ready-to-drop `config.yaml` from 8 answers. **Runs entirely in the browser** — nothing is uploaded. + +## Local use + +```bash +# After cloning the guide: +cd docs/wizard +python3 -m http.server 8080 +# then open http://127.0.0.1:8080 +``` + +Or just open [`docs/wizard/index.html`](./index.html) directly — it works from `file://`. 
+ +## Deployment + +Served automatically via GitHub Pages once enabled on this repo. See [ROADMAP.md](../../ROADMAP.md) for Pages setup status. + +## Extending + +Everything lives in one `index.html`. Each form field maps to a case in `generate()`. To add a new persona / option: + +1. Add the `<input>` under the right `<fieldset>`. 
+2. Read it in `generate()` via `val()` / `on()` / `select()`. +3. Append YAML `lines.push(...)` blocks where it fits. + +No frameworks, no build step, on purpose. diff --git a/docs/wizard/index.html b/docs/wizard/index.html new file mode 100644 index 0000000..fa7e8c2 --- /dev/null +++ b/docs/wizard/index.html @@ -0,0 +1,446 @@ + + + + + +Hermes Config Wizard + + + + +
+

Hermes Config Wizard

+

+ Answer 8 questions → get a production-ready config.yaml you can drop at ~/.hermes/config.yaml. + Runs entirely in your browser; nothing is uploaded. + Based on the Hermes Optimization Guide. +

+ +
+
+ 1. Who's this deployment for? +

Picks the starting template.

+ + + + + +
+ +
+ 2. Default model +

Can be overridden per-skill.

+ +
+ +
+
+ 3. Memory backend + + + + +
+
+ 4. Gateways + + + + + + +
+
+ +
+ 5. MCP servers to pre-wire +
+ + + + + + +
+
+ +
+ 6. Approval layer + + + +
+ +
+
+ 7. Observability + + + +
+
+ 8. Cron + + + + +
+
+ +
+ + + + +
+
+ +
+ Generated config.yaml +

Save as ~/.hermes/config.yaml. Pair with a ~/.hermes/.env that defines the ${VAR} references. See docs/quickstart.md.

+
(click Generate)
+
+ + +
+ + + + diff --git a/skills/dev/meeting-prep/SKILL.md b/skills/dev/meeting-prep/SKILL.md new file mode 100644 index 0000000..63c1c20 --- /dev/null +++ b/skills/dev/meeting-prep/SKILL.md @@ -0,0 +1,97 @@ +--- +name: meeting-prep +description: Prepare a 1-page brief for an upcoming meeting by combining calendar context, recent threads with attendees, and relevant docs +when_to_use: + - User invokes /meeting-prep for