diff --git a/.changeset/README.md b/.changeset/README.md deleted file mode 100644 index e5b6d8d6a6..0000000000 --- a/.changeset/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Changesets - -Hello and welcome! This folder has been automatically generated by `@changesets/cli`, a build tool that works -with multi-package repos, or single-package repos to help you version and publish your code. You can -find the full documentation for it [in our repository](https://github.com/changesets/changesets) - -We have a quick list of common questions to get you started engaging with this project in -[our documentation](https://github.com/changesets/changesets/blob/main/docs/common-questions.md) diff --git a/.changeset/config.json b/.changeset/config.json deleted file mode 100644 index c8468bd067..0000000000 --- a/.changeset/config.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "$schema": "https://unpkg.com/@changesets/config@3.1.2/schema.json", - "changelog": "@changesets/cli/changelog", - "commit": false, - "fixed": [], - "linked": [ - [ - "@aoagents/ao-core", - "@aoagents/ao-cli", - "@aoagents/ao", - "@aoagents/ao-notifier-macos", - "@aoagents/ao-plugin-runtime-tmux", - "@aoagents/ao-plugin-runtime-process", - "@aoagents/ao-plugin-agent-claude-code", - "@aoagents/ao-plugin-agent-codex", - "@aoagents/ao-plugin-agent-aider", - "@aoagents/ao-plugin-agent-opencode", - "@aoagents/ao-plugin-agent-cursor", - "@aoagents/ao-plugin-agent-kimicode", - "@aoagents/ao-plugin-workspace-worktree", - "@aoagents/ao-plugin-workspace-clone", - "@aoagents/ao-plugin-tracker-github", - "@aoagents/ao-plugin-tracker-linear", - "@aoagents/ao-plugin-tracker-gitlab", - "@aoagents/ao-plugin-scm-github", - "@aoagents/ao-plugin-scm-gitlab", - "@aoagents/ao-plugin-notifier-desktop", - "@aoagents/ao-plugin-notifier-slack", - "@aoagents/ao-plugin-notifier-webhook", - "@aoagents/ao-plugin-notifier-composio", - "@aoagents/ao-plugin-notifier-dashboard", - "@aoagents/ao-plugin-notifier-discord", - 
"@aoagents/ao-plugin-notifier-openclaw", - "@aoagents/ao-plugin-terminal-iterm2", - "@aoagents/ao-plugin-terminal-web", - "@aoagents/ao-web" - ] - ], - "snapshot": { - "useCalculatedVersion": true, - "prereleaseTemplate": "{tag}-{commit}" - }, - "access": "public", - "baseBranch": "main", - "updateInternalDependencies": "patch", - "ignore": ["@aoagents/ao-integration-tests"] -} diff --git a/.cursor/BUGBOT.md b/.cursor/BUGBOT.md deleted file mode 100644 index 279d57ccab..0000000000 --- a/.cursor/BUGBOT.md +++ /dev/null @@ -1,29 +0,0 @@ -# BugBot Configuration - -## Project Context - -Agent Orchestrator is a TypeScript monorepo for managing parallel AI coding agents. It uses pnpm workspaces with packages under `packages/`. - -## Tech Stack - -- TypeScript (strict mode, ESM with `.js` extensions in imports) -- Node.js 20+ (use `node:` prefix for built-in modules) -- pnpm workspaces -- Next.js 15 (App Router) for web dashboard -- Commander.js for CLI -- vitest for testing - -## Review Focus - -- **Security**: Watch for command injection (especially in shell/tmux/git/PowerShell commands and Windows named-pipe session IDs — `validateSessionId()` should guard those), AppleScript injection, GraphQL injection, unsanitized user input in API routes -- **Shell execution**: Prefer `execFile` over `exec` to avoid shell injection. 
Flag any use of `exec` or string concatenation in shell commands -- **Plugin pattern**: Plugins must export `{ manifest, create } satisfies PluginModule` with types from `@aoagents/ao-core` -- **Type safety**: Flag `as unknown as T` casts, unguarded `JSON.parse`, and type re-declarations that should import from core -- **Resource leaks**: Check for uncleared intervals/timeouts, uncleaned event listeners, missing `cancel()` on streams -- **ESM compliance**: Imports must use `.js` extension for local files, `node:` prefix for builtins - -## Ignore - -- `packages/web/src/lib/mock-data.ts` — temporary mock data, will be replaced -- `scripts/` — legacy bash scripts, not part of the TypeScript codebase -- `artifacts/` — design documents, not code diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000..3550a30f2d --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.eslintignore b/.eslintignore deleted file mode 100644 index 1d55b4b0a3..0000000000 --- a/.eslintignore +++ /dev/null @@ -1,34 +0,0 @@ -# Exclude unnecessary directories and files from ESLint - -# Dependencies -node_modules/ -**/node_modules/ - -# Build outputs -dist/ -packages/*/dist/ -packages/*/dist-server/ - -# Test files -coverage/ -*.coverage.js -packages/*/coverage/ - -# Configuration files -eslint.config.js -.prettierrc -.prettierignore -tsconfig.json - -# Web/Next.js specific (build artifacts) -packages/web/.next/ -packages/web/next-env.d.ts - -# Documentation -docs/ -*.md - -# Misc -.DS_Store -*.swp -.cache diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index cdd9facea6..0000000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,250 +0,0 @@ -# Copilot Instructions - -Instructions for GitHub Copilot when generating code, reviewing PRs, and suggesting changes in this repository. - -## Project Overview - -Agent Orchestrator (AO) is a TypeScript monorepo that manages fleets of parallel AI coding agents. 
Each agent gets its own git worktree, branch, and PR. The system handles CI feedback routing, review comment handling, and session lifecycle. - -**Stack:** TypeScript (strict), pnpm monorepo, Next.js 15 + React 19, Tailwind CSS v4, Vitest, ESLint flat config. - -**Architecture:** 8 plugin slots (Runtime, Agent, Workspace, Tracker, SCM, Notifier, Terminal, Lifecycle). All interfaces are defined in `packages/core/src/types.ts`. There is no database; the system uses flat files and memory. - -Full conventions: `CLAUDE.md`. Plugin development: `docs/DEVELOPMENT.md`. Design system: `DESIGN.md`. - ---- - -## Code Generation Rules - -### Think Before Generating - -- If a task is ambiguous, suggest the two most likely interpretations and ask which one applies. Do not choose silently. -- If there is a simpler approach than the one requested, say so. Push back when warranted. -- State assumptions explicitly when generating non-trivial code. - -### Simplicity First - -- No speculative features. No abstractions for single-use code. No "flexibility" that was not requested. -- Plugin slots are the extension point. If the user asks for configurability, consider whether a new plugin slot is the right answer instead. -- If you are generating 200 lines and it could be 50, rewrite it. -- Do not add error handling for impossible scenarios. - -### Match Existing Patterns - -- Before generating new code in an existing file, read how similar features are already implemented in that same file. Match the pattern. -- Do not introduce new patterns when established ones already exist. Search the codebase first. -- Match existing naming conventions, import styles, and file organization. -- Use `@aoagents/ao-core` for cross-package imports. -- Use the `workspace:*` protocol in `package.json`. - -### TypeScript Strict Mode - -- No `any` types unless they are in test files, where `any` and `console.log` are allowed. -- Use `import type { Foo }` for type-only imports. 
-- Prefix unused variables with `_`. -- Do not use `eval`, `new Function`, or `require()`; use ES module imports. - -### Web / UI Specific - -- Use Tailwind utility classes only. Do not use inline `style=` attributes. -- Use CSS custom properties via `var(--color-*)` from the `globals.css` `@theme` block. Never hardcode hex colors. -- Do not use external UI component libraries such as Radix, shadcn, or Headless UI. -- Preserve the dark theme at all times. -- Border radius must be `0px` everywhere except status dots and avatar circles. Hard edges are part of the visual identity. -- Mark client components with `"use client"`. Use server components for pages. -- SSE updates run at a 5-second interval via the `useSessionEvents` hook. Do not change this interval. -- Keep component files under 400 lines. - ---- - -## PR Review Instructions - -### What to Focus On - -These are the areas where Copilot review adds the most value: issues CI cannot catch. - -**1. Design over implementation.** A perfectly coded bad design is worse than a messy good one. Question: -- Side-channel communication, such as hidden flags or dynamic attribute setting -- Boolean parameters that switch between fundamentally different behaviors and should be separate code paths -- New internal contracts between components without interface documentation -- Missing migration paths for behavioral changes - -**2. Pattern consistency.** If a file uses one pattern and the PR introduces a different one, flag it. Common violations: -- Using class attributes in one place and instance properties in another for the same concept -- Mixing callback styles when the file uses one style consistently -- Introducing a new error-handling pattern when the file uses `throw new Error("msg", { cause: err })` - -**3. 
State machine safety.** Changes to `SessionStatus`, `ActivityState`, or lifecycle transitions require extra scrutiny: -- Verify that no invalid state transitions are introduced -- Check that `isTerminalSession()` and `TERMINAL_STATUSES` are updated if new statuses are added -- Flag any change that could cause a session to be incorrectly marked `killed` or `exited` - -**4. Plugin interface stability.** Any change to interfaces in `types.ts` is potentially breaking: -- New required methods on plugin interfaces break all existing plugins -- Changed method signatures break all existing plugins -- New optional methods are acceptable -- Flag any non-optional interface change as "breaking — requires updating all N plugins implementing this slot" - -**5. Backward compatibility.** Flag changes to: -- CLI flags or arguments in `packages/cli/` -- Config schema, including `agent-orchestrator.yaml` structure and Zod validation in `packages/core/src/config.ts` -- Exported types from `packages/core/src/index.ts`, which are a stable public API and should not break -- Default config values or behavior - -**6. Plugin isolation.** Plugins must never import each other directly. They communicate through: -- The `Session` object -- The `LifecycleManager` event system -- Core utilities exported from `@aoagents/ao-core` - -**7. Resource cleanup.** Check that: -- File handles, subprocesses, and runtime sessions (tmux on Unix, ConPTY pty-host processes on Windows) are cleaned up on all exit paths: success, error, and early return -- `destroy()` methods exist and use best-effort semantics -- There are no resource leaks in error paths - -**8. Shell safety.** Any command construction must use `shellEscape()` from `@aoagents/ao-core` for all dynamic arguments. Flag raw string interpolation in shell commands. 
- -### What to Ignore - -These are handled by automated tooling and should not be raised in review: - -- Formatting, whitespace, and trailing commas; Prettier handles them -- Import ordering; ESLint handles it -- Type errors; TypeScript strict mode and CI catch them -- Lint rule violations; ESLint and CI catch them -- Conventional commit format; CI validates it -- Test file style, including `any` types and `console.log`; relaxed rules apply there - -### High-Risk Files - -These files have a wide blast radius and deserve extra scrutiny: - -| File | Why it's risky | -|------|----------------| -| `packages/core/src/types.ts` | All 8 plugin interfaces live here. Changes can break every plugin. | -| `packages/core/src/lifecycle-manager.ts` | State machine and polling loop with subtle state dependencies. | -| `packages/core/src/session-manager.ts` | Session CRUD + stale runtime reconciliation. `list()` persists `runtime_lost` to disk when enrichment detects dead runtimes. Invariant violations can cause phantom `killed` or `exited` sessions. | -| `packages/core/src/lifecycle-state.ts` | Canonical lifecycle → legacy status mapping. New terminal reasons (e.g. `runtime_lost`) must be added to `deriveLegacyStatus()`. | -| `packages/cli/src/commands/start.ts` | ao start/stop + Ctrl+C shutdown. Cross-project scoping logic is subtle — `ao stop <project>` must not kill parent process. On Windows, also calls `sweepWindowsPtyHosts()` to gracefully tear down detached ConPTY pty-host processes that `taskkill /T` cannot reach. | -| `packages/core/src/config.ts` | Zod validation schema. Changes affect every `ao` command. | -| `packages/core/src/index.ts` | Stable public API. Do not break it without deprecation. | -| `packages/web/src/app/globals.css` | Design tokens used by 50+ components. Renaming tokens breaks the UI. | -| `packages/cli/src/index.ts` | CLI entry point. Flag and argument changes are user-facing. | -| `agent-orchestrator.yaml.example` | Config reference. 
It must stay in sync with the Zod schema. | - -### Behavioral Rules for Reviews - -1. **If it is worth mentioning, it is worth fixing.** Do not leave "nits" or minor suggestions. Only raise actionable findings with specific remediation. -2. **Reference file paths and line numbers.** Name the specific function, class, or pattern the author should use instead. Do not give generic advice like "consider using a different approach." -3. **Do not suggest refactoring adjacent code that already works.** Review the diff, not the whole file. -4. **Every finding must trace to a specific line in the diff.** If you cannot point to the line, do not raise it. -5. **Do not repeat points.** Each observation should appear exactly once in the review. -6. **Assume competence.** The author knows the codebase. Explain only non-obvious context: why something is risky, not what it does. -7. **For backward-compatible deprecations, provide the specific pattern:** - - TypeScript: `@deprecated` JSDoc, `console.warn`, and preserved old behavior during the deprecation period - - Config: keep the old key working with a warning and add the new key - - CLI: keep the old flag working and add a deprecation notice to `--help` - -### Review Output Format - -Omit sections where you have no findings. Do not write "No concerns" for empty sections. 
- -Summary -[1-2 sentence overall assessment] - -Architecture & Design -[Pattern violations, design issues, missing abstractions] - -State Machine / Lifecycle -[Any changes to session status, activity state, or transitions] - -Plugin Interface Stability -[Breaking interface changes, new required methods] - -Backward Compatibility -[Breaking changes to CLI, config, or exported APIs] - -Testing -[Missing edge cases, uncovered error paths, test adequacy] - -Security -[Shell injection, credential exposure, input validation] - -Performance -[Unnecessary allocations, missing cleanup, hot path regressions] - ---- - -## Common Patterns to Use - -### Plugin Implementation - -```typescript -import type { PluginModule, Runtime } from "@aoagents/ao-core"; - -export const manifest = { - name: "tmux", - slot: "runtime" as const, - description: "tmux session runtime", - version: "0.1.0", -}; - -export function create(config?: Record<string, unknown>): Runtime { - // Validate config here and store it via closure. - return { /* ... */ }; -} - -export function detect(): boolean { - /* ... */ -} - -export default { manifest, create, detect } satisfies PluginModule; -``` - -### Error Handling - -```typescript -// Wrap with cause for debugging. -throw new Error("Failed to create tmux session", { cause: err }); - -// Return null for "not found", throw for unexpected errors. -const issue = await tracker.getIssue("123"); // null if not found -``` - -### Activity Detection - -```typescript -// Always implement the full cascade: -// 1. Process check (exited if not running) -// 2. Actionable states (waiting_input/blocked from JSONL) -// 3. Native signal (agent-specific API) -// 4. 
JSONL entry fallback (MUST NOT skip — use getActivityFallbackState()) -``` - -### Shell Commands - -```typescript -import { shellEscape } from "@aoagents/ao-core"; - -const cmd = `git checkout ${shellEscape(branchName)}`; -// NEVER: `git checkout ${branchName}` -``` - ---- - -## Common Mistakes to Flag - -- **Cross-plugin imports.** Plugin A importing plugin B directly. It must go through core. -- **Hardcoded secrets.** Use `process.env` and throw if the value is missing. -- **Shell injection.** Dynamic values in shell commands without `shellEscape()`. -- **Missing `setupWorkspaceHooks`.** A new agent plugin without metadata hooks means the dashboard will not show PRs. -- **Skipping JSONL fallback.** An agent plugin's `getActivityState` without `getActivityFallbackState()` means the dashboard shows no activity. -- **New `SessionStatus` without updating `isTerminalSession` / `TERMINAL_STATUSES`.** The session can get stuck in limbo. -- **New session reason without updating `deriveLegacyStatus()`.** Terminal reasons like `runtime_lost` must map to a legacy status (e.g. `killed`), or sessions show wrong status. -- **Scoping `useSessionEvents` with project filter in Dashboard.tsx.** The sidebar must see ALL sessions — only the Kanban filters by project (client-side via `projectSessions`). -- **ao stop killing parent process when targeting a specific project.** `ao stop <project>` must only kill that project's sessions, not the parent `ao start` process or dashboard. -- **CSS color hardcoding.** Using `#hex` or `rgb()` instead of `var(--color-*)` tokens. -- **Rounded corners.** Using `rounded-md` or `rounded-lg` on cards or buttons. Hard edges only. -- **External UI libraries.** Importing from Radix, shadcn, or Headless UI. Use native HTML and Tailwind. -- **SSE interval changes.** Modifying the 5-second polling interval in `useSessionEvents`. -- **Inline styles.** Using `style={{ ... }}` for theme values. Use Tailwind with `var(--token)` or a CSS class instead. 
-- **New `package.json` dependencies without justification.** The monorepo should stay lean. diff --git a/.github/scripts/coverage-report.mjs b/.github/scripts/coverage-report.mjs deleted file mode 100644 index 03a784763c..0000000000 --- a/.github/scripts/coverage-report.mjs +++ /dev/null @@ -1,172 +0,0 @@ -/** - * Parses Vitest JSON coverage reports, filters to PR-changed files, - * and writes a Markdown summary to coverage-comment.md. - * - * Expects: - * - changed-files.txt in cwd (one relative path per line) - * - coverage-final.json in each package's coverage/ directory - * - */ - -/* eslint-disable no-undef -- Node.js CI script; process/console are globals */ -import { readFileSync, writeFileSync, existsSync, realpathSync, readdirSync } from "node:fs"; -import { resolve, relative } from "node:path"; -import libCoverage from "istanbul-lib-coverage"; - -const COMMENT_TAG = ""; -const cwd = realpathSync(process.cwd()); - -// ── 1. Read changed files ────────────────────────────────────────── -const changedFiles = readFileSync("changed-files.txt", "utf-8") - .split("\n") - .map((f) => f.trim()) - .filter((f) => f && (f.endsWith(".ts") || f.endsWith(".tsx"))) - .filter((f) => !f.includes("__tests__") && !f.includes(".test.")); - -if (changedFiles.length === 0) { - const comment = `${COMMENT_TAG}\n## Test Coverage Report\n\n_No TypeScript source files changed in this PR._\n`; - writeFileSync("coverage-comment.md", comment); - process.exit(0); -} - -// ── 2. 
Discover and merge coverage-final.json files ──────────────── -function findCoverageFiles(baseDir) { - const results = []; - const packagesDir = resolve(baseDir, "packages"); - - function walk(dir) { - let entries; - try { - entries = readdirSync(dir, { withFileTypes: true }); - } catch { - return; - } - for (const entry of entries) { - if (entry.name === "node_modules") continue; - const full = resolve(dir, entry.name); - if (entry.isDirectory()) { - if (entry.name === "coverage") { - const jsonFile = resolve(full, "coverage-final.json"); - if (existsSync(jsonFile)) results.push(jsonFile); - } else { - walk(full); - } - } - } - } - - walk(packagesDir); - return results; -} - -const coverageMap = libCoverage.createCoverageMap({}); - -for (const jsonPath of findCoverageFiles(cwd)) { - const raw = JSON.parse(readFileSync(jsonPath, "utf-8")); - coverageMap.merge(raw); -} - -// ── 3. Filter to changed files and collect metrics ───────────────── -let totalLines = 0; -let coveredLines = 0; -const fileReports = []; - -for (const absPath of coverageMap.files()) { - // Normalize to handle symlinks (e.g. /tmp -> /private/tmp on macOS) - const realAbsPath = existsSync(absPath) ? realpathSync(absPath) : absPath; - const relPath = relative(cwd, realAbsPath); - - if (!changedFiles.includes(relPath)) continue; - - const fc = coverageMap.fileCoverageFor(absPath); - const summary = fc.toSummary(); - const lineCoverage = fc.getLineCoverage(); - - const fileTotalLines = summary.lines.total; - const fileCoveredLines = summary.lines.covered; - const uncoveredLineNums = Object.entries(lineCoverage) - .filter(([, hits]) => hits === 0) - .map(([line]) => Number(line)) - .sort((a, b) => a - b); - - totalLines += fileTotalLines; - coveredLines += fileCoveredLines; - - if (fileTotalLines > 0) { - fileReports.push({ - path: relPath, - total: fileTotalLines, - covered: fileCoveredLines, - pct: summary.lines.pct.toFixed(1), - uncoveredLines: uncoveredLineNums, - }); - } -} - -// ── 4. 
Build Markdown comment ────────────────────────────────────── - -/** Collapse consecutive line numbers into ranges: [1,2,3,7,9,10] -> "L1-L3, L7, L9-L10" */ -function consolidateRanges(lines) { - if (lines.length === 0) return ""; - const ranges = []; - let start = lines[0]; - let end = lines[0]; - - for (let i = 1; i < lines.length; i++) { - if (lines[i] === end + 1) { - end = lines[i]; - } else { - ranges.push(start === end ? `L${start}` : `L${start}-L${end}`); - start = lines[i]; - end = lines[i]; - } - } - ranges.push(start === end ? `L${start}` : `L${start}-L${end}`); - return ranges.join(", "); -} - -let comment = `${COMMENT_TAG}\n## Test Coverage Report\n\n`; - -if (fileReports.length === 0) { - comment += - "_Changed files have no coverage data (not instrumented or no tests ran)._\n"; -} else { - const pct = - totalLines > 0 ? ((coveredLines / totalLines) * 100).toFixed(1) : "0.0"; - const uncoveredTotal = totalLines - coveredLines; - - comment += "| Metric | Value |\n"; - comment += "|--------|-------|\n"; - comment += `| Lines covered | ${coveredLines}/${totalLines} |\n`; - comment += `| Lines not covered | ${uncoveredTotal}/${totalLines} |\n`; - comment += `| Overall coverage | ${pct}% |\n\n`; - - // Per-file breakdown - if (fileReports.length > 1) { - comment += "
\nPer-file breakdown\n\n"; - comment += "| File | Coverage |\n"; - comment += "|------|----------|\n"; - for (const f of fileReports.sort((a, b) => a.path.localeCompare(b.path))) { - comment += `| \`${f.path}\` | ${f.covered}/${f.total} (${f.pct}%) |\n`; - } - comment += "\n
\n\n"; - } - - // Uncovered lines section - const filesWithUncovered = fileReports.filter( - (f) => f.uncoveredLines.length > 0, - ); - if (filesWithUncovered.length > 0) { - comment += "### Uncovered lines\n\n"; - for (const file of filesWithUncovered.sort((a, b) => - a.path.localeCompare(b.path), - )) { - const ranges = consolidateRanges(file.uncoveredLines); - comment += `- \`${file.path}\`: ${ranges}\n`; - } - comment += "\n"; - } - -} - -writeFileSync("coverage-comment.md", comment); diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml deleted file mode 100644 index 5399239d97..0000000000 --- a/.github/workflows/canary.yml +++ /dev/null @@ -1,192 +0,0 @@ -name: Canary - -# Two-stage release pipeline (nightly canary side). -# -# Nightly canary creates snapshot versions, tags, and a GitHub prerelease. -# npm publishing is handled by a private cron job (AO) that polls GitHub -# releases and publishes when a new prerelease tag is ahead of the current -# npm nightly version. -# -# No NPM_TOKEN or publisher dispatch secrets are needed in this repo. -# The only secret used is GITHUB_TOKEN (automatic). -# -# Cron schedule: 23:30 IST = 18:00 UTC, on Fri,Sat,Sun,Mon,Tue -# (DOW 5,6,0,1,2). Wed/Thu are the bake window — no scheduled publishes. -# `workflow_dispatch` lets the release captain re-cut a nightly during -# bake when a fix lands and the Discord cohort should test the patched -# candidate. - -on: - schedule: - - cron: "0 18 * * 5,6,0,1,2" - workflow_dispatch: - -concurrency: - group: canary - cancel-in-progress: false - -permissions: - contents: write - -jobs: - canary: - name: Publish canary - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - ref: main - fetch-depth: 0 - - # Skip-if-unchanged guard: if the most recent prerelease in this - # repo was cut from the current tip of main, there's nothing new - # to publish — short-circuit the whole job. 
Prevents republishing - # the same SHA on every cron tick during quiet stretches. - # - # We filter to prereleases only (`isPrerelease == true`). If the - # most recent release were a stable cut from `release.yml`, an - # explicit `workflow_dispatch` nightly right after the stable - # release would otherwise be suppressed — which we don't want. - # - # The previous-nightly tag points at the snapshot commit created - # by the "Commit snapshot version bumps" step below. The snapshot - # commit's first parent is the source main HEAD, so `${LAST_TAG}^` - # is the right anchor to compare against `GITHUB_SHA` (main HEAD - # at this run). - # - # `workflow_dispatch` always proceeds — recovery path for partial - # failures. A human triggered the run, they want it to run. - - name: Check if main has new commits since the last nightly - id: check - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - if [ "$GITHUB_EVENT_NAME" = 'workflow_dispatch' ]; then - echo 'Manual trigger via workflow_dispatch — bypassing skip guard.' - echo 'skip=false' >> "$GITHUB_OUTPUT" - exit 0 - fi - - LAST_TAG=$(gh release list --json tagName,isPrerelease --jq '[.[] | select(.isPrerelease)][0].tagName // empty') - LAST_SHA="" - if [ -n "$LAST_TAG" ]; then - LAST_SHA=$(git rev-parse "${LAST_TAG}^" 2>/dev/null || echo "") - fi - if [ -n "$LAST_SHA" ] && [ "$LAST_SHA" = "$GITHUB_SHA" ]; then - echo "Last prerelease ($LAST_TAG) was cut from $GITHUB_SHA — skipping." - echo "skip=true" >> "$GITHUB_OUTPUT" - else - echo "skip=false" >> "$GITHUB_OUTPUT" - fi - - - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - if: steps.check.outputs.skip != 'true' - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - if: steps.check.outputs.skip != 'true' - with: - node-version: 20 - cache: pnpm - # No `registry-url`: this workflow does not publish to npm. 
- - - if: steps.check.outputs.skip != 'true' - run: echo "HUSKY=0" >> $GITHUB_ENV - - - if: steps.check.outputs.skip != 'true' - run: pnpm install --frozen-lockfile - - - if: steps.check.outputs.skip != 'true' - run: pnpm -r build - - # Pre-publish guard: same as release.yml — catches workspace:* deps - # on private packages before they'd silently break a published - # install. Still runs here so the public repo blocks a malformed - # snapshot before it reaches npm. - - if: steps.check.outputs.skip != 'true' - run: node scripts/check-publishable-deps.mjs - - - name: Create snapshot versions - if: steps.check.outputs.skip != 'true' - run: | - # If no changesets exist (e.g. right after a Version Packages merge), - # create a minimal one. `changeset version --snapshot` consumes and - # deletes it, so no cleanup is needed. - if [ -z "$(ls .changeset/*.md 2>/dev/null | grep -vi 'README')" ]; then - node << 'SCRIPT' - const fs = require('fs'); - const pkgs = []; - - const addPackage = (m) => { - if (!fs.existsSync(m)) return; - const d = JSON.parse(fs.readFileSync(m, 'utf8')); - if (!d.private && d.publishConfig?.access === 'public') pkgs.push(d.name); - }; - - const scanPackages = (dir) => { - if (!fs.existsSync(dir)) return; - for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { - if (entry.isDirectory()) addPackage(`${dir}/${entry.name}/package.json`); - } - }; - - scanPackages('packages'); - scanPackages('packages/plugins'); - - let body = '---\n'; - pkgs.forEach(p => body += '"' + p + '": patch\n'); - body += '---\n\nchore: canary build\n'; - fs.writeFileSync('.changeset/canary-temp.md', body); - SCRIPT - fi - pnpm changeset version --snapshot nightly - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - # `pnpm changeset version --snapshot` modifies package.json files - # on disk but does NOT create a git commit. 
Without this commit, - # the umbrella tag we push below would point at the pre-snapshot - # HEAD, so when the npm publisher checks out the tag it would see - # the un-bumped versions and either publish wrong version numbers - # or fail trying to republish an existing version. - # - # The commit is NEVER pushed to main — only the tag below is - # pushed, and the snapshot commit travels with it as part of the - # tag's reachable history. `[skip ci]` is defensive in case any - # future change ever pushes this commit to a branch. - - name: Commit snapshot version bumps - if: steps.check.outputs.skip != 'true' - run: | - git config user.name 'github-actions[bot]' - git config user.email '41898282+github-actions[bot]@users.noreply.github.com' - git add packages/*/package.json packages/plugins/*/package.json .changeset/ - git diff --cached --quiet || git commit -m 'chore: snapshot version bump [skip ci]' - - # Push the umbrella `vX.Y.Z` tag pointing at the snapshot commit. - # The npm publisher resolves this tag to the snapshot commit with - # the bumped package.json versions. - # - # We deliberately skip `pnpm changeset tag`: it would create one - # tag per publishable package (~27 here) every night, which adds - # up fast on the nightly cadence. The npm publisher only consumes - # the umbrella tag, so the per-package tags are pure decoration. - - name: Tag snapshot versions - id: tag - if: steps.check.outputs.skip != 'true' - run: | - version=$(node -p "require('./packages/ao/package.json').version") - git tag "v$version" - git push origin "v$version" - echo "version=$version" >> "$GITHUB_OUTPUT" - - # Public-facing GitHub prerelease. `--prerelease` flags it as such - # so consumers and tooling that filter by stability stay correct. - # No `--target`: the tag was just pushed and GitHub resolves the - # commitish from it (avoids the race where another commit lands - # on main between the tag push and this step). 
- - name: Create GitHub prerelease - if: steps.check.outputs.skip != 'true' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh release create "v${{ steps.tag.outputs.version }}" \ - --prerelease \ - --generate-notes diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index e7d2d2030f..0000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,103 +0,0 @@ -name: CI - -on: - push: - branches: [main] - pull_request: - branches: [main] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -# Minimal token scope — all jobs only checkout, install, build, and test. -# None of them push code, comment on PRs, or call mutating GitHub APIs. -permissions: - contents: read - -jobs: - lint: - name: Lint - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - - run: pnpm install --frozen-lockfile - - run: pnpm lint - - typecheck: - name: Typecheck - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - - run: pnpm install --frozen-lockfile - # Build all non-web packages - - run: pnpm -r --filter "!@aoagents/ao-web" build - # Typecheck all non-web packages - - run: pnpm -r --filter "!@aoagents/ao-web" typecheck - # Build web (Next.js build includes its own typecheck) - - run: pnpm --filter @aoagents/ao-web build - - test: - name: Test (${{ matrix.os }}) - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - uses: 
pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - - run: pnpm install --frozen-lockfile - - run: pnpm -r --filter "!@aoagents/ao-web" build - # Verify node-pty's Windows prebuild loads cleanly before any test that - # depends on it. A broken prebuild fails this step in seconds with a - # clear "node-pty" stack rather than a buried integration-test failure. - - name: Verify node-pty prebuild (Windows) - if: runner.os == 'Windows' - working-directory: packages/plugins/runtime-process - run: node -e "const p=require('node-pty');const t=p.spawn('cmd.exe',['/c','exit'],{cols:80,rows:24});t.onExit(({exitCode})=>process.exit(exitCode));setTimeout(()=>process.exit(2),5000)" - - run: pnpm test - - test-web: - name: Test Web (${{ matrix.os }}) - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - # tmux is the Linux/macOS terminal runtime backing direct-terminal-ws - # integration tests. Windows uses runtime-process + named pipes (covered - # by mux-websocket-windows.test.ts) — those tmux tests self-skip. - - name: Install tmux - if: runner.os == 'Linux' - run: sudo apt-get update && sudo apt-get install -y tmux - - name: Start tmux server - if: runner.os == 'Linux' - run: tmux start-server - - run: pnpm install --frozen-lockfile - - run: pnpm -r --filter "!@aoagents/ao-web" build - # Full web suite — components, hooks, libs, app routes, and server tests. - # Previously this job was scoped to server/__tests__/ only; broadening it - # closes a long-standing coverage gap on both Linux and Windows. 
- - run: pnpm --filter @aoagents/ao-web test diff --git a/.github/workflows/cli-e2e.yml b/.github/workflows/cli-e2e.yml new file mode 100644 index 0000000000..3073843283 --- /dev/null +++ b/.github/workflows/cli-e2e.yml @@ -0,0 +1,55 @@ +name: CLI E2E + +on: + push: + branches: [main] + pull_request: + paths: + - "backend/**" + - "test/cli/**" + - ".github/workflows/cli-e2e.yml" + +permissions: + contents: read + +jobs: + # Primary tier: the cross-platform Go E2E suite (build tag `e2e`) runs the real + # `ao` binary against isolated state on every OS GitHub hosts. These runners + # are the "VMs" — the only place that exercises the OS-specific process-detach + # paths (unix Setsid vs Windows CREATE_NEW_PROCESS_GROUP) and os.UserConfigDir + # resolution. The suite builds its own binary and self-allocates a free port. + native: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + defaults: + run: + working-directory: backend + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version: "1.25" + cache: false + + - name: CLI E2E (native) + run: go test -tags e2e -v ./internal/cli/... + + # Secondary hardening tier: prove that a freshly installed binary works on a + # clean machine with no Go toolchain and no developer state. The Dockerfile + # installs `ao` on PATH in a slim image and runs test/cli/install-check.sh. + # --init gives a real PID-1 reaper so the daemon the check starts is reaped + # after `stop` instead of lingering as a zombie. + container: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build fresh-install image + run: docker build -f test/cli/Dockerfile -t ao-cli-smoke . 
+ + - name: Fresh-install check (container) + run: docker run --rm --init ao-cli-smoke diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index f9898a7fb1..0000000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,119 +0,0 @@ -name: PR Coverage Report - -on: - pull_request: - branches: [main] - -concurrency: - group: coverage-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - pull-requests: write - -jobs: - coverage: - name: Coverage Report - runs-on: ubuntu-latest - # Non-blocking: never prevent merging even if this job fails - continue-on-error: true - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 - - - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - - - name: Install tmux - run: sudo apt-get update && sudo apt-get install -y tmux - - - name: Start tmux server - run: tmux start-server - - - run: pnpm install --frozen-lockfile - - - name: Build packages - run: pnpm -r --filter '!@aoagents/ao-web' build - - - name: Get changed files - env: - BASE_REF: ${{ github.base_ref }} - run: | - git diff --name-only origin/$BASE_REF...HEAD -- '*.ts' '*.tsx' > changed-files.txt - echo "Changed TS files:" - cat changed-files.txt - - - name: Run tests with coverage - continue-on-error: true - run: | - CHANGED=$(cat changed-files.txt) - - # Only run coverage for packages that have changed files - if echo "$CHANGED" | grep -q '^packages/core/'; then - echo "::group::Core tests" - pnpm --filter @aoagents/ao-core exec vitest run --coverage.enabled --coverage.reporter=json || true - echo "::endgroup::" - fi - - if echo "$CHANGED" | grep -q '^packages/cli/'; then - echo "::group::CLI tests" - pnpm --filter @aoagents/ao-cli exec vitest run --coverage.enabled --coverage.reporter=json || true - echo "::endgroup::" - fi 
- - if echo "$CHANGED" | grep -q '^packages/web/'; then - echo "::group::Web tests" - pnpm --filter @aoagents/ao-web exec vitest run --coverage.enabled --coverage.reporter=json || true - echo "::endgroup::" - fi - - if echo "$CHANGED" | grep -q '^packages/plugins/'; then - # Run coverage for each changed plugin - for plugin_dir in $(echo "$CHANGED" | grep '^packages/plugins/' | cut -d/ -f1-3 | sort -u); do - if [ -f "$plugin_dir/package.json" ]; then - pkg_name=$(jq -r .name "$plugin_dir/package.json") - echo "::group::$pkg_name tests" - pnpm --filter "$pkg_name" exec vitest run --coverage.enabled --coverage.reporter=json || true - echo "::endgroup::" - fi - done - fi - - - name: Generate coverage report - run: node .github/scripts/coverage-report.mjs - - - name: Post or update PR comment - # Fork PR tokens lack comment permissions — don't fail the job - continue-on-error: true - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - PR_NUMBER=${{ github.event.pull_request.number }} - REPO=${{ github.repository }} - COMMENT_TAG="" - - # Find existing coverage comment - COMMENT_ID=$(gh api "repos/$REPO/issues/$PR_NUMBER/comments" \ - --paginate --jq ".[] | select(.body | startswith(\"$COMMENT_TAG\")) | .id" \ - | tail -1) - - if [ -n "$COMMENT_ID" ]; then - # Update may fail on fork PRs (403) — fall back to creating a new comment - if gh api "repos/$REPO/issues/comments/$COMMENT_ID" \ - -X PATCH \ - -F body=@coverage-comment.md 2>/dev/null; then - echo "Updated existing comment $COMMENT_ID" - else - echo "Could not update comment $COMMENT_ID, creating new one" - gh pr comment "$PR_NUMBER" --body-file coverage-comment.md - echo "Created new comment" - fi - else - gh pr comment "$PR_NUMBER" --body-file coverage-comment.md - echo "Created new comment" - fi diff --git a/.github/workflows/deploy-vps.yml b/.github/workflows/deploy-vps.yml deleted file mode 100644 index 7773266249..0000000000 --- a/.github/workflows/deploy-vps.yml +++ /dev/null @@ -1,110 +0,0 @@ -name: 
Deploy to VPS - -on: - workflow_dispatch: - inputs: - deploy_sha: - description: Optional exact commit SHA to deploy. Defaults to the head of the dispatched ref. - required: false - type: string - workflow_run: - workflows: [CI] - types: [completed] - branches: [main] - -concurrency: - group: deploy-vps - cancel-in-progress: false - -jobs: - deploy: - if: >- - github.event_name == 'workflow_dispatch' || - ( - github.event.workflow_run.conclusion == 'success' && - github.event.workflow_run.event == 'push' && - github.event.workflow_run.head_branch == 'main' - ) - runs-on: ubuntu-latest - steps: - - name: Resolve deploy target - id: resolve_deploy - env: - GH_TOKEN: ${{ github.token }} - DEPLOY_SHA_INPUT: ${{ inputs.deploy_sha }} - run: | - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - DEPLOY_SHA="$DEPLOY_SHA_INPUT" - if [ -z "$DEPLOY_SHA" ]; then - DEPLOY_SHA="${GITHUB_SHA}" - fi - if ! printf '%s' "$DEPLOY_SHA" | grep -Eq '^[0-9a-f]{40}$'; then - echo "Invalid deploy_sha: expected a 40-character lowercase hex SHA, got '$DEPLOY_SHA'." >&2 - exit 1 - fi - echo "Manual dispatch: deploying SHA $DEPLOY_SHA from ref ${GITHUB_REF_NAME}." - echo "deploy_sha=$DEPLOY_SHA" >> "$GITHUB_OUTPUT" - echo "fetch_ref=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT" - echo "should_deploy=true" >> "$GITHUB_OUTPUT" - exit 0 - fi - - DEPLOY_SHA="${{ github.event.workflow_run.head_sha }}" - MAIN_SHA=$(gh api repos/${{ github.repository }}/git/ref/heads/main --jq '.object.sha') - if [ "$DEPLOY_SHA" != "$MAIN_SHA" ]; then - echo "Skipping: CI SHA $DEPLOY_SHA is not the current tip of main ($MAIN_SHA). Likely a stale rerun." - echo "deploy_sha=$DEPLOY_SHA" >> "$GITHUB_OUTPUT" - echo "fetch_ref=main" >> "$GITHUB_OUTPUT" - echo "should_deploy=false" >> "$GITHUB_OUTPUT" - exit 0 - fi - echo "SHA verified: $DEPLOY_SHA is the current tip of main." 
- echo "deploy_sha=$DEPLOY_SHA" >> "$GITHUB_OUTPUT" - echo "fetch_ref=main" >> "$GITHUB_OUTPUT" - echo "should_deploy=true" >> "$GITHUB_OUTPUT" - - - name: Require VPS host fingerprint - env: - VPS_HOST_FINGERPRINT: ${{ secrets.VPS_HOST_FINGERPRINT }} - run: | - set -e - if [ -z "$VPS_HOST_FINGERPRINT" ]; then - echo "VPS_HOST_FINGERPRINT secret is required for host key verification." >&2 - exit 1 - fi - - - name: Deploy via SSH - if: steps.resolve_deploy.outputs.should_deploy == 'true' - env: - DEPLOY_SHA: ${{ steps.resolve_deploy.outputs.deploy_sha }} - FETCH_REF: ${{ steps.resolve_deploy.outputs.fetch_ref }} - uses: appleboy/ssh-action@2ead5e36573f08b82fbfce1504f1a4b05a647c6f - with: - host: ${{ secrets.VPS_HOST }} - username: aoagent - key: ${{ secrets.VPS_SSH_KEY }} - fingerprint: ${{ secrets.VPS_HOST_FINGERPRINT }} - envs: DEPLOY_SHA,FETCH_REF - script: | - set -e - cd /home/aoagent/agent-orchestrator - echo "==> Ensuring full worktree..." - if [ "$(git config --bool core.sparseCheckout || echo false)" = "true" ]; then - git sparse-checkout disable - fi - echo "==> Fetching latest changes for $FETCH_REF..." - git fetch origin "$FETCH_REF" - echo "==> Current: $(git rev-parse --short HEAD)" - echo "==> Deploying target SHA: ${DEPLOY_SHA:0:7}" - git checkout --force "$DEPLOY_SHA" - echo "==> Updated: $(git rev-parse --short HEAD)" - test -f packages/web/server/start-all.ts - echo "==> Installing dependencies..." - pnpm install --frozen-lockfile - echo "==> Building..." - pnpm build - echo "==> Release requirement: AO_ALLOW_FILESYSTEM_BROWSE=1 must be present in the ao web runtime for the multi-project add-project browser." - echo "==> Restarting services..." 
- pm2 restart ao --update-env - pm2 restart openclaw-gateway --update-env || true - echo "==> Deploy complete: $(git rev-parse --short HEAD)" diff --git a/.github/workflows/desktop-testing.yml b/.github/workflows/desktop-testing.yml new file mode 100644 index 0000000000..9b20be96d8 --- /dev/null +++ b/.github/workflows/desktop-testing.yml @@ -0,0 +1,79 @@ +name: Desktop testing build + +# Builds UNSIGNED desktop artifacts on a `0.0.0-testing-` tag and attaches +# them to a GitHub prerelease, so the packaging pipeline can be exercised +# end-to-end before any signing/notarization secrets exist. +# +# Per OS the current electron-forge makers produce: +# - macOS → .zip (the .dmg maker is a follow-up) +# - Windows → NSIS installer (.exe) +# - Linux → .deb and .rpm +# +# Each OS builds on its own native runner because build-daemon.mjs compiles the +# bundled `ao` daemon for the build host's platform; cross-OS packaging would +# ship the wrong daemon (issues #235/#256). The macOS runner is arm64, so the +# macOS artifact is arm64-only until per-arch builds are wired. +# +# Signing is intentionally OFF (no CSC_LINK / APPLE_ID / Windows cert), so these +# builds do NOT pass Gatekeeper/SmartScreen. They are for pipeline validation, +# not distribution. + +# Disabled: the Linux-only `linux-testing-build.yml` owns the 0.0.0-testing-* tag +# for now. Re-enable by restoring the `push.tags` trigger below when macOS/Windows +# testing builds are wanted again. 
+on: + workflow_dispatch: + # push: + # tags: + # - "0.0.0-testing-*" + +jobs: + build: + strategy: + fail-fast: false + matrix: + os: [macos-latest, windows-latest, ubuntu-latest] + runs-on: ${{ matrix.os }} + permissions: + contents: write + defaults: + run: + working-directory: frontend + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + cache-dependency-path: frontend/package-lock.json + # The daemon is compiled by build-daemon.mjs during premake, so the Go + # toolchain must be present and pinned on every runner. + - uses: actions/setup-go@v5 + with: + go-version-file: backend/go.mod + cache-dependency-path: backend/go.sum + # The Linux rpm maker needs rpmbuild, which ubuntu-latest does not ship. + - name: Install rpm tooling (Linux) + if: runner.os == 'Linux' + run: sudo apt-get update && sudo apt-get install -y rpm + - run: npm ci + - name: Build artifacts (unsigned) + run: npm run make + - name: Publish artifacts to the tag's GitHub release + shell: bash + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TAG: ${{ github.ref_name }} + run: | + set -euo pipefail + # Create the prerelease once. Parallel matrix jobs race here, so a + # second job's "already exists" failure is expected and ignored. + gh release create "$TAG" --prerelease --title "$TAG" \ + --notes "Unsigned desktop testing build (pipeline validation only — not signed or notarized)." \ + || true + # Upload every maker output. NUL-delimited to survive spaces in the + # app name ("Agent Orchestrator-..."); --clobber makes re-runs idempotent. 
+ find out/make -type f -print0 | while IFS= read -r -d '' f; do + echo "uploading: $f" + gh release upload "$TAG" "$f" --clobber + done diff --git a/.github/workflows/frontend-release.yml b/.github/workflows/frontend-release.yml new file mode 100644 index 0000000000..1ab4012faf --- /dev/null +++ b/.github/workflows/frontend-release.yml @@ -0,0 +1,59 @@ +name: Desktop release + +# Builds and publishes the Electron desktop app via electron-forge. +# Generates a GitHub Release (draft) with installers + update manifests. +# Triggered by a `desktop-v*` tag or manually. +# +# Each target OS builds on its own runner so the bundled `ao` daemon is compiled +# natively for that platform. build-daemon.mjs keys the binary off the build +# host's platform, so cross-OS packaging (e.g. building the Windows installer on +# macOS) would ship a non-Windows binary named `ao` and the app could not launch +# the daemon (issues #235/#256). The per-OS matrix keeps host == target. +# +# ⚠️ Until macOS code signing + notarization secrets are configured (see +# frontend/docs/desktop-release.md), published builds are UNSIGNED and will +# NOT auto-update on macOS. The workflow still produces installable artifacts. + +on: + push: + tags: + - "desktop-v*" + workflow_dispatch: + +jobs: + release: + strategy: + fail-fast: false + matrix: + os: [macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + permissions: + contents: write + defaults: + run: + working-directory: frontend + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + cache-dependency-path: frontend/package-lock.json + # The daemon is compiled by build-daemon.mjs during prepackage/premake, so + # the Go toolchain must be present and pinned on every runner. 
+ - uses: actions/setup-go@v5 + with: + go-version-file: backend/go.mod + cache-dependency-path: backend/go.sum + - run: npm ci + - name: Publish + run: npm run publish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # macOS signing + notarization — add as repository secrets and + # set osxSign/osxNotarize in forge.config.ts to enable. + CSC_LINK: ${{ secrets.CSC_LINK }} + CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }} + APPLE_ID: ${{ secrets.APPLE_ID }} + APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml new file mode 100644 index 0000000000..e5c1de4602 --- /dev/null +++ b/.github/workflows/frontend.yml @@ -0,0 +1,41 @@ +name: Frontend + +# Runs the renderer vitest suite. This suite was silently dead for months +# because no workflow executed it (vitest only auto-loads vite.config.ts / +# vitest.config.ts, and the repo had neither until #171) — this job is the +# guard against that happening again. +# +# Typecheck is intentionally NOT run here yet: forge.config.ts and +# update-electron-app carry pre-existing type errors. Add `npm run typecheck` +# once those are fixed. 
+ +on: + push: + branches: [main] + pull_request: + paths: + - "frontend/**" + - ".github/workflows/frontend.yml" + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + defaults: + run: + working-directory: frontend + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + cache-dependency-path: frontend/package-lock.json + + - run: npm ci + + - name: Run vitest suite + run: npx vitest run diff --git a/.github/workflows/gitleaks.yml b/.github/workflows/gitleaks.yml new file mode 100644 index 0000000000..15c70781d1 --- /dev/null +++ b/.github/workflows/gitleaks.yml @@ -0,0 +1,22 @@ +name: gitleaks + +on: + push: + branches: [main] + pull_request: + +permissions: + contents: read + +jobs: + scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # gitleaks-action v1 scans for committed secrets and needs no license + # key (v2 requires GITLEAKS_LICENSE for organization repos). + - name: Scan for secrets + uses: zricethezav/gitleaks-action@v1.6.0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml new file mode 100644 index 0000000000..da49d50428 --- /dev/null +++ b/.github/workflows/go.yml @@ -0,0 +1,97 @@ +name: Go + +on: + push: + branches: [main] + pull_request: + paths: + - "backend/**" + - "frontend/src/api/schema.ts" + - "package.json" + - ".github/workflows/go.yml" + +permissions: + contents: read + +jobs: + build-test: + runs-on: ubuntu-latest + defaults: + run: + working-directory: backend + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + # Read the version from go.mod so CI can't drift from the module + # (it previously pinned 1.22 while go.mod declared 1.25). + go-version-file: backend/go.mod + cache: false + + - name: Check formatting + run: | + unformatted=$(gofmt -l .) 
+ if [ -n "$unformatted" ]; then + echo "These files need gofmt:" + echo "$unformatted" + exit 1 + fi + + - name: Build + run: go build ./... + + - name: Vet + run: go vet ./... + + - name: Test + run: go test -race ./... + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: backend/go.mod + cache: false + + - name: golangci-lint + # v8 of the action drives golangci-lint v2 (the schema this config uses); + # the v6 action speaks v1 CLI flags and errors against a v2 binary. + uses: golangci/golangci-lint-action@v8 + with: + # Pinned for reproducibility: bump intentionally rather than letting an + # upstream release change CI. Must be built with Go >= the module's + # (go.mod is 1.25); v2.12.2 is built with go1.25 — older v2 tags + # (e.g. v2.1.x) are built with go1.24 and refuse to analyze 1.25 code. + version: v2.12.2 + working-directory: backend + # Blocking on the full ruleset: the tree is clean at zero findings, so + # any new issue fails CI rather than being grandfathered. + + api-drift: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: backend/go.mod + cache: false + + - uses: actions/setup-node@v4 + with: + node-version: "24" + + - name: Install dependencies + run: npm ci + + - name: Regenerate API spec and TS types + run: npm run api + + # openapi.yaml drift is already caught by TestBuild_MatchesEmbedded in + # the build-test job (go test -race ./...). Only schema.ts needs checking here. 
+ - name: Check for schema.ts drift + run: git diff --exit-code -- frontend/src/api/schema.ts diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml deleted file mode 100644 index 4c11a164d6..0000000000 --- a/.github/workflows/integration-tests.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: Integration Tests - -on: - push: - branches: [main] - pull_request: - branches: [main] - workflow_dispatch: # allow manual runs - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - integration: - name: Integration Tests - runs-on: ubuntu-latest - timeout-minutes: 20 - - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - - # --- Install tmux --- - - name: Install tmux - run: sudo apt-get update && sudo apt-get install -y tmux - - # --- Start tmux server --- - - name: Start tmux server - run: tmux start-server - - # --- Install agent binaries --- - - name: Install Claude Code - run: npm install -g @anthropic-ai/claude-code - - - name: Install Codex - run: npm install -g @openai/codex - - - name: Install Aider - run: pip install aider-chat - - - name: Install OpenCode - run: npm install -g opencode-ai - - # --- Build project --- - - run: pnpm install --frozen-lockfile - - run: pnpm -r --filter '!@aoagents/ao-web' build - - # --- Run integration tests --- - - name: Run integration tests - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - LINEAR_API_KEY: ${{ secrets.LINEAR_API_KEY }} - LINEAR_TEAM_ID: ${{ secrets.LINEAR_TEAM_ID }} - # Note: COMPOSIO_API_KEY is intentionally not passed here. - # When both keys are set, the plugin prefers the Composio transport - # which requires @aoagents/core SDK installed. 
The direct LINEAR_API_KEY - # transport needs no extra dependencies. - run: pnpm test:integration diff --git a/.github/workflows/onboarding-test.yml b/.github/workflows/onboarding-test.yml deleted file mode 100644 index ba6561b426..0000000000 --- a/.github/workflows/onboarding-test.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: Onboarding Integration Test - -on: - pull_request: - paths: - - 'packages/**' - - 'scripts/setup.sh' - - 'tests/integration/**' - - '.github/workflows/onboarding-test.yml' - push: - branches: - - main - -jobs: - onboarding-test: - name: Test Fresh Onboarding - runs-on: ubuntu-latest - timeout-minutes: 15 - - steps: - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 - - - name: Build test image - working-directory: tests/integration - run: docker compose build - - - name: Run onboarding test - id: test - working-directory: tests/integration - run: | - docker compose up --abort-on-container-exit --exit-code-from onboarding-test - - - name: Extract metrics - if: always() - run: | - # Extract onboarding time from container logs - docker logs ao-onboarding-test 2>&1 | grep "Total onboarding time" || echo "Metrics not available" - - - name: Cleanup - if: always() - working-directory: tests/integration - run: docker compose down -v - - - name: Upload test logs - if: failure() - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 - with: - name: onboarding-test-logs - path: | - tests/integration/*.log - retention-days: 7 diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml new file mode 100644 index 0000000000..b9a10b8df7 --- /dev/null +++ b/.github/workflows/prettier.yml @@ -0,0 +1,38 @@ +name: Prettier + +# Auto-formats the codebase on every push and commits the result back. 
+# Formatting is a CI concern — developers never need to run Prettier locally +# and formatted output never shows up as local uncommitted changes. +# +# GitHub Actions does not re-trigger workflows on commits made with GITHUB_TOKEN, +# so there is no feedback loop risk. + +on: + push: + branches-ignore: + - main + - "entire/**" + - "worktree-**" + +jobs: + format: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.ref }} + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Format with Prettier + run: npx --yes prettier@3 --write . + + - name: Commit formatted files + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git diff --quiet && exit 0 + git add -A + git commit -m "chore: format with prettier [skip ci]" + git push diff --git a/.github/workflows/react-doctor.yml b/.github/workflows/react-doctor.yml new file mode 100644 index 0000000000..7c62f244ea --- /dev/null +++ b/.github/workflows/react-doctor.yml @@ -0,0 +1,32 @@ +name: React Doctor + +on: + push: + branches: [main] + paths: + - "frontend/src/landing/**" + - ".github/workflows/react-doctor.yml" + pull_request: + paths: + - "frontend/src/landing/**" + - ".github/workflows/react-doctor.yml" + +permissions: + contents: read + pull-requests: write + statuses: write + +jobs: + doctor: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # Composite action: installs react-doctor itself, runs the scan against + # the landing site, posts a sticky PR summary + inline review comments, + # and publishes a commit status. Default blocking=error means only + # error-severity findings fail the job; warnings are reported but don't + # block. 
+ - uses: millionco/react-doctor@v2 + with: + directory: frontend/src/landing diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index e318134eb2..0000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,152 +0,0 @@ -name: Release - -# Two-stage release pipeline. -# -# This public repo is responsible for version bumps, tagging, and creating -# the GitHub release. npm publishing is handled by a private cron job (AO) -# that polls GitHub releases and publishes when a new tag is ahead of the -# current npm version. -# -# No NPM_TOKEN or publisher dispatch secrets are needed in this repo. -# The only secret used is GITHUB_TOKEN (automatic). -# -# See CONTRIBUTING.md → "Release architecture" for the full picture. - -on: - workflow_run: - # Depends on the workflow named "CI" in .github/workflows/ci.yml — if - # you rename that file or change its `name:` field, update this string - # too. GitHub matches by name (not filename) and silently no-ops on - # mismatch — so a rename here will mean releases never trigger again. - workflows: [CI] - types: [completed] - branches: [main] - -concurrency: - group: release - cancel-in-progress: false - -permissions: - contents: write - pull-requests: write - -jobs: - release: - name: Release - runs-on: ubuntu-latest - if: >- - github.event.workflow_run.conclusion == 'success' && - github.event.workflow_run.event == 'push' && - github.event.workflow_run.head_branch == 'main' - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - ref: ${{ github.event.workflow_run.head_sha }} - fetch-depth: 0 - - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - # No `registry-url`: this workflow does not publish to npm. 
- - run: echo "HUSKY=0" >> $GITHUB_ENV - - run: pnpm install --frozen-lockfile - - run: pnpm -r build - # Pre-publish guard: catches the case where a publishable package has a - # workspace:* runtime dep on a `private: true` package — pnpm would - # rewrite the dep on publish to a version that doesn't exist on npm, - # breaking `npm install -g @aoagents/ao`. Still runs here so the - # public repo blocks a malformed version bump before it reaches npm. - - run: node scripts/check-publishable-deps.mjs - - # changesets/action manages the "Version Packages" PR — when there - # are pending changesets it opens/updates the PR; when there are - # none (i.e. that PR was just merged) it would normally invoke the - # `publish:` command. We deliberately omit `publish:` so the action - # never runs `changeset publish`. npm publishing is handled by a - # private cron that detects the GitHub release. - - uses: changesets/action@63a615b9cd06ba9a3e6d13796c7fbcb080a60a0b - id: changesets - with: - version: pnpm changeset version - title: "chore: version packages" - commit: "chore: version packages" - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - # Determine release state. Each downstream step (tag push, GH - # release creation) is gated on its own piece of state so the - # workflow is idempotent and recovers cleanly on re-run after a - # partial failure. - # - # `is_release_commit` is the crucial signal: it filters out - # regular commits to main (which also have `hasChangesets == - # 'false'` but should NOT trigger a publish). We detect a - # version-bump commit by comparing the umbrella package's - # version against its value in the parent commit — a Version - # Packages merge changes that version, a regular commit does not. - # The umbrella `@aoagents/ao` is the canonical version source for - # the `vX.Y.Z` tag scheme and is part of the linked group, so - # any release that bumps the cohort will bump this file. 
- - name: Determine release state - id: state - if: steps.changesets.outputs.hasChangesets == 'false' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - version=$(node -p "require('./packages/ao/package.json').version") - echo "version=$version" >> "$GITHUB_OUTPUT" - - prev_version="" - if git rev-parse HEAD^ >/dev/null 2>&1; then - prev_version=$(git show HEAD^:packages/ao/package.json 2>/dev/null | jq -r .version 2>/dev/null || echo "") - fi - if [ -n "$prev_version" ] && [ "$prev_version" != "$version" ]; then - echo "is_release_commit=true" >> "$GITHUB_OUTPUT" - else - echo "is_release_commit=false" >> "$GITHUB_OUTPUT" - fi - - if git ls-remote --tags --exit-code origin "refs/tags/v$version" >/dev/null 2>&1; then - echo "tag_on_remote=true" >> "$GITHUB_OUTPUT" - else - echo "tag_on_remote=false" >> "$GITHUB_OUTPUT" - fi - - if gh release view "v$version" --json tagName >/dev/null 2>&1; then - echo "release_exists=true" >> "$GITHUB_OUTPUT" - else - echo "release_exists=false" >> "$GITHUB_OUTPUT" - fi - - # Push the umbrella `vX.Y.Z` tag only if this is a fresh - # version-bump commit AND the tag isn't already on the remote. - # Skipped on re-runs where the tag was pushed on a prior run - # (idempotent recovery). - # - # We deliberately skip `pnpm changeset tag`: it would create one - # tag per publishable package (~27 here) on every release, which - # creates partial-recovery conflicts on re-run when `git push --tags` - # tries to re-push existing per-package tags. The npm publisher - # only consumes the umbrella tag, so the per-package tags add no - # value. - - name: Tag versioned packages - if: steps.state.outputs.is_release_commit == 'true' && steps.state.outputs.tag_on_remote == 'false' - run: | - git tag "v${{ steps.state.outputs.version }}" - git push origin "v${{ steps.state.outputs.version }}" - - # Public-facing GitHub release. Stable channel — not a prerelease. 
- # No `--target`: the `vX.Y.Z` tag is on the remote at the - # version-bump commit, and `gh release create` resolves the - # commitish from the existing tag. Avoids the race where another - # commit lands on main between the tag push and this step, - # pulling unrelated commits into the auto-generated release notes. - # Skipped if a release for this version already exists. - - name: Create GitHub release - if: steps.state.outputs.is_release_commit == 'true' && steps.state.outputs.release_exists == 'false' - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh release create "v${{ steps.state.outputs.version }}" \ - --generate-notes diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml deleted file mode 100644 index 8fdc736615..0000000000 --- a/.github/workflows/security.yml +++ /dev/null @@ -1,115 +0,0 @@ -name: Security - -on: - push: - branches: [main] - pull_request: - branches: [main] - schedule: - # Run weekly to catch new vulnerabilities - - cron: "0 8 * * 1" - workflow_dispatch: # Allow manual triggering - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - gitleaks: - name: Scan for Secrets - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - with: - fetch-depth: 0 # Full history to ensure base/head SHAs are available for PR scans - - - name: Install Gitleaks - run: | - set -euo pipefail - GITLEAKS_VERSION="8.24.3" - GITLEAKS_BASE_URL="https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}" - RUNNER_ARCH="$(uname -m)" - - case "${RUNNER_ARCH}" in - x86_64|amd64) - GITLEAKS_ARCH="x64" - ;; - aarch64|arm64) - GITLEAKS_ARCH="arm64" - ;; - *) - echo "Unsupported runner architecture: ${RUNNER_ARCH}" >&2 - exit 1 - ;; - esac - - ARCHIVE_NAME="gitleaks_${GITLEAKS_VERSION}_linux_${GITLEAKS_ARCH}.tar.gz" - - # Download gitleaks archive using the release filename so checksum verification works - 
curl -sSfL "${GITLEAKS_BASE_URL}/${ARCHIVE_NAME}" -o "${ARCHIVE_NAME}" - - # Download the combined checksums file - curl -sSfL "${GITLEAKS_BASE_URL}/gitleaks_${GITLEAKS_VERSION}_checksums.txt" -o gitleaks_checksums.txt - - # Verify checksum (sha256sum -c expects the filename in the checksums file to match the local file) - grep "${ARCHIVE_NAME}" gitleaks_checksums.txt | sha256sum -c - - - # Extract the verified binary to a user-writable directory and add it to PATH - INSTALL_DIR="${RUNNER_TEMP}/gitleaks-bin" - mkdir -p "${INSTALL_DIR}" - tar -xzf "${ARCHIVE_NAME}" -C "${INSTALL_DIR}" gitleaks - echo "${INSTALL_DIR}" >> "${GITHUB_PATH}" - - # Clean up - rm -f "${ARCHIVE_NAME}" gitleaks_checksums.txt - - - name: Run Gitleaks - run: | - if [ "${{ github.event_name }}" = "pull_request" ]; then - # Ensure the base and head SHAs exist locally when checkout uses the PR merge ref - git fetch origin ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} --depth=1 - gitleaks detect --source . --verbose --log-opts "${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}" - else - gitleaks detect --source . 
--verbose --log-opts "-n 10" - fi - - dependency-review: - name: Dependency Review - runs-on: ubuntu-latest - if: github.event_name == 'pull_request' - steps: - - name: Checkout code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - - name: Dependency Review - uses: actions/dependency-review-action@56339e523c0409420f6c2c9a2f4292bbb3c07dd3 - with: - fail-on-severity: moderate - - npm-audit: - name: NPM Audit - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - - name: Setup pnpm - uses: pnpm/action-setup@7088e561eb65bb68695d245aa206f005ef30921d - - - name: Setup Node.js - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 - with: - node-version: 20 - cache: pnpm - - - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Run npm audit - run: pnpm audit --audit-level=moderate - continue-on-error: true # Don't fail build on vulnerabilities in deps - - - name: Run pnpm audit (strict) - run: pnpm audit --prod --audit-level=high - continue-on-error: true # npm's legacy audit endpoint returns 410 Gone — non-blocking until pnpm upgrades to bulk advisory API diff --git a/.github/workflows/testing-build.yml b/.github/workflows/testing-build.yml new file mode 100644 index 0000000000..03a9223a39 --- /dev/null +++ b/.github/workflows/testing-build.yml @@ -0,0 +1,118 @@ +name: Testing build (all platforms) + +# Unsigned testing builds for Linux + Windows + macOS in one matrix. Click +# "Run workflow" in the Actions tab, or push a 0.0.0-testing-* tag. All three jobs +# publish to a single 0.0.0-testing- prerelease (distinct asset names). +# +# Per OS: +# Linux -> .deb +# Windows -> NSIS installer (.exe) +# macOS -> .zip (arm64; dmg + signing are follow-ups) +# +# Unsigned: macOS is quarantined/Gatekeeper-blocked once downloaded +# (xattr -dr com.apple.quarantine "Agent Orchestrator.app"); Windows SmartScreen +# warns ("More info" -> "Run anyway"). 
Each OS builds on its own native runner so +# build-daemon.mjs compiles the bundled ao for that platform (host == target). + +on: + workflow_dispatch: + push: + tags: + - "0.0.0-testing-*" + +jobs: + build: + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: "@electron-forge/maker-deb" + - os: windows-latest + # Our custom NSIS maker's `name` (see makers/maker-nsis.ts); forge + # `--targets` matches the configured maker instance by this name. + target: "nsis" + - os: macos-latest + target: "@electron-forge/maker-zip" + runs-on: ${{ matrix.os }} + permissions: + contents: write + defaults: + run: + working-directory: frontend + env: + # Pure-Go sqlite (modernc) needs no cgo; on Linux this also keeps the daemon + # static and portable across glibc. + CGO_ENABLED: 0 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + cache-dependency-path: frontend/package-lock.json + - uses: actions/setup-go@v5 + with: + go-version-file: backend/go.mod + cache-dependency-path: backend/go.sum + - run: npm ci + - name: Build (unsigned) + # `npm run make` keeps the premake daemon build; --targets restricts to this + # platform's maker. + run: npm run make -- --targets ${{ matrix.target }} + # Smoke-install the NSIS installer on a clean, native x64 Windows runner. + # This is a build-vs-host verdict: if it installs here it proves the artifact + # is good and a failing user machine is host-side (AV/disk/signing); if it + # fails here the build/NSIS config is wrong. continue-on-error so a failed + # install never blocks publishing the artifacts. The runner has no real-time + # AV blocking, so a clean install here does NOT prove SmartScreen/Defender + # won't reject the unsigned binaries on end-user machines. 
+ - name: Smoke-install the Windows installer + if: runner.os == 'Windows' + continue-on-error: true + timeout-minutes: 5 + shell: pwsh + run: | + $setup = Get-ChildItem -Path out/make -Recurse -Filter '*.exe' | + Where-Object { $_.Name -like '*Setup*' } | Select-Object -First 1 + if (-not $setup) { $setup = Get-ChildItem -Path out/make -Recurse -Filter '*.exe' | Select-Object -First 1 } + if (-not $setup) { Write-Host '::error::no NSIS installer (.exe) produced under out/make'; exit 1 } + Write-Host "Running $($setup.FullName) /S (silent)" + # electron-builder NSIS (assisted installer): /S installs silently. + $proc = Start-Process -FilePath $setup.FullName -ArgumentList '/S' -PassThru -Wait + Write-Host "Installer exit code: $($proc.ExitCode)" + # Per-user assisted install lands under %LOCALAPPDATA%\Programs; a + # per-machine install would land under Program Files. + $installDir = @( + (Join-Path $env:LOCALAPPDATA 'Programs\Agent Orchestrator'), + (Join-Path ${env:ProgramFiles} 'Agent Orchestrator') + ) | Where-Object { Test-Path $_ } | Select-Object -First 1 + if ($installDir) { + Write-Host "INSTALL OK: $installDir created" + Get-ChildItem $installDir | Select-Object Name | Format-Table -AutoSize + } else { + Write-Host "::warning::INSTALL: no known install dir found (checked LOCALAPPDATA\Programs and Program Files)" + } + - name: Publish to a 0.0.0-testing- prerelease + shell: bash + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + # Tag push: use the pushed tag. Manual run: mint 0.0.0-testing-<short-sha> (first 7 chars of GITHUB_SHA). + if [ "${GITHUB_REF_TYPE}" = "tag" ]; then + TAG="${GITHUB_REF_NAME}" + else + TAG="0.0.0-testing-${GITHUB_SHA::7}" + fi + # Matrix jobs race here; first one creates the release, the rest hit + # "already exists" which is fine (|| true). Distinct asset names + --clobber + # make uploads idempotent across re-runs. 
+ gh release create "$TAG" --prerelease --target "$GITHUB_SHA" --title "$TAG" \ + --notes "Unsigned testing build (Linux .deb / Windows NSIS .exe / macOS .zip). Not signed; for testing only." \ + || true + # NUL-delimited to survive spaces in the app name ("Agent Orchestrator-..."). + find out/make -type f -print0 | while IFS= read -r -d '' f; do + echo "uploading: $f" + gh release upload "$TAG" "$f" --clobber + done diff --git a/.gitignore b/.gitignore index 44e7f9935f..596f24d8b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,71 +1,64 @@ +# Node / Electron node_modules/ +.pnpm/ dist/ -.pnpm-store/ -.next/ -.next-dev/ -*.tsbuildinfo -coverage/ -*.patch -*-context.md -packages/web/screenshots/ -.playwright-cli/ +out/ +build/ +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* -# Environment files (secrets) -.env -.env.local -.env.*.local -.env.production.local -.env.development.local -.env.test.local - -# Credentials and secrets -*.key -*.pem -*.p12 -*.pfx -*.cer -*.crt -*.der -*.csr -secrets.yaml -secrets.yml -credentials.json -credentials.yaml -*-credentials.* -.secrets/ -.credentials/ - -# API keys and tokens -.token -.api-key -*-token.txt -*-api-key.txt +# Go +.go/ +bin/ +*.test +*.out +vendor/ +# compiled daemon binary +/backend/backend +agent-orchestrator.yaml -# Cloud provider credentials -.aws/ -.gcloud/ -.azure/ +# Backend runtime data artifacts (SQLite store + WAL, CDC event log). +# Created at AO_DATA_DIR (outside the repo by default); ignored here so a +# data dir pointed at the tree never gets committed. +*.db +*.db-shm +*.db-wal +session-events.jsonl +session-events.jsonl.* -# SSH keys -id_rsa -id_dsa -id_ecdsa -id_ed25519 -*.ppk +# Agent Orchestrator local session state +.ao/ -# Development symlinks (created per-worktree, not committed) -.claude -packages/web/agent-orchestrator.yaml +# AO reviewer scratch output. The reviewer agent runs inside the worker's +# worktree; its review writeup must never be committed onto the worker branch. 
+/review.md -# Local agent orchestrator config (may contain secrets) -agent-orchestrator.yaml +# Environment +.direnv/ +.env +.env.* +!.env.example -# Agent configuration and activity logs -.claude/ -.opencode/ -.ao/ +# Editor / IDE +.vscode/ +.idea/ +*.swp +*~ -# OS-specific files +# OS .DS_Store Thumbs.db -package-lock.json + +# Personal local overrides (not for the team) +.envrc.local + +# electron-forge / vite build output +.vite/ +dist-electron/ +# electron-builder debug dump, written to the cwd on every NSIS build +builder-debug.yml + +# playwright artifacts +frontend/test-results/ diff --git a/.gitleaks.toml b/.gitleaks.toml deleted file mode 100644 index 2c0b755818..0000000000 --- a/.gitleaks.toml +++ /dev/null @@ -1,31 +0,0 @@ -# Gitleaks configuration for Agent Orchestrator -# Prevents accidental commits of secrets, API keys, tokens, etc. - -title = "Agent Orchestrator Secret Scanning" - -# Use all default gitleaks rules -[extend] -useDefault = true - -# Allowlist to ignore false positives -[allowlist] -description = "Allowlisted patterns" - -paths = [ - "node_modules/", - "dist/", - ".next/", - "coverage/", - "pnpm-lock.yaml", -] - -regexes = [ - # Environment variable references - "\\$\\{[A-Z_]+\\}", - - # Placeholder values - "your-api-key-here", - "your-token-here", - "example\\.com", - -] diff --git a/.husky/pre-commit b/.husky/pre-commit deleted file mode 100755 index 4ff4faf366..0000000000 --- a/.husky/pre-commit +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/sh - -# Gitleaks pre-commit hook -# Scans staged files for secrets before allowing commit - -echo "🔒 Scanning staged files for secrets..." - -if ! command -v gitleaks > /dev/null 2>&1; then - echo "" - echo "❌ gitleaks is not installed!" - echo "" - echo "Install gitleaks to enable secret scanning:" - echo " macOS: brew install gitleaks" - echo " Linux: See https://github.com/gitleaks/gitleaks#installing" - echo "" - echo "Secret scanning is REQUIRED to prevent credential leaks." 
- echo "Commit blocked until gitleaks is installed." - echo "" - exit 1 -fi - -# Run gitleaks on staged files only -gitleaks protect --staged --verbose - -if [ $? -ne 0 ]; then - echo "" - echo "❌ Secret(s) detected in staged files!" - echo "" - echo "To fix:" - echo " 1. Remove the secret from the file" - echo " 2. Use environment variables instead: \${SECRET_NAME}" - echo " 3. Add to .env.local (which is in .gitignore)" - echo " 4. Update agent-orchestrator.yaml.example with placeholder values" - echo "" - echo "If this is a false positive, update .gitleaks.toml allowlist" - echo "" - exit 1 -fi - -echo "✅ No secrets detected" diff --git a/.issue-assets/1145-image.png b/.issue-assets/1145-image.png deleted file mode 100644 index a1fd366b29..0000000000 Binary files a/.issue-assets/1145-image.png and /dev/null differ diff --git a/.issue-assets/1736-notifier-logging.png b/.issue-assets/1736-notifier-logging.png deleted file mode 100644 index e6c09b5b19..0000000000 Binary files a/.issue-assets/1736-notifier-logging.png and /dev/null differ diff --git a/.npmrc b/.npmrc deleted file mode 100644 index 94a06c2180..0000000000 --- a/.npmrc +++ /dev/null @@ -1 +0,0 @@ -access=public diff --git a/.prettierignore b/.prettierignore index 4b4adffa85..4be88241fe 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,6 +1,17 @@ -dist/ -node_modules/ -.next/ -coverage/ -*.tsbuildinfo -pnpm-lock.yaml +# Generated — never hand-edit; regenerated by `npm run api` / sqlc / openapi-typescript +frontend/src/api/schema.ts +backend/internal/httpd/apispec/openapi.yaml + +# Build outputs +frontend/dist +frontend/dist-electron +frontend/release +frontend/test-results +frontend/playwright-report + +# Lockfiles +package-lock.json +frontend/package-lock.json + +# Go uses gofmt, not Prettier +backend/ diff --git a/.prettierrc b/.prettierrc index 40a190018d..2b07565cec 100644 --- a/.prettierrc +++ b/.prettierrc @@ -1,9 +1,9 @@ { - "semi": true, - "singleQuote": false, - "trailingComma": "all", - 
"tabWidth": 2, - "printWidth": 100, - "bracketSpacing": true, - "arrowParens": "always" + "useTabs": true, + "tabWidth": 2, + "printWidth": 120, + "singleQuote": false, + "trailingComma": "all", + "semi": true, + "arrowParens": "always" } diff --git a/AGENTS.md b/AGENTS.md index 7b4c071254..82cb2b0f2a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,79 +1,129 @@ # AGENTS.md -> Full project context, architecture, conventions, and plugin standards are in **CLAUDE.md**. +Operational guidance for coding agents working in this repository. Keep changes small, match the current rewrite architecture, and prefer the documented daemon/API boundaries over behavior from the old TypeScript implementation. + +## Repo layout + +- `backend/` — Go rewrite of Agent Orchestrator: Cobra `ao` CLI, loopback HTTP daemon, services, SQLite storage, lifecycle/reaper, runtime/workspace/agent/tracker adapters, terminal mux, and tests. +- `frontend/` — Electron + React supervisor wired to the daemon via the generated typed client. Treat it as a thin supervisor/UI surface; do not move daemon logic into it. +- `docs/` — current architecture/status notes. Start here before changing lifecycle, CLI, agents, storage, or daemon behavior. +- `test/` — external smoke/e2e assets, including the CLI fresh-install container check. +- `.github/workflows/` — CI definitions. Mirror these commands locally when possible. ## Commands +From the repo root unless noted: + +```bash +npm run lint # backend go test ./... + golangci-lint v2.12.2 +npm run frontend:typecheck # frontend TypeScript check +npm run sqlc # regenerate backend/internal/storage/sqlite/gen from queries/schema +npm run api # regenerate OpenAPI spec + frontend TS types (see API contract changes below) +npx @redwoodjs/agent-ci run --all # local workflow validation; requires Docker socket +``` + +Backend-specific checks: + +```bash +cd backend +go build ./... +go test ./... +go test -race ./... +go vet ./... 
+go run ./cmd/ao start +``` + +Frontend-specific checks: + ```bash -pnpm install # Install dependencies -pnpm build # Build all packages -pnpm dev # Web dashboard dev server (Next.js + 2 WS servers) -pnpm typecheck # Type check all packages -pnpm test # All tests (excludes web) -pnpm --filter @aoagents/ao-web test # Web tests -pnpm lint # ESLint check -pnpm lint:fix # ESLint fix -pnpm format # Prettier format +cd frontend +npm run typecheck +npm run build ``` -## Architecture TL;DR +When showing or demoing frontend changes, run `ao preview [url]` from inside the session so the change renders in the desktop browser panel (the inspector rail's Browser tab); do not just describe it. -Monorepo (pnpm) with packages: `core`, `cli`, `web`, and `plugins/*`. The web dashboard is a Next.js 15 app (App Router) with React 19 and Tailwind CSS v4. Data flows from `agent-orchestrator.yaml` through core's `loadConfig()` to API routes, served via SSR and a 5s-interval SSE stream. Terminal sessions use WebSocket connections to tmux PTYs. See CLAUDE.md for the full plugin architecture (8 slots), session lifecycle, and data flow. +## Where to look first -## Working Principles +- `README.md` — current run/config/test quickstart. +- `docs/README.md` — docs index. +- `docs/architecture.md` — backend mental model, package layout, lifecycle/session/service boundaries, and load-bearing rules. +- `docs/STATUS.md` — what is shipped on `main` today and what is still in flight. +- `docs/cli/README.md` — intended CLI shape: thin Cobra client over daemon HTTP, never direct storage/runtime access. +- `docs/agent/README.md` — agent adapter contract and hook behavior. +- `CLAUDE.md` — compatibility pointer for Claude Code; it directs agents back to `AGENTS.md`. -- **Think before coding.** State assumptions. Ask when unclear. Push back when a simpler approach exists. -- **Simplicity first.** No speculative features. No abstractions for single-use code. Plugin slots are the extension point. 
-- **Surgical changes.** Touch only what you must. Match existing style. Don't refactor things that aren't broken. Every changed line traces to the task. -- **Goal-driven.** Define verifiable success criteria before implementing. Write tests that reproduce bugs before fixing them. +For code entry points: -Full guidelines with AO-specific context: see "Working Principles" in CLAUDE.md. +- CLI commands: `backend/internal/cli/*.go`; follow nearby command/test patterns before adding a new style. +- HTTP controllers and DTOs: `backend/internal/httpd/controllers/`. +- Service read/write boundaries: `backend/internal/service/`. +- Domain vocabulary: `backend/internal/domain/`. +- Port contracts: `backend/internal/ports/`. +- SQLite queries/migrations/store: `backend/internal/storage/sqlite/`. +- Generated sqlc code: `backend/internal/storage/sqlite/gen/`. -## Skills +## Coding conventions -Agents working on this repo should use these checked-in skills: +- Keep every change surgical and directly tied to the task. Avoid drive-by cleanup, broad renames, formatting churn, speculative abstractions, and architectural refactors unless the task explicitly asks for them. +- Follow existing Go package boundaries. CLI code should call daemon HTTP routes through shared CLI client helpers; it should not open SQLite, spawn runtimes, or call adapters directly. +- Keep Cobra commands in the relevant command file and table-test them in the style of `backend/internal/cli/*_test.go`. +- Mirror existing response/request DTOs in the CLI instead of importing HTTP controller packages into CLI code, unless the package already establishes that dependency. +- Return usage errors as `usageError` so CLI misuse exits 2; runtime/daemon failures should exit 1. +- Preserve API error envelopes and request IDs when surfacing daemon errors. +- Use `context.Context` as the first argument for functions that do I/O or blocking work. +- Do not add abstractions for one-off use cases. 
Add helpers only when they remove duplication across real call sites. +- Tests should cover the user-visible behavior and boundary being changed: happy path, validation/missing args, daemon error envelopes, and any destructive confirmation path. -### Bug Triage (`skills/bug-triage/`) +## Hard rules and boundaries -**When to use:** Any time a bug is reported — in chat, issues, or live observation. +- The daemon is a loopback-only sidecar. Do not make the bind host configurable or expose it beyond `127.0.0.1`. +- The CLI is a thin client. Do not port old in-process TypeScript CLI behavior that bypasses daemon HTTP routes. +- Do not store derived/display session status. Status is derived from durable facts (`activity_state`, `is_terminated`, PR/check/comment facts) at service read time. +- Do not treat failed/unknown runtime probes as proof a session is dead. +- Do not force-delete dirty registered worktrees. +- Do not modify already-merged SQLite migrations. Add a new migration instead. +- Do not hand-edit `backend/internal/storage/sqlite/gen/*`; change `backend/internal/storage/sqlite/queries/*` or migrations and run `npm run sqlc`. +- SQLite change events come from DB triggers into `change_log`; do not add parallel manual CDC emission from store methods unless the architecture changes explicitly. +- Keep generated OpenAPI/API DTO drift in mind: controller response shapes live in `backend/internal/httpd/controllers/dto.go` and tests may assert CLI/HTTP wire compatibility. +- Do not add network calls to tests unless the package already has an integration/e2e pattern for them. Prefer `httptest`, fakes, and injected dependencies. +- Do not commit local run state, daemon data, temporary worktrees, build outputs, or credentials. +- All app state lives under `~/.ao` only. 
The daemon's data dir, `running.json`, worktrees, and the Electron supervisor's `userData` (Chromium cache, cookies, local/session storage, crash dumps) must resolve under `~/.ao` (overridable via `AO_DATA_DIR`/`AO_RUN_FILE`). Never write to or read from `~/Library/Application Support` or any other OS default app-data location. `main.ts` pins Electron's `userData` to `~/.ao/electron`; do not remove that override or rely on Electron's default path. -**What it covers:** -- Full triage workflow: gather context → search duplicates → file/update GitHub issues → push fix PRs -- Root cause analysis with `git log -S` archaeology and upstream dependency research -- GitHub API-based file editing (no local checkout needed) via `scripts/push_fix_to_github.py` -- NPM package regression diffing -- Remote code inspection when the repo isn't cloned locally +## API contract changes -**How to load:** Read `skills/bug-triage/SKILL.md` and follow its step-by-step workflow. The `scripts/` directory contains executable tools: -- `push_fix_to_github.py` — Push a single-file fix and create a PR entirely via GitHub API +The daemon API is code-first. The OpenAPI spec and frontend TypeScript types are generated artifacts — edit the source, then regenerate. -**Always pull latest main before triaging.** Stale code = bad triage. No exceptions. +**Source files to edit:** -## Key Files +- `backend/internal/httpd/controllers/dto.go` — request/response shapes. +- `backend/internal/httpd/apispec/specgen/build.go` — operation registry; add a `schemaNames` entry for any new named type. -- `packages/core/src/types.ts` — All plugin interfaces (Agent, Runtime, Workspace, etc.) 
-- `packages/core/src/session-manager.ts` — Session CRUD + stale runtime reconciliation (detects dead runtimes, persists `runtime_lost`) -- `packages/core/src/lifecycle-manager.ts` — State machine + polling loop -- `packages/core/src/lifecycle-state.ts` — Canonical lifecycle → legacy status mapping (`deriveLegacyStatus`) -- `packages/cli/src/commands/start.ts` — ao start/stop commands + Ctrl+C graceful shutdown -- `packages/cli/src/lib/running-state.ts` — RunningState + LastStopState management -- `packages/web/src/components/Dashboard.tsx` — Main dashboard view (sidebar uses unscoped sessions, kanban filters by project) -- `packages/web/src/components/SessionDetail.tsx` — Session detail view -- `packages/web/src/app/globals.css` — Design tokens +**Regenerate after editing:** -## CLI Behavior Notes +```bash +npm run api # runs api:spec then api:ts in sequence +``` -- `ao stop` loads global config to see all projects; `ao stop ` only kills that project's sessions -- Ctrl+C on `ao start` performs full graceful shutdown (same as `ao stop`) -- `LastStopState` includes `otherProjects` for cross-project session restore on next `ao start` -- Dashboard sidebar always shows ALL projects' sessions regardless of active project view +This is equivalent to running: -## Cross-Platform (Windows) Compatibility +```bash +npm run api:spec # cd backend && go generate ./internal/httpd/apispec/... +npm run api:ts # npx openapi-typescript@7.4.4 backend/internal/httpd/apispec/openapi.yaml -o frontend/src/api/schema.ts +``` + +**Verify:** -AO ships on macOS, Linux, **and Windows**. All three are first-class. +```bash +cd backend && go test ./internal/httpd/... # spec drift + route/spec parity tests (does not cover schema.ts — that is checked by the api-drift CI job) +``` -**Golden Rule:** Never write `process.platform === "win32"` in new code. Use `isWindows()` from `@aoagents/ao-core`. 
If you need branching the helpers don't cover, add it to `packages/core/src/platform.ts` — never inline at the call site. Inline checks bypass the central platform-mock test pattern and become silent regressions. +Commit `openapi.yaml` and `frontend/src/api/schema.ts` together with the Go changes. CI will regenerate both files and fail if the committed versions are out of date. The CLI hand-mirrored DTOs remain a deliberate manual boundary and are not generated. -**Read `docs/CROSS_PLATFORM.md` before merging any change that touches:** process spawning/killing/signalling, file paths, shell commands, network binding, POSIX shell-outs (`tmux`, `lsof`, etc.), runtime/agent/workspace plugins, agent-plugin internals (`setupPathWrapperWorkspace`, `getActivityState`, `formatLaunchCommand`, `isProcessRunning`, `detect()`), the Windows pty-host pipe protocol or registry, or any new `process.platform === "win32"` check. +## PR hygiene -That doc has the **full helper inventory** (every import path), the EPERM-vs-ESRCH gotcha when probing processes, path case-insensitivity rules, PowerShell-vs-bash differences (`& ` call-operator, `$env:VAR`, no `/dev/null`, no `$(cat …)`, `.cmd` shim resolution via `shell: isWindows()`), IPv6 `localhost` stalls on Windows, agent-plugin Windows specifics, the test pattern for mocking `process.platform`, and a 10-point pre-merge checklist. CLAUDE.md has the quick-reference helper table; CROSS_PLATFORM.md has the depth. +- Branch from `main` unless explicitly continuing an existing PR. +- Keep one issue per PR. If asked for separate work, create a separate branch and PR. +- Use conventional commit messages (`feat:`, `fix:`, `docs:`, `test:`, `chore:`). +- Explain intentional omissions in the PR body, especially when the TypeScript original had more behavior than the Go rewrite domain currently supports. +- Run the narrowest relevant tests first, then the repo/CI commands that match the touched area. 
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md deleted file mode 100644 index e46d014d78..0000000000 --- a/ARCHITECTURE.md +++ /dev/null @@ -1,311 +0,0 @@ -# Final Architecture Plan - -## Core Principles - -1. **Convention over configuration** - Auto-derive everything possible -2. **Single source of truth** - Config file in repo, runtime data in `~/.agent-orchestrator/` -3. **Zero path configuration** - All paths determined automatically -4. **Global uniqueness** - Hash-based namespacing prevents collisions - ---- - -## 1. Directory Structure - -``` -Repo (versioned): -~/any/path/to/agent-orchestrator/ - agent-orchestrator.yaml ← Config file (only this matters) - packages/ - ... - -Runtime Data (not versioned): -~/.agent-orchestrator/ ← Single parent directory - a3b4c5d6e7f8-integrator/ ← {hash}-{projectId} - sessions/ - int-1 ← Session metadata files (no hash prefix) - int-2 - worktrees/ - int-1/ ← Git worktrees (no hash prefix) - int-2/ - archive/ - int-3_2026-02-17T10-30-00 - .origin ← Config path reference - - a3b4c5d6e7f8-backend/ ← Same hash (same config!) - sessions/ - be-1 ← No hash prefix (already namespaced) - worktrees/ - be-1/ - .origin -``` - -**Hash Derivation (from config location):** - -```typescript -const configDir = path.dirname(configPath); // /Users/alice/code/agent-orchestrator -const hash = sha256(configDir).slice(0, 12); // a3b4c5d6e7f8 - -// Each project managed by this config gets a directory -// Format: {hash}-{projectId} -const projectId = path.basename(projectPath); // integrator, backend, etc. -const instanceId = `${hash}-${projectId}`; // a3b4c5d6e7f8-integrator - -// Not configurable! -const projectBaseDir = `~/.agent-orchestrator/${instanceId}`; -const sessionsDir = `${projectBaseDir}/sessions`; -const worktreesDir = `${projectBaseDir}/worktrees`; -``` - -**Key insight:** All projects from the same config share the same hash prefix! - ---- - -## 2. 
Config File (Minimal) - -```yaml -# agent-orchestrator.yaml - -projects: - - path: ~/repos/integrator # Required: where is the repo? - repo: ComposioHQ/integrator # Required: GitHub repo - defaultBranch: next # Required: base branch - - # Optional overrides: - name: Composio Integrator # Display name (default: folder name) - sessionPrefix: int # Override auto-generated prefix -``` - -**Auto-derived:** - -- Project ID: `basename(path)` → `integrator` -- Session prefix: `generatePrefix("integrator")` → `int` -- Worktree path: `{worktreeDir}/integrator/` - -**That's it! No dataDir, no worktreeDir, no explicit IDs.** - ---- - -## 3. Session Naming - -### User-Facing Names (Elegant) - -``` -{sessionPrefix}-{num} - -int-1, int-2 (integrator) -ao-1, ao-2 (agent-orchestrator) -ss-1, ss-2 (safe-split) -``` - -### Runtime Session Names (Globally Unique) - -``` -{hash}-{sessionPrefix}-{num} - -a3b4c5d6e7f8-int-1 -a3b4c5d6e7f8-ao-1 -f1e2d3c4b5a6-int-1 (different checkout, no collision!) -``` - -On Unix this is the tmux session name. On Windows (where the default runtime is `process`, not `tmux`) the same string identifies the named pipe path `\\.\pipe\ao-pty-{sessionId}` and is recorded in `~/.agent-orchestrator/windows-pty-hosts.json`. - -### Prefix Generation (Clean Heuristic) - -```typescript -function generateSessionPrefix(projectId: string): string { - if (projectId.length <= 4) return projectId.toLowerCase(); - - // CamelCase: PyTorch → pt - const uppercase = projectId.match(/[A-Z]/g); - if (uppercase?.length > 1) { - return uppercase.join("").toLowerCase(); - } - - // kebab-case: agent-orchestrator → ao - if (projectId.includes("-") || projectId.includes("_")) { - const sep = projectId.includes("-") ? "-" : "_"; - return projectId - .split(sep) - .map((w) => w[0]) - .join("") - .toLowerCase(); - } - - // Single word: integrator → int - return projectId.slice(0, 3).toLowerCase(); -} -``` - ---- - -## 4. 
Metadata Storage - -### File Structure (One Directory Per Project) - -``` -~/.agent-orchestrator/a3b4c5d6e7f8-integrator/ - sessions/ - int-1 ← Metadata file (user-facing session name) - int-2 - worktrees/ - int-1/ - int-2/ - archive/ - int-3_2026-02-17T10-30-00 -``` - -### Metadata File Format (key=value) - -``` -project=integrator -issue=INT-100 -branch=feat/INT-100 -status=working -tmuxName=a3b4c5d6e7f8-int-1 # Unix; on Windows the runtime handle is `pipePath=\\.\pipe\ao-pty-` plus `ptyHostPid` -worktree=/Users/alice/.agent-orchestrator/a3b4c5d6e7f8-integrator/worktrees/int-1 -createdAt=2026-02-17T10:30:00Z -pr=https://github.com/ComposioHQ/integrator/pull/123 -``` - -**Key fields:** - -- `project` - Which project this session belongs to (for filtering) -- `issue` - Linear/GitHub issue ID -- `branch` - Git branch name -- `worktree` - Path to git worktree -- `status` - working/idle/pr_open/merged - ---- - -## 5. User Commands (Simple) - -```bash -# List all sessions -ao list - -# List sessions for specific project -ao list integrator - -# Spawn new session -ao spawn integrator INT-100 - -# Attach to session (orchestrator finds the runtime handle: tmux name on Unix, named pipe on Windows) -ao attach int-1 - -# Kill session -ao kill int-1 - -# Show instance info -ao info -``` - -**No config paths in commands! Everything auto-discovered.** - ---- - -## 6. 
Multi-Instance Support - -### Same Config → Same Hash - -```yaml -# ~/code/my-orchestrator/agent-orchestrator.yaml -projects: - - path: ~/repos/integrator - - path: ~/repos/backend -``` - -Results in: - -``` -~/.agent-orchestrator/ - a3b4c5d6e7f8-integrator/ ← Same hash (same config) - a3b4c5d6e7f8-backend/ ← Same hash (same config) -``` - -### Different Config Locations → Different Hashes - -``` -~/code/orchestrator/ → hash: a3b4c5d6e7f8 -~/code/orchestrator-v2/ → hash: f1e2d3c4b5a6 -~/splitly-orchestrator/ → hash: 9876abcd5432 -``` - -Results in: - -``` -~/.agent-orchestrator/ - a3b4c5d6e7f8-integrator/ ← From ~/code/orchestrator - f1e2d3c4b5a6-integrator/ ← From ~/code/orchestrator-v2 (different checkout!) - 9876abcd5432-safesplit/ ← From ~/splitly-orchestrator - -# Sessions (no collisions): -a3b4c5d6e7f8-int-1 (main checkout) -f1e2d3c4b5a6-int-1 (v2 checkout) -9876abcd5432-ss-1 (splitly) -``` - -**Each orchestrator checkout gets unique hash. Projects within same config share that hash.** - ---- - -## 7. Complete Example - -```yaml -# ~/code/my-orchestrator/agent-orchestrator.yaml -projects: - - path: ~/repos/integrator - repo: ComposioHQ/integrator - defaultBranch: next - - - path: ~/repos/backend - repo: ComposioHQ/backend - defaultBranch: main - sessionPrefix: be # Override auto-generated "bac" -``` - -**Results in:** - -``` -Config location: - ~/code/my-orchestrator/ - → Hash: a3b4c5d6e7f8 - -Runtime data: - ~/.agent-orchestrator/ - a3b4c5d6e7f8-integrator/ ← Project 1 - sessions/ - int-1 - worktrees/ - int-1/ - - a3b4c5d6e7f8-backend/ ← Project 2 (same hash!) - sessions/ - be-1 - worktrees/ - be-1/ - -Session names: - User-facing: int-1, be-1 - Tmux: a3b4c5d6e7f8-int-1, a3b4c5d6e7f8-be-1 - -Commands: - ao spawn integrator INT-100 - ao attach int-1 -``` - ---- - -## Summary: What Users Configure - -**Required (3 fields per project):** - -1. `path` - Where is the repo? -2. `repo` - GitHub owner/repo -3. 
`defaultBranch` - Base branch name - -**Optional:** - -- `sessionPrefix` - Override auto-generated prefix -- `name` - Display name - -**That's it! Everything else is automatic.** diff --git a/CLAUDE.md b/CLAUDE.md index 69dc6b580a..7b33550346 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,644 +1,30 @@ # CLAUDE.md -## What is this project? - -Agent Orchestrator (AO) is a platform for spawning and managing parallel AI coding agents across distributed systems. It runs multiple agents (Claude Code, Codex, Aider, OpenCode) simultaneously — each in an isolated git worktree with its own PR — and provides a single dashboard to supervise them all. Agents autonomously fix CI failures, address review comments, and manage PRs. - -**Org:** ComposioHQ -**Repo:** `github.com/ComposioHQ/agent-orchestrator` -**License:** MIT - -## Monorepo Structure - -pnpm workspace (v9.15.4) with ~30 packages: - -``` -packages/ - core/ # Engine: types, config, session manager, lifecycle, plugin registry - cli/ # CLI tool (`ao` command) — depends on all plugins - web/ # Next.js 15 dashboard (App Router, React 19, Tailwind v4) - ao/ # Global CLI wrapper (thin shim around cli) - plugins/ - agent-claude-code/ agent-aider/ agent-codex/ agent-opencode/ - runtime-tmux/ runtime-process/ - workspace-worktree/ workspace-clone/ - tracker-github/ tracker-linear/ tracker-gitlab/ - scm-github/ scm-gitlab/ - notifier-desktop/ notifier-slack/ notifier-webhook/ - notifier-composio/ notifier-openclaw/ - terminal-iterm2/ terminal-web/ - integration-tests/ # E2E tests -``` - -**Build order:** core -> plugins -> cli/web (parallel). `pnpm build` at root handles this. 
- -## Tech Stack - -| Layer | Stack | -|-------|-------| -| Language | TypeScript (strict mode, ES2022, Node16 modules) | -| Runtime | Node.js 20+ | -| Package Manager | pnpm 9.15.4 (`workspace:*` protocol) | -| Web | Next.js 15 (App Router) + React 19 | -| Styling | Tailwind CSS v4 + CSS custom properties (`@theme` block in `globals.css`) | -| Terminal UI | xterm.js 5.3.0 + WebSocket to tmux PTYs | -| Validation | Zod | -| Testing | Vitest + @testing-library/react | -| Linting | ESLint 10 (flat config) + Prettier 3.8 | -| CI/CD | GitHub Actions (lint, typecheck, test, release) | -| Versioning | Changesets | -| Git hooks | Husky + gitleaks (secret scanning) | -| Container | OCI via Containerfile (Podman/Docker) | - -## Commands - -```bash -# Install & build -pnpm install -pnpm build - -# Development -pnpm dev # Web dashboard (Next.js + 2 WS servers) - -# Type checking -pnpm typecheck # All packages -pnpm --filter @aoagents/ao-web typecheck # Web only - -# Testing -pnpm test # All packages (excludes web) -pnpm --filter @aoagents/ao-web test # Web tests -pnpm --filter @aoagents/ao-web test:watch # Web watch mode -pnpm test:integration # Integration tests - -# Lint & format -pnpm lint -pnpm lint:fix -pnpm format -pnpm format:check -``` - -## Architecture - -### Plugin System (8 Slots) - -Every abstraction is a pluggable interface defined in `packages/core/src/types.ts`: - -| Slot | Default | Purpose | -|------|---------|---------| -| Runtime | tmux | Where agents execute | -| Agent | claude-code | Which AI tool to use | -| Workspace | worktree | Code isolation (worktree vs clone) | -| Tracker | github | Issue tracking (GitHub, Linear, GitLab) | -| SCM | github | PR, CI, reviews | -| Notifier | desktop | Notification delivery | -| Terminal | iterm2 | Human attachment UI | -| Lifecycle | core (non-pluggable) | State machine + polling | - -### Session Lifecycle - -Sessions have a **canonical lifecycle** (in `lifecycle-state.ts`) with separate `state` and `reason` fields, 
and a **legacy status** derived from them for display. - -**Canonical session states:** `not_started`, `working`, `idle`, `needs_input`, `stuck`, `detecting`, `done`, `terminated` - -**Terminal reasons:** `manually_killed`, `runtime_lost`, `agent_process_exited`, `probe_failure`, `error_in_process`, `auto_cleanup`, `pr_merged` - -**Legacy status flow (derived via `deriveLegacyStatus`):** -``` -spawning -> working -> pr_open -> ci_failed / review_pending - | | - changes_requested approved - | | - +-> mergeable -> merged -> cleanup -> done -``` - -**Stale runtime reconciliation:** `sm.list()` detects dead runtimes (tmux/process gone) during enrichment and persists `detecting` state with `runtime_lost` reason to disk. The lifecycle manager's `resolveProbeDecision` pipeline is the single authority on terminal decisions — `sm.list()` never writes `terminated` directly (#1735). - -### Data Flow - -``` -agent-orchestrator.yaml -> Config Loader (Zod) -> Plugin Registry - -> Session Manager -> Lifecycle Manager (polling loop, state machine) - -> Events -> Notifiers - -> Web API Routes (Next.js) -> SSE (5s interval) + WebSocket (terminal) - -> Dashboard (React + xterm.js) -``` - -### Storage - -No database. 
Flat files + memory: - -- **Config:** `agent-orchestrator.yaml` (Zod-validated) -- **Global config:** `~/.agent-orchestrator/config.yaml` (all registered projects) -- **Session metadata:** `~/.agent-orchestrator/{hash}-{projectId}/sessions/{sessionId}` (key-value pairs) -- **Worktrees:** `~/.agent-orchestrator/{hash}-{projectId}/worktrees/{sessionId}/` -- **Archives:** `~/.agent-orchestrator/{hash}-{projectId}/archive/{sessionId}_{timestamp}` -- **Running state:** `~/.agent-orchestrator/running.json` (current ao start PID, port, projects) -- **Last-stop state:** `~/.agent-orchestrator/last-stop.json` (sessions killed by ao stop / Ctrl+C, includes `otherProjects` for cross-project sessions — used by ao start to offer session restore) - -Hash = SHA-256 of config directory (first 12 chars). Prevents collision across multiple checkouts. - -**Config resolution:** `loadConfig()` searches up from cwd and finds the nearest `agent-orchestrator.yaml` (typically 1 project). The global config at `~/.agent-orchestrator/config.yaml` contains all registered projects. CLI commands that need cross-project visibility (ao stop, tab completions) fall back to the global config. - -### Prompt Assembly (3 Layers) - -1. Base prompt (system instructions in core) -2. Config prompt (project-specific rules from YAML) -3. Rules files (optional `.agent-rules.md` from repo) - -## Working Principles - -These behavioral guidelines apply to every agent working on this codebase. They are not optional - they prevent the most common causes of PR rejection and rewrite. - -### Think Before Coding - -Don't assume. Don't hide confusion. Surface tradeoffs. - -- State assumptions explicitly. If uncertain, ask. -- If multiple interpretations of a task exist, present them - don't pick silently. -- If a simpler approach exists, say so. Push back when warranted. -- If something is unclear, stop. Name what's confusing. Ask. 
-- When editing `lifecycle-manager.ts` or `session-manager.ts`: state which invariants your change preserves. These files have subtle state dependencies. - -### Simplicity First - -Minimum code that solves the problem. Nothing speculative. - -- No features beyond what was asked. -- No abstractions for single-use code. -- No "flexibility" or "configurability" that wasn't requested. -- No error handling for impossible scenarios. -- Plugin slots are the extension point. Don't add configuration surface when a new plugin is the right answer. -- If you write 200 lines and it could be 50, rewrite it. - -Ask yourself: "Would a senior engineer say this is overcomplicated?" If yes, simplify. - -### Surgical Changes - -Touch only what you must. Clean up only your own mess. - -- Don't "improve" adjacent code, comments, or formatting. -- Don't refactor things that aren't broken. -- Match existing style, even if you'd do it differently. -- If your changes create orphans (unused imports, dead variables), remove them. -- Don't remove pre-existing dead code unless asked. -- Every changed line should trace directly to the task description. - -This is especially critical in: -- `types.ts` - changing an interface breaks every plugin. Minimize surface changes. -- `globals.css` - tokens are consumed across 50+ components. Don't rename casually. -- `lifecycle-manager.ts` - state transitions have implicit dependencies. Document why a transition is safe. - -### Goal-Driven Execution - -Define success criteria. Loop until verified. 
- -Transform tasks into verifiable goals: -- "Add a new status" -> "Add to enum, update `isTerminalSession`, add to dashboard column mapping, write tests for all three" -- "Fix the bug" -> "Write a test that reproduces it, then make it pass" -- "Refactor X" -> "Ensure tests pass before and after" - -For multi-step tasks, state a brief plan: - -[Step] -> verify: [check] -[Step] -> verify: [check] -[Step] -> verify: [check] - -Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification. - -## CLI Behavior (ao start / ao stop) - -### ao start -- Registers in `running.json` (PID, port, projects) -- Offers to restore sessions from `last-stop.json` — includes cross-project sessions via `otherProjects` field -- `ao start --restore` restores `last-stop.json` without prompting; `ao start --no-restore` skips restore -- **Ctrl+C performs full graceful shutdown** (same as ao stop): kills all sessions, writes last-stop state, unregisters from running.json. 10s hard timeout guarantees exit. 
- -### ao stop -- `ao stop` (no args): kills ALL sessions across ALL projects, sends SIGTERM to parent ao start process, stops dashboard, unregisters -- `ao stop <project>`: kills only that project's sessions, does NOT kill parent process or dashboard (they serve all projects) -- Always loads global config (`~/.agent-orchestrator/config.yaml`) to see all projects — local config only has the cwd project -- Records `LastStopState` with `otherProjects` field for cross-project session restore - -### ao update -- For package-manager installs, `ao update` pauses a running AO via `ao stop --yes`, runs the global package update, verifies `ao --version`, then restarts with `ao start --restore` (or `--no-restore` if requested) -- Failed package-manager updates must report that AO was not updated, include actionable remediation, and restart the previous installation if AO was paused - -### Dashboard sidebar -- Sidebar always shows sessions from ALL projects regardless of which project page is active -- `useSessionEvents` in Dashboard.tsx is called without project filter — sidebar gets unscoped sessions -- Kanban board filters client-side via `projectSessions` memo - -### Key invariants -- `sm.list()` persists `detecting` state (not `terminated`) to disk when enrichment detects dead runtimes — terminal decisions are made only by the lifecycle manager's probe pipeline (#1735) -- `deriveLegacyStatus()` maps canonical lifecycle to legacy status — new terminal reasons must be added here -- Tab completions merge local config + global config to show all projects - -## Cross-Platform (Windows) Compatibility - -AO ships on macOS, Linux, **and Windows**. All three are first-class. - -### The Golden Rule - -> **Never write `process.platform === "win32"` in new code. Use `isWindows()` from `@aoagents/ao-core`. 
If you need branching the helpers don't cover, add it to `packages/core/src/platform.ts` (or one of the targeted helper modules below) — never inline at the call site.** - -The codebase has a deliberate set of cross-platform abstractions. Every platform helper is centrally tested by mocking `process.platform`; inline checks bypass those tests and become silent regressions. Whenever you'd type `process.platform`, stop and check the helper inventory in `docs/CROSS_PLATFORM.md` first. - -### Read `docs/CROSS_PLATFORM.md` before merging if you touch any of: - -- Process spawning, killing, signalling, or process-tree teardown (`child_process`, `process.kill`, runtime plugins) -- File paths — comparison, joining, walking, anything OS-specific -- Shell commands (`exec`, `execFile`, command strings, redirections, PowerShell-vs-bash) -- Network binding, sockets, anything that says `localhost` -- Shell-outs to POSIX tools (`tmux`, `lsof`, `pkill`, `which`, coreutils) -- Adding any new `if (process.platform === "win32")` check (it should go into `platform.ts` instead — see the Golden Rule) -- Runtime / agent / workspace plugin code that runs on both `runtime-tmux` and `runtime-process` -- Agent-plugin internals: `setupPathWrapperWorkspace`, `getActivityState`, `formatLaunchCommand`, `isProcessRunning`, `detect()` -- The Windows pty-host pipe protocol or registry (`pty-client.ts`, `windows-pty-registry.ts`, `sweepWindowsPtyHosts`) - -### Quick reference: helpers to use instead of raw platform checks - -All importable from `@aoagents/ao-core` unless noted: - -| Need | Use | -|------|-----| -| OS check | `isWindows()` | -| Pick runtime | `getDefaultRuntime()` | -| Resolve shell (PowerShell vs `/bin/sh`) | `getShell()` | -| Kill process + descendants | `killProcessTree(pid, signal?)` | -| Find PID listening on a port | `findPidByPort(port)` | -| Default env (HOME / TMPDIR / SHELL / PATH / USER) | `getEnvDefaults()` | -| Compare paths (case-insensitive on NTFS/APFS) | 
`pathsEqual()` / `canonicalCompareKey()` from `cli/src/lib/path-equality.ts` | -| Escape shell args | `shellEscape()` | -| Install agent PATH wrappers (`gh`/`git`) | `setupPathWrapperWorkspace(workspacePath)` | -| Build env PATH with `~/.ao/bin` prepended | `buildAgentPath(basePath?)` | -| Tail JSONL | `readLastJsonlEntry` / `readLastActivityEntry` | -| Activity-state contract helpers | `checkActivityLogState`, `getActivityFallbackState`, `classifyTerminalActivity`, `recordTerminalActivity`, `appendActivityEntry` | -| Windows pty-host registry (used by `ao stop`) | `registerWindowsPtyHost`, `getWindowsPtyHosts`, `unregisterWindowsPtyHost`, `clearWindowsPtyHostRegistry` | -| Reap orphan pty-hosts on `ao stop` | `sweepWindowsPtyHosts()` from `@aoagents/ao-plugin-runtime-process` | -| Talk to a Windows pty-host over its named pipe | `getPipePath`, `connectPtyHost`, `ptyHostSendMessage`, `ptyHostGetOutput`, `ptyHostIsAlive`, `ptyHostKill` from `@aoagents/ao-plugin-runtime-process` | -| Validate user-supplied session ID before pipe/shell use | `validateSessionId()` from `@/server/tmux-utils` | -| Resolve a session's Windows pipe path | `resolvePipePath()` from `@/server/tmux-utils` | -| POSIX-only Ctrl+C signal forwarding | `forwardSignalsToChild()` from `cli/src/lib/shell.ts` (guard with `!isWindows()`) | -| Defensive PowerShell sweep of orphan pty-hosts | `stopStaleWindowsPtyHosts(projectDir)` from `web/src/lib/windows-pty-cleanup.ts` | - -`docs/CROSS_PLATFORM.md` has the full helper reference with import paths, the EPERM-vs-ESRCH gotcha when probing processes (with a copyable code snippet), path case-insensitivity rules, PowerShell-vs-bash differences (`& ` call-operator, `$env:VAR`, no `/dev/null`, no `$(cat …)`, `.cmd`/`.bat`/`.exe` shim resolution via `shell: isWindows()`), the IPv6 `localhost` stall on Windows, agent-plugin Windows specifics, the test pattern for mocking `process.platform`, and a 10-point pre-merge checklist. 
**Run through that checklist for any non-trivial change.** - -### Environment variables to know about - -- `AO_SHELL` — overrides `getShell()` resolution (escape hatch for Git Bash users on Windows). Args inferred from basename: `cmd` → `/c`, `bash`/`sh`/`zsh` → `-c`, anything else → `-Command`. -- `AO_BASH_PATH` — used by `script-runner.ts` on Windows to locate bash before falling back to Git Bash auto-detection. WSL bash is excluded (it sees Linux paths from a Windows cwd, breaking script semantics). - -## Conventions - -### Code Style - -- **TypeScript strict mode** — no `any` types (`@typescript-eslint/no-explicit-any: error`) -- **Consistent type imports** — `import type { Foo }` enforced by ESLint -- **Immutable patterns** — spread operator, never mutate in place -- **Prefer const** — `no-var`, `prefer-const` -- **No eval** — `no-eval`, `no-implied-eval`, `no-new-func` -- **Unused vars** — prefix with `_` (`argsIgnorePattern: "^_"`) - -### File Organization - -- Components in flat `components/` directory (no nesting) -- Hooks in `hooks/` with `use` prefix -- Tests in `__tests__/` subdirectories -- No barrel files except `core/src/index.ts` -- Max 400 lines per component file - -### Naming - -- PascalCase for components/classes -- camelCase for functions/variables -- `use*` for hooks, `is*`/`has*` for booleans - -### Imports - -- `@/` alias -> `packages/web/src/` -- `@aoagents/ao-core` for core imports -- `workspace:*` for cross-package - -### Web / Styling - -- Tailwind utility classes only — **no inline `style=` attributes** -- CSS custom properties via `var(--color-*)` from `globals.css` `@theme` block -- Dark theme must always be preserved -- **No external UI component libraries** (no Radix, shadcn, etc.) 
-- Client components marked `"use client"`; server components for pages -- State: React hooks only (no Redux/Zustand) -- Real-time updates: SSE via `useSessionEvents` hook (5s interval, do not change) - -### Testing - -- Vitest + @testing-library/react -- Test files: `{Module}.test.ts` or `{Component}.test.tsx` in `__tests__/` -- Test files for all new components -- Relaxed lint in tests: `any` and `console.log` allowed - -### Commits - -- Conventional commits: `feat:`, `fix:`, `refactor:`, `docs:`, `test:`, `chore:`, `perf:`, `ci:` -- Changesets for version management -- gitleaks pre-commit hook — never commit secrets - -## Key Files - -| File | Purpose | -|------|---------| -| `packages/core/src/types.ts` | Central type definitions (all 8 plugin interfaces) | -| `packages/core/src/session-manager.ts` | Session CRUD + stale runtime reconciliation (persists runtime_lost on dead runtimes) | -| `packages/core/src/lifecycle-manager.ts` | State machine + polling loop + reactions | -| `packages/core/src/lifecycle-state.ts` | Canonical lifecycle → legacy status mapping (deriveLegacyStatus) | -| `packages/core/src/config.ts` | YAML config loading with Zod validation | -| `packages/core/src/plugin-registry.ts` | Plugin discovery and resolution | -| `packages/core/src/index.ts` | Core public API (stable, do not break) | -| `packages/web/src/components/Dashboard.tsx` | Main dashboard view | -| `packages/web/src/components/SessionDetail.tsx` | Session detail view | -| `packages/web/src/components/DirectTerminal.tsx` | xterm.js terminal with WebSocket | -| `packages/web/src/components/SessionCard.tsx` | Kanban session card | -| `packages/web/src/hooks/useSessionEvents.ts` | SSE consumer hook (project filter optional — sidebar uses unscoped) | -| `packages/web/src/lib/types.ts` | Dashboard types | -| `packages/web/src/app/globals.css` | Design tokens and base styles (full token definitions) | -| `DESIGN.md` | **Design system reference** — design principles, token mapping, 
component patterns, anti-patterns (read this before writing any web UI) | -| `agent-orchestrator.yaml` | Project-level config (user-created) | -| `eslint.config.js` | ESLint flat config | -| `tsconfig.base.json` | Shared TypeScript base config | -| `packages/cli/src/commands/start.ts` | ao start/stop commands + Ctrl+C graceful shutdown | -| `packages/cli/src/lib/running-state.ts` | RunningState + LastStopState management (register/unregister, last-stop read/write) | -| `packages/web/src/components/ProjectSidebar.tsx` | Sidebar — always shows all projects' sessions | - -## Skills - -The `skills/` directory contains reusable workflow documents for common tasks. Load them before starting work: - -| Skill | When to load | -|-------|-------------| -| [`skills/bug-triage/SKILL.md`](skills/bug-triage/SKILL.md) | Triage a bug report — investigate, search duplicates, file GitHub issues, push fix PRs | -| [`skills/agent-orchestrator/SKILL.md`](skills/agent-orchestrator/SKILL.md) | Architecture and conventions for working on this codebase | -| [`skills/release-notes/ao-weekly-release/SKILL.md`](skills/release-notes/ao-weekly-release/SKILL.md) | Generate weekly release notes from git history | -| [`skills/social-media/SKILL.md`](skills/social-media/SKILL.md) | Social media post generation | - -See [`skills/README.md`](skills/README.md) for how to install skills into other coding agents (Cursor, Copilot, Codex, etc.). - -## Plugin Standards - -### Package Layout - -``` -packages/plugins/{slot}-{name}/ -├── package.json # @aoagents/ao-plugin-{slot}-{name} -├── tsconfig.json # extends ../../../tsconfig.base.json -├── src/ -│ ├── index.ts # manifest + create + detect (default export) -│ └── __tests__/ # vitest tests -``` - -### Naming - -- Package: `@aoagents/ao-plugin-{slot}-{name}` (lowercase, hyphenated) -- `manifest.name` must match the `{name}` suffix (e.g. 
package `...-runtime-tmux` -> name: `"tmux"`) -- `manifest.slot` must use `as const` to preserve the literal type - -### Export Contract - -Every plugin default-exports a `PluginModule`: - -```typescript -import type { PluginModule, Runtime } from "@aoagents/ao-core"; - -export const manifest = { - name: "tmux", - slot: "runtime" as const, - description: "tmux session runtime", - version: "0.1.0", -}; - -export function create(config?: Record<string, unknown>): Runtime { - // Validate config here, not in individual methods - // Use closure to capture validated config - return { ... }; -} - -// Optional: check if binary/dependency is available on system -export function detect(): boolean { ... } - -export default { manifest, create, detect } satisfies PluginModule; -``` - -### Config Handling - -- Plugin-level config comes via `create(config)` from the YAML notifier/tracker blocks -- Project-level config (e.g. `agentConfig`, `trackerConfig`) is passed to individual methods -- Validate in `create()`, store via closure — don't re-validate per call -- Warn (don't throw) for missing optional config during plugin load -- Throw with descriptive message when a required config is missing at method call time - -### Error Handling - -- Wrap errors with `cause` for debugging: `throw new Error("msg", { cause: err })` -- Return `null` for "not found" (e.g. 
tracker issue lookup), throw for unexpected errors -- Never silently swallow errors -- Use `shellEscape()` from core for all command arguments (prevent injection) - -### Interface Implementation - -- All I/O methods return `Promise` (async-first) -- Plugins are loosely coupled — communicate through Session object and Lifecycle Manager, never call other plugins directly -- Implement `destroy()` / cleanup with best-effort semantics - -### Core Utilities Available to Plugins - -```typescript -import { - shellEscape, // Safe command argument escaping - validateUrl, // Webhook URL validation - readLastJsonlEntry, // Efficient JSONL log tail (native agent JSONL) - readLastActivityEntry, // Read last AO activity JSONL entry - checkActivityLogState, // Extract sticky waiting_input/blocked from AO JSONL - getActivityFallbackState, // Last-resort fallback: actionable states + liveness age decay - recordTerminalActivity, // Shared recordActivity impl (classify + dedup + append) - classifyTerminalActivity, // Classify terminal output via detectActivity - appendActivityEntry, // Low-level JSONL append - setupPathWrapperWorkspace, // Install ~/.ao/bin wrappers + .ao/AGENTS.md - buildAgentPath, // Prepend ~/.ao/bin to PATH - normalizeAgentPermissionMode, // Normalize permission mode strings - DEFAULT_READY_THRESHOLD_MS, // 5 min — ready→idle threshold - DEFAULT_ACTIVE_WINDOW_MS, // 30s — active→ready window - ACTIVITY_INPUT_STALENESS_MS, // Deprecated compatibility export; actionable states no longer expire by wallclock - PREFERRED_GH_PATH, // /usr/local/bin/gh - CI_STATUS, ACTIVITY_STATE, SESSION_STATUS, // Constants - type Session, type ProjectConfig, type RuntimeHandle, -} from "@aoagents/ao-core"; -``` - -### Testing - -- Vitest in `src/__tests__/index.test.ts` -- Mock external CLIs, file I/O, HTTP calls -- Test manifest values, `create()` return shape, all public methods, and error paths -- Use `beforeEach` to reset mocks - -### Common Pitfalls - -- Hardcoded secrets -> use 
`process.env`, throw if missing -- Shell injection -> use `shellEscape()` for all arguments -- Large file reads -> use streaming or `readLastJsonlEntry()` -- Config validation in methods -> validate once in `create()`, closure the rest - -### Agent Plugin Implementation Standards - -All agent plugins (claude-code, codex, aider, opencode, etc.) must implement the full `Agent` interface. The dashboard depends on these methods for PR tracking, cost display, and session resume. - -**Required methods (all agents):** - -| Method | Purpose | Return `null` OK? | -|--------|---------|-------------------| -| `getLaunchCommand` | Shell command to start the agent | No | -| `getEnvironment` | Env vars for agent process (must include `~/.ao/bin` in PATH) | No | -| `detectActivity` | Terminal output classification (deprecated, but required) | No | -| `getActivityState` | JSONL/API-based activity detection (min 3 states: active/ready/idle) | Yes (if no data) | -| `isProcessRunning` | Check process alive via tmux TTY or PID | No | -| `getSessionInfo` | Extract summary, cost, session ID from agent's data | Yes (if agent has no introspection) | - -**Optional methods (implement when the agent supports it):** - -| Method | Purpose | When to skip | -|--------|---------|-------------| -| `getRestoreCommand` | Resume a previous session | Agent has no resume capability (return `null`) | -| `setupWorkspaceHooks` | Install metadata-update hooks (PATH wrappers or agent-native) | Never — required for dashboard PR tracking | -| `postLaunchSetup` | Post-launch config (re-ensure hooks, resolve binary) | Only if no post-launch work needed | -| `recordActivity` | Write terminal-derived activity to JSONL for `getActivityState` | Agent has native JSONL with full state coverage (Claude Code). Codex implements it as a safety net for when its native JSONL is missing/unparseable. | - -**Metadata hooks are critical.** Without `setupWorkspaceHooks`, PRs created by agents won't appear in the dashboard. 
Two patterns exist: -- **Agent-native hooks** (Claude Code): PostToolUse hooks in `.claude/settings.json` -- **PATH wrappers** (Codex, Aider, OpenCode): `~/.ao/bin/gh` and `~/.ao/bin/git` intercept commands. Call `setupPathWrapperWorkspace(workspacePath)` — it installs wrappers to `~/.ao/bin/` and writes session context to `.ao/AGENTS.md` (gitignored, does not modify tracked files). - -**Environment requirements:** -- All agents must set `AO_SESSION_ID` and optionally `AO_ISSUE_ID` -- All agents using PATH wrappers must prepend `~/.ao/bin` to PATH -- Use `normalizeAgentPermissionMode` from `@aoagents/ao-core` (not a local duplicate) - -**Activity detection architecture:** - -`getActivityState` is the most critical method in the agent plugin. The dashboard, lifecycle manager, and stuck-detection all depend on it returning correct states. **Every agent plugin must produce all 6 states over its lifetime:** - -``` -spawning → active ↔ ready → idle → exited - ↘ waiting_input / blocked ↗ -``` - -| State | Meaning | When | -|-------|---------|------| -| `active` | Agent is working right now | Activity within last 30s | -| `ready` | Agent finished recently, may resume | 30s–5min since last activity | -| `idle` | Agent has been quiet for a while | >5min since last activity | -| `waiting_input` | Agent is blocked on user approval | Permission prompt visible | -| `blocked` | Agent hit an error it can't recover from | Error state detected | -| `exited` | Process is dead | `isProcessRunning` returns false | - -**The `getActivityState` contract — implement exactly this cascade:** - -```typescript -async getActivityState(session, readyThresholdMs?): Promise { - // 1. PROCESS CHECK — always first - if (!running) return { state: "exited", timestamp }; - - // 2. ACTIONABLE STATES — check for waiting_input/blocked - // Source: native JSONL (Claude Code, Codex) OR AO activity JSONL (others) - // These are the only states checkActivityLogState() surfaces. 
- // If found, return immediately. - - // 3. NATIVE SIGNAL — agent-specific API for timestamp (preferred) - // Source: agent's session list API, native JSONL timestamps, etc. - // Classify by age: active (<30s) / ready (30s–threshold) / idle (>threshold) - - // 4. JSONL ENTRY FALLBACK — always implement this - // Source: getActivityFallbackState(activityResult, activeWindowMs, threshold) - // Uses the entry's detected state + entry.ts for age-based decay. - // Decay only demotes (active→ready→idle), never promotes. - // This is the SAFETY NET when the native signal is unavailable. - // Without this, getActivityState returns null and the dashboard shows - // no activity for the entire session lifetime. - - // 5. Return null only if there is genuinely no data at all. -} -``` - -**Step 4 is mandatory.** If you skip the JSONL entry fallback, `getActivityState` will return `null` whenever the native API fails (binary not in PATH, API changed, session not found, timeout). The dashboard will show no activity state and stuck-detection breaks. This was a real bug in the OpenCode plugin — `findOpenCodeSession` returned null due to a session creation issue, and without the fallback, the entire active/ready/idle flow was dead. Use `getActivityFallbackState()` from core — it handles age-based decay and staleness caps correctly. - -**Two activity detection patterns exist:** - -| Pattern | Used by | How it works | -|---------|---------|-------------| -| **Native JSONL** | Claude Code, Codex | Agent writes its own JSONL with rich state (`permission_request`, `tool_call`, `error`, etc.). `getActivityState` reads the last entry and maps it to activity states. | -| **AO Activity JSONL** | Aider, OpenCode, new agents | Agent implements `recordActivity`. Lifecycle manager calls it each poll cycle with terminal output. It calls `classifyTerminalActivity()` → `appendActivityEntry()` to write to `{workspacePath}/.ao/activity.jsonl`. `getActivityState` reads from this file. 
| - -**For agents using AO Activity JSONL (the common case for new plugins):** - -1. Implement `recordActivity` — delegate to the shared `recordTerminalActivity()`: -```typescript -async recordActivity(session: Session, terminalOutput: string): Promise<void> { - if (!session.workspacePath) return; - await recordTerminalActivity(session.workspacePath, terminalOutput, (output) => - this.detectActivity(output), - ); -} -``` - -`recordTerminalActivity` handles classification, deduplication (20s window for non-actionable states), and appending. You don't need to implement dedup yourself. - -2. Implement `detectActivity` with patterns specific to the agent's terminal output: -```typescript -detectActivity(terminalOutput: string): ActivityState { - // Match the ACTUAL prompts/patterns the agent emits. - // Test with real terminal output — don't guess patterns. - // Return: "idle" | "active" | "waiting_input" | "blocked" -} -``` - -3. In `getActivityState`, use `checkActivityLogState()` for waiting_input/blocked, then fall back to `getActivityFallbackState()`: -```typescript -// checkActivityLogState returns non-null ONLY for waiting_input/blocked. -// active/idle/ready intentionally return null — use the fallback for those. -const activityResult = await readLastActivityEntry(session.workspacePath); -const activityState = checkActivityLogState(activityResult); -if (activityState) return activityState; - -// ... try native signal first (session list API, git commits, etc.) ... - -// JSONL entry fallback (REQUIRED — do not skip) -const activeWindowMs = Math.min(DEFAULT_ACTIVE_WINDOW_MS, threshold); -const fallback = getActivityFallbackState(activityResult, activeWindowMs, threshold); -if (fallback) return fallback; -``` - -`getActivityFallbackState` uses the entry's detected state with age-based decay (active→ready→idle) and respects the entry state as a ceiling (never promotes idle to active). Stale waiting_input/blocked entries (>5min) decay to idle. 
- -**Required tests for `getActivityState` — all agent plugins must have these:** - -1. Returns `exited` when process is not running -2. Returns `waiting_input` from JSONL when agent is at a permission prompt -3. Returns `blocked` from JSONL when agent hit an error -4. Returns `active` from native signal when agent was recently active -5. Returns `active` from JSONL entry fallback when native signal fails (fresh entry) -6. Returns `idle` from JSONL entry fallback when native signal fails (old entry with age decay) -7. Returns `null` when both native signal and JSONL are unavailable - -**`isProcessRunning` must:** -- Support tmux runtime (TTY-based `ps` lookup with process name regex) -- Support process runtime (PID signal-0 check with EPERM handling) -- Match BOTH the node wrapper name AND the actual binary name (some agents install as `.agentname` with a dot prefix — the regex must handle this) -- Return `false` (not `null`) on error - -## Constraints - -- C-01: No new UI component libraries -- C-02: No inline styles in new/modified code -- C-04: Component files max 400 lines -- C-05: Dark theme preserved (no redesign) -- C-06: Next.js App Router only -- C-07: No animation libraries -- C-12: Test files for all new components -- C-13: pnpm `workspace:*` protocol for cross-package deps -- C-14: SSE 5s interval unchanged +Read and follow [`AGENTS.md`](AGENTS.md) for repository layout, commands, coding conventions, and hard rules. + +## App state lives under `~/.ao` only + +All app state, the daemon's data dir, `running.json`, worktrees, and the Electron +supervisor's `userData` (Chromium cache, cookies, local/session storage, crash +dumps), must resolve under `~/.ao` (overridable via `AO_DATA_DIR`/`AO_RUN_FILE`). +Never write to or read from `~/Library/Application Support` or any other OS-default +app-data location. `frontend/src/main.ts` pins Electron's `userData` to +`~/.ao/electron`; do not remove that override. See the hard rule in `AGENTS.md`. 
+ +## Design System + +Always read [`DESIGN.md`](DESIGN.md) before making any visual or UI decision — +**start with the "clone agent-orchestrator verbatim" banner at the top**, which +governs the current look. + +The renderer **clones the agent-orchestrator web app verbatim** +(`~/Projects/agent-orchestrator/packages/web/src`) in looks and design, with a +refined-blue accent and the terminal keeping its own palette. This **supersedes the +older "match emdash" framing** in DESIGN.md (per explicit user decision 2026-06-10). +Build new UI from shadcn primitives (`components/ui/*`) where a component fits. Do not +deviate without explicit user approval. In QA/review, flag any renderer code that +diverges from **agent-orchestrator** — do **not** re-flag emdash mismatches. + +When showing or demoing frontend changes, run `ao preview [url]` from inside the +session so the change renders in the desktop browser panel (the inspector rail's +Browser tab); do not just describe it. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 825003c562..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,361 +0,0 @@ -# Contributing to Agent Orchestrator - -Thanks for your interest in contributing. This guide covers how to report bugs, submit PRs, and build new plugins. - -## Quick Links - -- [Setup and first build](#development-setup) -- [Plugin development](#building-a-plugin) -- [Code conventions](#code-conventions) -- [PR process](#pull-request-process) - ---- - -## Reporting Bugs - -Open an issue at [github.com/ComposioHQ/agent-orchestrator/issues](https://github.com/ComposioHQ/agent-orchestrator/issues). - -Include: - -- `ao --version` output -- OS and Node.js version (`node --version`) -- Steps to reproduce -- What you expected vs. what happened -- Relevant output from `ao doctor` - ---- - -## Development Setup - -**Prerequisites**: Node.js 20+, pnpm 9.15+, Git 2.25+, gh CLI - -- **Unix (macOS/Linux)**: also install `tmux` — it is the default runtime. 
-- **Windows**: tmux is **not** required. The default runtime on Windows is `process` (ConPTY via `node-pty`), and PowerShell is the default shell. See [docs/CROSS_PLATFORM.md](docs/CROSS_PLATFORM.md) for what's different on Windows when contributing. - -```bash -git clone https://github.com/ComposioHQ/agent-orchestrator.git -cd agent-orchestrator -pnpm install -pnpm build -``` - -Build order matters — `@aoagents/ao-core` must be built before the CLI, web, or plugins can run. `pnpm build` at the root handles this automatically. - -### Running tests - -```bash -pnpm test # all packages -pnpm --filter @aoagents/ao-core test # core only -pnpm --filter @aoagents/ao-core test -- --watch # watch mode -pnpm test:integration # integration tests -``` - -### Running the dashboard locally - -```bash -cp agent-orchestrator.yaml.example agent-orchestrator.yaml -# edit agent-orchestrator.yaml for your setup -pnpm --filter @aoagents/ao-web dev -``` - -### Refreshing a local AO install - -If your local `ao` launcher or built packages seem stale, refresh the install from a clean `main` checkout: - -```bash -git switch main -git status --short --branch # confirm the install repo is clean -ao update -``` - -`ao update` fast-forwards the local install repo, reinstalls dependencies, clean-rebuilds `@aoagents/ao-core`, `@aoagents/ao-cli`, and `@aoagents/ao-web`, refreshes the global launcher with `npm link`, and finishes with CLI smoke tests. Use `ao update --skip-smoke` when you only need the rebuild step, or `ao update --smoke-only` when validating an existing install. - -## Release Architecture (maintainers only) - -AO uses a **two-stage release pipeline**. This public repo handles version bumps, git tags, and GitHub releases. npm publishing runs on a private server (AO cron job) that polls GitHub releases and publishes when a new tag is ahead of the current npm version. Org compliance forbids npm publish credentials in public repositories, so `NPM_TOKEN` never enters this repo. 
- -### Where things happen - -| Stage | Where | Responsibility | -| ------------------------ | ------------------------------ | ------------------------------------------------------------------------ | -| Versioning + GitHub release | This repo (public, CI) | Changesets version bumps, git tags, `gh release create` | -| npm publish | Private server (AO cron) | Detects new GitHub releases → builds → `pnpm changeset publish` | - -The flow on every release: - -``` -This repo (public CI) Private server (AO cron) -────────────────────── ───────────────────────── -release.yml: Polls gh release list - changeset version Detects new vX.Y.Z tag - push vX.Y.Z tag Compare to npm @latest/@nightly - gh release create vX.Y.Z If behind → checkout tag → build → publish - -canary.yml: Same cron, detects prereleases - changeset version --snapshot Publishes with --tag nightly - commit snapshot bump + tag - gh release create --prerelease -``` - -Each release pushes a single umbrella `vX.Y.Z` git tag pointing at the version-bump commit. We deliberately do **not** run `pnpm changeset tag`, which would emit one tag per publishable package (~27) every release — fine for stable's monthly cadence, noisy on the nightly cadence (~7 000 tags/year). The npm publisher only consumes the umbrella tag, so the per-package tags add no value. - -### Secrets - -This repo requires **no additional secrets** beyond the automatic `GITHUB_TOKEN`. `NPM_TOKEN` lives only on the private server. - -### How releases are cut - -- **Stable**: merge the "chore: version packages" PR opened by `changesets/action`. `release.yml` tags the bumped packages and creates a `vX.Y.Z` GitHub release. The AO cron detects the new release and publishes to npm `@latest`. -- **Nightly**: `canary.yml` runs on cron (23:30 IST Fri–Tue) or via `workflow_dispatch`. It snapshots versions to `X.Y.Z-nightly-` format (e.g., `0.6.1-nightly-7c46dc92`), tags, and creates a prerelease GitHub release. 
The AO cron detects the new prerelease and publishes to npm `@nightly`. - -There is no path from this repo that calls `npm publish` directly. - -### Idempotency - -`release.yml` is idempotent: each step (tag push, GitHub release creation) is gated on whether that piece of state already exists, so a re-run after a partial failure picks up only the missing steps. - -The AO cron is also idempotent — `pnpm changeset publish` skips packages whose current version is already on the registry, so re-running after a partial publish is safe. - -### Recovery - -If `release.yml` fails after the GitHub release was created, **re-run the failed workflow**: the state-detection step will see that the tag and release already exist and skip those steps. - -If the AO cron fails to publish, it will retry on the next poll cycle (every 15 minutes). No manual intervention needed for transient failures. For persistent issues, check the cron logs on the private server. - -## Testing your changes - -### Latest main at any time - -```bash -npm install -g @aoagents/ao@nightly -``` - -The nightly cron publishes from `main` daily at 23:30 IST (Fri–Tue). The bake window (Wed–Thu) pauses scheduled nightlies; release captains can re-cut a nightly via `workflow_dispatch` if a fix lands during bake. - ---- - -## Building a Plugin - -The plugin system is the primary extension point. You can add support for new agents, runtimes, issue trackers, and notification channels without modifying core code. - -### 1. Understand the interface - -All plugin interfaces are in [`packages/core/src/types.ts`](packages/core/src/types.ts). 
Pick the slot that matches what you want to build: - -| Slot | Interface | Example use case | -| ----------- | ----------- | ------------------------------------ | -| `runtime` | `Runtime` | Run agents in Docker, SSH, cloud VMs | -| `agent` | `Agent` | Adapt a new AI coding tool | -| `workspace` | `Workspace` | Different code isolation strategies | -| `tracker` | `Tracker` | Jira, Asana, or custom issue systems | -| `scm` | `SCM` | GitLab, Bitbucket support | -| `notifier` | `Notifier` | Email, Discord, custom webhooks | -| `terminal` | `Terminal` | Different terminal UI integrations | - -### 2. Create the package - -```bash -mkdir -p packages/plugins/runtime-myplugin/src -cd packages/plugins/runtime-myplugin -``` - -`package.json`: - -```json -{ - "name": "@aoagents/ao-runtime-myplugin", - "version": "0.1.0", - "type": "module", - "main": "dist/index.js", - "types": "dist/index.d.ts", - "scripts": { - "build": "tsc", - "typecheck": "tsc --noEmit", - "test": "vitest" - }, - "dependencies": { - "@aoagents/ao-core": "workspace:*" - } -} -``` - -`tsconfig.json` — copy from an existing plugin like `packages/plugins/runtime-tmux/`. - -### 3. Implement the interface - -```typescript -// src/index.ts -import type { PluginModule, Runtime } from "@aoagents/ao-core"; - -export const manifest = { - name: "myplugin", - slot: "runtime" as const, - description: "My custom runtime", - version: "0.1.0", -}; - -export function create(): Runtime { - return { - name: "myplugin", - async create(config) { - /* start session */ - }, - async destroy(sessionName) { - /* tear down */ - }, - async send(sessionName, text) { - /* send input */ - }, - async isRunning(sessionName) { - return false; - }, - }; -} - -export default { manifest, create } satisfies PluginModule; -``` - -### 4. 
Register the plugin - -Add it to the CLI's dependencies in `packages/cli/package.json`: - -```json -"@aoagents/ao-runtime-myplugin": "workspace:*" -``` - -Then register it in `packages/core/src/plugin-registry.ts` inside `loadBuiltins()`. - -### 5. Add tests - -```typescript -// src/index.test.ts -import { describe, it, expect } from "vitest"; -import { create } from "./index.js"; - -describe("myplugin runtime", () => { - it("reports not running for unknown session", async () => { - const runtime = create(); - expect(await runtime.isRunning("unknown-session")).toBe(false); - }); -}); -``` - -### 6. Build and test - -```bash -pnpm --filter @aoagents/ao-runtime-myplugin build -pnpm --filter @aoagents/ao-runtime-myplugin test -``` - -### Publishing to the Marketplace Registry - -To list your plugin in the AO marketplace so others can install it with `ao plugin install`, submit a PR that adds an entry to `packages/cli/src/assets/plugin-registry.json`. - -Each entry requires: - -- **`id`** — short kebab-case name (e.g. `tracker-jira`) -- **`package`** — npm package name -- **`slot`** — one of: `runtime`, `agent`, `workspace`, `tracker`, `scm`, `notifier`, `terminal` -- **`description`** — one-line summary -- **`source`** — always `"registry"` -- **`latestVersion`** — semver string - -Optionally include `setupAction` if post-install configuration is needed (e.g. `"openclaw-setup"`). - -Your plugin package must satisfy the contract in [`docs/PLUGIN_SPEC.md`](docs/PLUGIN_SPEC.md) — export a `PluginModule` with a valid manifest and `create()` function. The package must be published to npm before your registry PR is merged so `ao plugin install` can fetch it. - ---- - -## Code Conventions - -See [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md) for the full reference. The short version: - -### Behavioral Guidelines - -Beyond syntax and style, follow these principles: - -- **State assumptions explicitly** - if a task is ambiguous, present interpretations rather than guessing. 
-- **Minimum viable change** - no speculative features, no unused abstractions, no formatting changes outside your diff. -- **Every changed line traces to the task** - if you can't explain why a line changed, revert it. -- **Write a failing test first** - for bug fixes, reproduce the bug in a test before implementing the fix. -- **Don't refactor unrelated code** - mention dead code you spot, don't delete it. - -These match the "Working Principles" section in CLAUDE.md. AI agents working on this repo are instructed to follow these same rules. - -**TypeScript** - -- ESM modules, `.js` extensions on local imports -- `node:` prefix for builtins -- No `any` — use `unknown` + type guards -- Strict mode, semicolons, double quotes, 2-space indent - -**Shell commands** - -- Always `execFile`, never `exec` -- Always pass args as an array, never interpolate into strings -- Always add timeouts - -**Tests** - -- Unit tests alongside source in `src/__tests__/` -- Mock plugins in tests — don't call real tmux, GitHub, or external services -- Test the interface contract, not internal implementation details - ---- - -## Pull Request Process - -1. **Fork and branch** from `main`: - - ```bash - git checkout -b feat/your-feature - ``` - -2. **Make your changes** — keep PRs focused on one thing. - -3. **Build, test, lint**: - - ```bash - pnpm build - pnpm test - pnpm lint - pnpm typecheck - ``` - -4. **Commit** with [Conventional Commits](https://www.conventionalcommits.org/): - - ``` - feat: add kubernetes runtime plugin - fix: handle missing LINEAR_API_KEY gracefully - docs: add plugin development guide - chore: update vitest to v2 - ``` - -5. **Push and open a PR**. In the PR description: - - What changed and why - - How to test it - - Link to the issue it closes (e.g., `Closes #123`) - -6. **Address review comments** — update the branch and push. Reply to comments when done. - -### What gets reviewed - -- Does the change work as described? -- Are there tests? 
-- Does it follow the TypeScript and shell conventions in [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md)? -- For new features: is it documented? - -### CI checks - -All PRs must pass: - -- `pnpm build` — no TypeScript errors -- `pnpm test` — all tests green -- `pnpm lint` — no lint errors -- Secret scanning — no leaked credentials - ---- - -## License - -By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE). diff --git a/DESIGN.md b/DESIGN.md index bb7c841958..4778112be1 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -1,181 +1,274 @@ -# Design System — Agent Orchestrator - -> **This document supersedes the previous "Warm Terminal" system.** AO's design -> language is **Mission Control**: a calm, high-signal control room for -> supervising a fleet of autonomous agents. The earlier warm-neutral direction -> (Geist Sans, amber/orange orchestrator CTA, brown-tinted surfaces) is retired. -> This file is the single source of truth — there is no second package-level -> `DESIGN.md`. Origin: the dashboard design-language exploration in -> [`docs/design/dashboard-language.md`](docs/design/dashboard-language.md) and -> its canonical mockups ([`kanban.html`](docs/design/mockups/kanban.html), -> [`session.html`](docs/design/mockups/session.html)). +# Design System — ReverbCode + +> Source of truth for the ReverbCode desktop UI (Electron + React 19 + Tailwind v4 + +> Radix/shadcn + xterm, in `frontend/src/renderer`). Read this before any visual +> or UI change. +> Created by `/design-consultation` on 2026-06-09. + +## ⚠️ Design direction — clone agent-orchestrator verbatim (SUPERSEDES emdash · 2026-06-10) + +By explicit user decision (2026-06-10), the renderer **clones the +agent-orchestrator web app verbatim** in looks and design. This **supersedes the +"match emdash" direction** documented in _Aesthetic Direction_ and the palette +sections below — where they conflict, **agent-orchestrator wins**.
Do not re-flag +"this doesn't match emdash" in QA/review; flag divergence from **agent-orchestrator**. + +- **Reference (the user's own app):** `~/Projects/agent-orchestrator/packages/web/src` + — `app/globals.css`, `app/mc-board.css`, `app/mc-sidebar.css`, + `components/{ProjectSidebar,Dashboard,SessionCard,SessionDetailHeader,SessionInspector,StatusBadge}.tsx`. +- **Palette (live in `frontend/src/renderer/styles.css` `:root`):** `--bg #0a0b0d`, + `--bg-1 #15171b`, `--fg #f4f5f7`, `--fg-muted #9ba1aa`, `--fg-passive #646a73`, + hairline white-alpha borders, accent `--accent #4d8dff`; status: working=orange + `#f59f4c`, needs-you=amber `#e8c14a`, mergeable=green `#74b98a`, fail=red `#ef6b6b`. + The sidebar rail is the cooler `#08090b`. +- **Cloned surfaces:** the four-column gradient kanban board, the `ProjectSidebar` + (brand + project disclosure + nested session rows + Settings menu footer), the + session topbar (Kanban back button + identity + breathing `StatusBadge` pill), and + the shared `DashboardTopbar`/`DashboardSubhead` chrome (Coding/Reviews tabs · "N + working" pill · subhead) reused across board/review/PR/settings. +- **Build with shadcn primitives** where a component fits (`components/ui/*`: + dropdown-menu, select, card, table, tooltip, …); agent-orchestrator's own + hand-rolled CSS components are structure/behaviour reference only. +- The one carried-over divergence still holds: the **accent is refined blue**, and + the **terminal keeps its own palette**. Everything else tracks agent-orchestrator. +- **Approved divergence (2026-06-10):** on macOS, a titlebar cluster (sidebar toggle + + back/forward history arrows, `TitlebarNav`) sits beside the traffic lights, + VS Code-style — the web reference has no window chrome, so no analogue exists. 
+- **Approved divergence (2026-06-10):** the session inspector rail is fully + collapsible, built on the shadcn resizable primitive (`pnpm dlx shadcn add +resizable`, react-resizable-panels v4 `collapsible` panel + imperative API, + user-requested). The panel animates to 0% via a flex-grow transition while the + content keeps a stable min-width (yyork-style, no mid-animation reflow). Toggled + by a `PanelRight` icon button in the session topbar and ⌘⇧B; open state + split + width persist. The AO reference keeps the rail always visible. +- **Approved divergence (2026-06-12):** the shell topbar spans the full window + width and the sidebar is pinned below it (`top-14`), so the sidebar's right + border stops at the header instead of cutting through the macOS traffic-light + strip (user-requested). The AO reference keeps a full-height sidebar with the + header beside it. On macOS the header always pads past the lights + TitlebarNav + cluster (`.is-under-titlebar-nav`, 180px). ## Product Context -- **What this is:** A web dashboard for supervising fleets of parallel AI coding agents. Each agent gets its own git worktree, branch, and PR. The dashboard is the operator's single pane of glass. -- **Who it's for:** Developers running 10–30+ agents in parallel. It must stay calm and glanceable with 20+ agents running. -- **Project type:** Next.js 15 (App Router) + React 19 + Tailwind v4. A kanban fleet board (home) and a per-session detail view. -## Concept & Identity +- **What this is:** ReverbCode is an Electron desktop app for supervising many parallel + AI coding-agent sessions, backed by a Go daemon (`backend/`). The `ao` CLI is the + thin client over the same daemon. +- **Who it's for:** professional software engineers running multiple coding agents at + once who need to delegate, watch, intervene, and ship PRs. +- **Space/peers:** agent orchestration / parallel-agent desktop tools. Closest peers: + **emdash** (the primary design reference), **PostHog Code**, Conductor. 
+- **Project type:** dark-mode-primary desktop app; terminal-dense; keyboard-driven; + runs all day. +- **The one memorable thing:** leverage and speed — "I'm more in control here than + babysitting N terminal tabs myself." -**A calm, high-signal control room.** Linear-grade restraint, dense but humane. -State is glanceable, not noisy. +### Product flow (what the UI must serve) -**The blue/orange split.** The mascot is the Claude Code character recolored -**blue** — the *conductor*. This drives a deliberate two-color semantic split: +ReverbCode is **orchestrator-led**, which is the one thing that differs from emdash +(a flat list of independent sessions). Grounded in the daemon +(`backend/internal/session_manager/manager.go`, `docs/architecture.md`): -- **Blue = the orchestrator (AO itself / "you").** Brand, the single solid-fill - primary CTA (the **Orchestrator** button), active selection, focus, links. -- **Orange = the agents being conducted.** The per-agent identity and the - **`working`** status — the one "an agent is alive right now" signal (a gently - breathing dot, the terminal cursor). +- A **Project** is a registered git repo. +- Per project there is **one active Orchestrator** session plus **N Worker** sessions. + Both are the same underlying "session" (durable facts: `activity_state`, + `is_terminated`, PR facts); they differ only by `Kind` (`KindOrchestrator` vs the + default worker). A project may run the orchestrator on a different agent than its workers. +- The **Orchestrator is the human-facing coordinator**: you talk to it; it spawns + workers (`ao spawn`), messages them (`ao send`), tracks progress, and synthesizes + results. It avoids implementing unless necessary. +- A **Worker is a normal agent session** — nothing special-cased. It runs one focused + task in an isolated git worktree + branch, with the agent CLI in a terminal as the + conversation, producing a diff → commit/push → PR. 
It escalates to the orchestrator + only for true blockers or cross-session coordination. +- The daemon **observes** runtime + PR/CI/review facts and **derives** display status + at read time: `working`, `needs_input`, `ci_failed`, `changes_requested`, + `mergeable`, `approved`, `review_pending`, `pr_open`, `idle`, `terminated`, `merged`. + Never store display status; keep session facts small. -Blue does not *replace* orange; they mean different things. The board reads as a -blue conductor surrounded by orange agents. +## Aesthetic Direction -## Color discipline +> **Superseded (2026-06-10):** see the _Design direction — clone agent-orchestrator +> verbatim_ banner at the top. The emdash framing below is retained for history; the +> live look tracks agent-orchestrator (same flat near-black / hairline family, so most +> of this still reads true). -**Color = meaning. Most states get none.** The UI is grayscale by default; -color is rationed so it always signals something. +- **Direction:** match **emdash** exactly — flat, near-black, hairline-bordered, + utilitarian. Industrial control surface, calm chrome, the terminal as the center of gravity. +- **Decoration level:** minimal. Type + 1px hairlines do all the work. No gradients, + glow, blobs, or emoji. +- **Mood:** low-glare, dense, keyboard-native; signal-over-noise. +- **Reference:** [emdash](https://github.com/generalaction/emdash) (primary, visual + + structural), [PostHog Code](https://github.com/PostHog/code) (secondary). Tokens + below were extracted from emdash's `src/renderer/index.css`. +- **Deliberate tradeoff:** to _be_ emdash, we use the **system font stack** (not a + custom typeface) and emdash's neutral palette. We diverge in exactly one place: the + accent is ReverbCode's **refined blue**, not emdash's jade green. The terminal keeps + green (it is the agent CLI). 
-| Token | Hex | Use | -|-------|-----|-----| -| Blue | `#4d8dff` | orchestrator / you — primary action, selection, focus, links (the *only* solid-fill button) | -| Orange | `#f59f4c` | a working agent (status dot + terminal cursor) | -| Amber | `#e8c14a` | needs-your-input / attention (incl. unresolved review comments, changes requested) | -| Red | `#ef6b6b` | failing / stuck (CI failed, crashed, conflicts) | -| Green | `#74b98a` | mergeable / passed / resolved | -| Neutral grays | — | everything healthy & passive: in-review, idle, done, metadata | +## Typography -Diff add/remove green & red are permitted in their literal context (the Changes view). +System fonts only, like emdash — no custom/Google fonts, zero font payload. -### Surfaces & lines (dark, cool neutral) +- **UI / body / display:** `-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, +Oxygen, Ubuntu, Cantarell, "Fira Sans", "Helvetica Neue", sans-serif` (San Francisco + on macOS). +- **Mono / terminal / code / eyebrow labels:** `Menlo, Monaco, Consolas, +"Liberation Mono", "Courier New", monospace`. +- **Eyebrow labels** (section titles, dialog titles, the rail "PROJECTS" header): + mono, **uppercase**, `letter-spacing: .12–.14em`, `--foreground-passive`. +- **Scale:** 14px base UI / sidebar (`text-sm`, weight 400) · 12px secondary + labels + (`text-xs`) · 13px code/mono/terminal · 11px tiny · 10px micro + badges · 9px sidebar + badge label. Buttons are `font-normal` (400), not bold. -The product is **dark-only mission control**. The dark theme is authoritative. 
+## Color -| Token (literal) | Value | Maps to semantic token | -|-----------------|-------|------------------------| -| `--bg` | `#0a0b0d` | `--color-bg-base` (app base) | -| `--bg-side` | `#08090b` | `--color-bg-sidebar` | -| `--card` | `#15171b` | `--color-bg-surface` / `--color-bg-card` — **the only bordered surface** | -| `--card-hover` | `#191b20` | `--color-bg-elevated` / `-elevated-hover` | -| `--col` | `#0e0f12` | `--color-column-bg` (kanban trough) | -| `--term` | `#0c0d10` | xterm background (terminal-themes.ts) | -| `--line` | `rgba(255,255,255,0.06)` | `--color-border-subtle` / `-default` | -| `--line-2` | `rgba(255,255,255,0.10)` | `--color-border-strong` | -| `--t1 … --t4` | `#f4f5f7` `#9ba1aa` `#646a73` `#444951` | `--color-text-primary/secondary/tertiary/muted` | +emdash's flat Radix-neutral near-black ramp carries the whole interface; color is rare +and meaningful. Values are sRGB approximations of emdash's `color(display-p3 …)` tokens. -These literals live at the top of the `.dark` block in -`packages/web/src/app/globals.css`; the existing `--color-*` semantic tokens -**alias** them, so all consuming CSS keeps working. **Don't rename the semantic -tokens** — add/alias and migrate. 
+### Dark (primary) -## Typography +| Role | Hex | +| ------------------------------------ | --------------- | +| `--bg` canvas | `#111111` | +| `--bg-1` surface | `#191919` | +| `--bg-2` raised / hover / active row | `#222222` | +| `--bg-3` | `#2a2a2a` | +| `--fg` text | `#eeeeee` | +| `--fg-muted` | `#b4b4b4` | +| `--fg-passive` | `#6e6e6e` | +| `--border` hairline | `#3a3a3a` | +| `--border-1` | `#484848` | +| **`--accent` (blue)** | **`#5b9dff`** | +| `--needs-you` / in-progress (amber) | `#ffcc4a` | +| `--success` / mergeable (green) | `#6cb16c` | +| terminal green | `#7bd88f` | +| `--error` (red) | `#d4544f` | +| text selection | `#3f8ef7` @ 35% | +| terminal bg | `#161616` | + +### Light (supported, not primary) + +| Role | Hex | +| ------------------------- | --------------------------------- | +| canvas / surface / raised | `#fcfcfc` / `#ffffff` / `#ededee` | +| text / muted / passive | `#1a1a1a` / `#666666` / `#9a9a9a` | +| border | `#e3e3e5` | +| accent (blue) | `#2563eb` | +| amber / green / red | `#9a6b00` / `#1a7f37` / `#c0392b` | + +### Accent rules + +- **Blue** = the live edge only: primary buttons, the active/selected session, focus + rings. Never decorative. +- **Amber** = an agent needs you (blocked / `needs_input` / `review_pending`). +- **Green** = `mergeable`/success and terminal/agent CLI text. +- **Red** = `ci_failed` / destructive. +- These map 1:1 to the daemon's derived statuses. + +### Status indicator (no text badges) + +Session status is a single ~14px glyph in one fixed slot, never a text pill/badge: + +- **Working / active** → an animated spinner (accent). +- **Has an open PR** → a PR icon, tinted by PR state: mergeable/approved green, + `ci_failed` red, review/`changes_requested` amber, plain `pr_open` muted. +- **Otherwise** → a filled dot: `needs_input` amber (pulsing), idle/done muted gray. + +Precedence: **working spinner > PR icon > dot**. 
Implemented as `StatusGlyph` in +`components/SideRail.tsx`; used in the orchestrator's Workers list. (Worker rows in the +left rail stay name-only — no glyph.) -Self-hosted via `next/font/local` (`packages/web/src/fonts/`). **No external font CDN.** - -- **UI = Schibsted Grotesk** (`--font-sans`). The product voice. Used for all - chrome: titles, labels, buttons, body. A distinctive grotesk — not Inter/system. -- **Machine = JetBrains Mono** (`--font-mono`). Branches, IDs, PR numbers, costs, - timestamps, terminal — anything the machine emits. -- **Numerals:** `tabular-nums` wherever numbers appear (counts, costs, tokens). -- **Never render chrome in mono.** The sans/mono split is itself a design device: - product voice vs. machine voice. - -(Geist Sans is removed. JetBrains Mono is no longer used for display headlines.) - -## Status as one system - -A single semantic spectrum maps the canonical lifecycle to a `{tone, label}` -pair and is used **everywhere** — kanban card badge, sidebar dot, session topbar -pill. It lives in [`lib/status-spec.ts`](packages/web/src/lib/status-spec.ts) -(`getStatusSpec`) and renders through -[`StatusBadge`](packages/web/src/components/StatusBadge.tsx). - -| Tone | Color | Meaning | -|------|-------|---------| -| `working` | orange (breathing) | an agent is alive right now | -| `input` | amber | needs your input | -| `changes` | amber | changes requested | -| `fail` | red | CI failed / stuck / crashed / conflicts | -| `review` | neutral | in review / waiting on a reviewer | -| `ready` / `merged` | green | mergeable / landed | -| `neutral` | gray | idle / done / terminated | - -Tone is refined from the (tested) attention-level bucket so a card's badge never -disagrees with the column it sits in. - -## Layout patterns - -### Fleet board (home) — `kanban.html` -- **Lead with the fleet, not the terminal.** Answers "what are all my agents doing?" at a glance. 
-- **Frameless columns:** lifecycle columns **Working → Needs you → In review → - Ready to merge** are borderless tinted troughs with a faint *per-column* - semantic top-glow. The **card is the only bordered surface** — no box-in-box. -- **Compact cards:** status badge + id, task title (2-line clamp), branch, a thin - footer. Done/Terminated collapses at the bottom. -- The sidebar always shows **all projects'** sessions; the board filters - client-side. The SSE refresh interval is **5s** (unchanged — C-14). - -### Session detail — `session.html` -- **Framed terminal** as a real surface (header + viewport), flush to sidebar/topbar. - It is a **live xterm.js/PTY** — we do *not* style its content; we only set the - frame and the xterm.js `theme` object (background `--term`, orange cursor, blue - selection, a 16-color ANSI palette tied to the tokens — see `terminal-themes.ts`). - Claude Code's own input lives inside the terminal; there is no separate composer. -- **Pluggable inspector rail** (a registered-view slot): - [`SessionInspector`](packages/web/src/components/SessionInspector.tsx) with views - **Summary · Changes · Browser**; adding more (Logs, Cost…) is just another entry. - - *Summary* is ordered by supervision value: **Pull request → Review comments → - Activity → Overview** (the PR card bundles PR + review comments). - - *Review comments* surface a soft-blue **Address** action (`askAgentToFix`) that - hands the comment — with its `file:line` — to the agent session to fix. - - *Browser* is reserved for a web-preview / Playwright plugin. -- **Topbar:** `‹ Kanban` (back) · title + inline branch · **status pill** · - notifications · **Kill** (trash) · **Orchestrator** (blue primary, org-chart icon). - -## Iconography & motion -- **Line icons only** (Lucide-style, ~1.6px stroke, `currentColor`, inline SVG). **No emoji.** -- **Motion is minimal & purposeful:** a slow CSS-only "breathe" pulse on the - working dot / terminal cursor (`@keyframes breathe`, 2.4s). 
No animation - libraries (C-07). All motion respects `prefers-reduced-motion: reduce`. - -## Web Implementation Rules -- **Tokens over raw values.** Use the `--color-*` semantic tokens (or the literal - `--bg/--card/--t1…` palette) from `globals.css`. No hardcoded hex/rgba in components. -- **No inline `style=`** for theme values (C-02). Tailwind utilities with - `var(--token)`, or a named class in `globals.css`. -- **No external UI kits** (Radix, shadcn, Headless UI, …) (C-01). -- **Tailwind vs CSS classes:** Tailwind for one-off layout/spacing; add a class in - `globals.css` when a pattern is theme-sensitive, uses pseudo-elements/gradients, - or repeats 3+ times. -- **App Router only** (C-06). Component files ≤ 400 lines (C-04). Test files for - new/changed components (C-12). -- **Dark theme is always preserved** (C-05). Light-mode tokens still exist for the - theme toggle but mission control is designed and tuned for dark. - -## Accessibility -- **Focus indicators:** `outline: 2px solid var(--color-accent); outline-offset: 2px` on `:focus-visible`. Never `outline: none` without a visible replacement. -- **Reduced motion:** `@media (prefers-reduced-motion: reduce)` disables animations/transitions. Non-negotiable. -- **Color independence:** never encode meaning with color alone. Status badges always pair a colored dot with a text label. -- **Contrast:** body text ≥ 4.5:1; UI/borders/icons ≥ 3:1. The text ramp `--t1…--t3` is for primary→labels on the `--bg`/`--card` surfaces; `--t4` is for faint/disabled only. -- **Keyboard nav:** all interactive elements reachable via Tab; Escape closes popovers; logical order. -- **ARIA labels** on all icon-only buttons. 
- -## Constraints -- C-01: No new UI component libraries -- C-02: No inline styles in new/modified code -- C-04: Component files max 400 lines -- C-05: Dark theme preserved -- C-06: Next.js App Router only -- C-07: No animation libraries (CSS-only motion) -- C-12: Test files for new/changed components -- C-14: SSE 5s interval unchanged +## Spacing + +- **Base unit:** 4px (Tailwind scale: 1=4, 1.5=6, 2=8, 3=12, 4=16, 5=20, 6=24). +- **Density:** compact / desktop-tight. +- **Control + row height:** `h-8` = 32px default; `h-7` = 28px small; `h-6` = 24px xs. +- Inputs `px-2.5 py-1`; buttons `px-2.5`, gap 1–1.5. + +## Layout + +- **Approach:** fixed three-pane app shell, opens into the workbench (no marketing/dashboard home). +- **Panes:** `[ rail 240px ] [ center 1fr ] [ side rail 316px ]`. +- **Rail (240px), top → bottom:** + 1. **Orchestrator anchor** — pinned, single, visually distinct (blue 2px left bar, + `--bg-2` fill, hub/`waypoints` icon, name "Orchestrator", a `5 agents · 2 need you` + mono summary). This is ReverbCode's one addition over emdash. Default landing view. + 2. `PROJECTS` eyebrow label + a `+`. + 3. Project rows (folder icon + name) with nested **worker rows beneath**. Each project + row has a hover-revealed **`+`** that opens the New-worker modal pre-scoped to that + project (distinct from the `PROJECTS` header `+`, which registers a repo). + 4. **Footer:** `Search ⌘K`, `Settings ⌘,`. (No Library.) + 5. **Account** row pinned at the very bottom. +- **Worker rows are name-only.** Just the session name, truncated. Status, branch, diff, + and PR live in the panes and topbar, never in the row. Selection = `--bg-2` fill + a + 2px blue left bar. (emdash itself shows a faint trailing timestamp; we omit it by choice.) +- **Center = the conversation.** Orchestrator → its coordination terminal (delegate here; + composer reads "tell the orchestrator what to build"). Worker → the agent CLI terminal + (tabbed per agent, e.g. 
`claude-code (1)`), with a composer (model selector, worktree + path, `Accept edits`). The terminal **is** the conversation; no separate chat surface. +- **Side rail (316px):** orchestrator → a quiet **Workers** list (name + project + derived + status). Worker → the **Git review rail**: `Changed N` → All files / Discard all / Stage + all → file rows (`+adds −dels`, stage toggle) → `Commit message` + `Description` → + **Commit & Push** (primary blue) → branch + `Create PR`. +- **Border radius:** `sm` 4px (scrollbar) · `md` 6px (buttons, inputs, toggles) · + `lg` 8px (rows, cards, panels) · `xl` 12px (modals) · `full` (badges/pills/dots). +- **Icons:** **lucide** only. No emoji. + +### Topbar + +- **Left (both):** `project / session` breadcrumb + pin; for the orchestrator, a hub icon plus + `Orchestrator`. +- **Right — worker session:** a **PR/CI status pill** that is the action + (`PR #156 · mergeable` green / `CI failed` red / `review requested` amber / + `Open PR` when none) → **Changes / Files / Terminal** view toggles → **⋯ session menu** + (rename, restart, kill, claim PR — the `ao session …` commands). +- **Right — orchestrator:** **+ New worker** → Terminal toggle → **⋯ menu**. No diff toggles. + +### Spawn-worker modal (mirrors emdash's Create Task) + +You mostly let the orchestrator spawn workers from its conversation; the manual paths +(the topbar `+ New worker`, a project row's hover `+`, or `ao spawn`) open a modal that +mirrors emdash exactly. Launching from a project row pre-fills the Project field: + +- Centered dialog, **12px radius**, `max-w` ~512px, `bg` canvas, `ring-1` at 10% fg, + fade + zoom-95 enter. +- **Header:** eyebrow mono-uppercase title `New worker` + `×` close. 
+- **Body** (`gap` 15–16px): a **borderless large name field** (18px, auto-focus, slug + rule "letters, numbers, hyphens") → **Project** selector → **Agent** selector + (claude-code / codex / opencode / …) → a **"Based on"** bordered card with a segmented + control `Branch · Issue · Pull Request` revealing a combobox → a **Prompt / Workspace** + tab where Prompt is the worker's initial task (textarea). +- **Footer:** right-aligned single primary **`Spawn worker`** (blue) with a `⌘↵` keycap, + disabled until valid. + +## Motion + +- **Approach:** minimal-functional. The one expressive exception: a status dot/spinner + pulse on active/working sessions (opacity breathe) so "alive" is glanceable. Never + animate text or layout. +- **Easing:** enter `ease-out`, exit `ease-in`, move `ease-in-out`. +- **Duration:** micro 80ms · short 160ms · medium 240ms · status pulse 1.8s loop · + modal enter ~150ms fade+zoom-95. + +## Implementation notes + +- The renderer (`frontend/src/renderer/styles.css`) currently uses **Inter** and a + grayscale-blue theme. Migrate to this system: drop the Inter `font-family`, adopt the + system stack, and replace the token values with the emdash neutral ramp + blue accent above. +- Keep tokens as CSS custom properties under `:root` (dark) and `:root[data-theme="light"]`. +- A faithful HTML reference of all of the above (both views + topbar + spawn modal, + light/dark) is saved under + `~/.gstack/projects/aoagents-agent-orchestrator/designs/design-system-20260609/`. ## Decisions Log -| Date | Decision | Rationale | -|------|----------|-----------| -| 2026-05-27 | **Mission Control supersedes Warm Terminal** | A single source of truth. The product is a calm control room for a fleet of agents; cool restraint + rationed color reads better at 20+ agents than warm decoration. | -| 2026-05-27 | Blue = orchestrator/you, orange = working agent | The mascot is the blue conductor; orange is the Claude Code lineage. 
Two colors, two meanings — the product metaphor, visualized. | -| 2026-05-27 | Schibsted Grotesk (UI) + JetBrains Mono (machine), self-hosted | A distinctive grotesk for the product voice; mono reserved for machine data. Self-hosted via `next/font/local` — no external font CDN. | -| 2026-05-27 | One status system (`getStatusSpec` + `StatusBadge`) | Kanban badge, sidebar dot, and topbar pill all render from one spectrum so status never disagrees with itself. | -| 2026-05-27 | The card is the only bordered surface | Frameless tinted columns with per-column glow; cards are flat `--card` with a hairline ring. No box-in-box nesting. | + +| Date | Decision | Rationale | +| ---------- | ---------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- | +| 2026-06-09 | Match emdash's visual language exactly | User direction; emdash is the demonstrated reference for this app's UI. | +| 2026-06-09 | System font, not a custom typeface (e.g. Geist) | emdash uses the system stack; fidelity + native feel + zero font payload chosen over brand type. | +| 2026-06-09 | Refined **blue** accent, not emdash's jade green | User's explicit pick; blue for primary/active/focus, terminal stays green. | +| 2026-06-09 | Single global **Orchestrator** anchor, orchestrator-first default view | The one real difference from emdash; orchestrator is the human-facing coordinator you delegate to. | +| 2026-06-09 | **Name-only** worker rows | User direction; status/branch/diff live in panes + topbar, not the row. | +| 2026-06-09 | Removed **Library** from the rail footer | User direction; footer is Search + Settings only. | +| 2026-06-09 | Topbar right = PR/CI pill + view toggles + ⋯ menu (worker) | Surfaces the actionable PR/CI state from the daemon; emdash/PostHog Code precedent. 
| +| 2026-06-09 | Spawn modal mirrors emdash's Create Task | Consistency with the reference; mapped to `ao spawn` params. | diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 56868c1d25..0000000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2025 Composio, Inc. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md index 121c050c0f..27398e1a8a 100644 --- a/README.md +++ b/README.md @@ -1,244 +1,221 @@ -

Agent Orchestrator — The Orchestration Layer for Parallel AI Agents

- -

- - Agent Orchestrator banner - -

- -
- -Spawn parallel AI coding agents, each in its own git worktree. Agents autonomously fix CI failures, address review comments, and open PRs — you supervise from one dashboard. - -[![GitHub stars](https://img.shields.io/github/stars/ComposioHQ/agent-orchestrator?style=flat-square)](https://github.com/ComposioHQ/agent-orchestrator/stargazers) -[![npm version](https://img.shields.io/npm/v/%40aoagents%2Fao?style=flat-square)](https://www.npmjs.com/package/@aoagents/ao) -[![License: MIT](https://img.shields.io/badge/license-MIT-blue?style=flat-square)](LICENSE) -[![PRs merged](https://img.shields.io/badge/PRs_merged-61-brightgreen?style=flat-square)](https://github.com/ComposioHQ/agent-orchestrator/pulls?q=is%3Amerged) -[![Tests](https://img.shields.io/badge/test_cases-3%2C288-blue?style=flat-square)](https://github.com/ComposioHQ/agent-orchestrator/releases/tag/metrics-v1) -[![Discord](https://img.shields.io/badge/Discord-Join%20Community-5865F2?style=flat-square&logo=discord&logoColor=white)](https://discord.gg/UZv7JjxbwG) - -
- ---- - -Agent Orchestrator manages fleets of AI coding agents working in parallel on your codebase. Each agent gets its own git worktree, its own branch, and its own PR. When CI fails, the agent fixes it. When reviewers leave comments, the agent addresses them. You only get pulled in when human judgment is needed. - -**Agent-agnostic** (Claude Code, Codex, Aider) · **Runtime-agnostic** (tmux, ConPTY/process, Docker) · **Tracker-agnostic** (GitHub, Linear) - -
- -## See it in action - - - Agent Orchestrator demo — AI agents building their own orchestrator - -

-Watch the Demo on X -


- - The Self-Improving AI System That Built Itself - -

-Read the Full Article on X - -
- -## Quick Start - -> **Prerequisites:** [Node.js 20.18.3+](https://nodejs.org), [Git 2.25+](https://git-scm.com), [`gh` CLI](https://cli.github.com), and: -> - **macOS / Linux:** [tmux](https://github.com/tmux/tmux/wiki/Installing) — install via `brew install tmux` or `sudo apt install tmux`. -> - **Windows:** PowerShell 7+ recommended. tmux is **not** required — AO uses native ConPTY via the `runtime-process` plugin (the default on Windows). Set `AO_SHELL=bash` if you have Git Bash and prefer it. - -### Install +# ReverbCode + +The orchestration layer for parallel AI coding agents. ReverbCode is a +Go-backed daemon that supervises many coding-agent sessions at once, each in +its own `git worktree`, and routes the feedback they need (CI failures, review +comments, merge conflicts) back to the right agent automatically. It ships with +an `ao` CLI and an Electron supervisor that both drive the same daemon over +loopback. + +The Go module and packages remain `agent-orchestrator`; "ReverbCode" is the +public name. + +See [`docs/architecture.md`](docs/architecture.md) for the backend mental model +and [`AGENTS.md`](AGENTS.md) for the contributor / worker contract. For current +progress (what's shipped vs. in flight) see [`docs/STATUS.md`](docs/STATUS.md). + +## What it does + +- **Agent-agnostic.** A 23-adapter platform under + `backend/internal/adapters/agent/` (`claude-code`, `codex`, `cursor`, + `opencode`, `aider`, `amp`, `goose`, `copilot`, `grok`, `qwen`, `kimi`, + `crush`, `cline`, `droid`, `devin`, `auggie`, `continue`, `kiro`, `kilocode`, + and more), registered through a shared registry with common + activity-dispatch / hook utilities. Worker and orchestrator defaults are set + per project. 
+- **Isolated workspaces.** Worker and orchestrator sessions spawn into their own + `git worktree` (`backend/internal/adapters/workspace/gitworktree/`), launched + inside a `zellij` runtime adapter (`backend/internal/adapters/runtime/`) so + every session has its own attachable terminal. +- **Live PR observation.** The provider-neutral SCM observer + (`backend/internal/observe/scm/`) polls each session's PR with ETag guards and + semantic diffing, tracking CI/check runs and review threads, and feeds those + facts into the lifecycle manager, which sends the owning agent nudges for CI + failures, review feedback, and merge conflicts. GitHub is the implemented + provider today. +- **Durable facts, derived status.** The SQLite store + (`backend/internal/storage/sqlite/`) persists a small set of session facts + plus PR/check/comment rows; display status is computed at read time, never + stored. DB triggers append every user-visible change to `change_log`, and a + CDC poller/broadcaster (`backend/internal/cdc/`) feeds in-process subscribers + and an SSE replay endpoint. +- **Loopback-only daemon.** The HTTP daemon (`backend/internal/httpd`) controls + projects, sessions, orchestrators, and hook callbacks over `127.0.0.1` with no + auth, CORS, or TLS by design. +- **Lifecycle manager + reaper** (`backend/internal/lifecycle/`, + `backend/internal/observe/reaper/`) reduce runtime/activity/PR observations + into the durable session state and reclaim dead sessions. + +## How it works + +1. Register a local git repo as a project (`ao project add`). +2. Spawn a worker session (`ao spawn`), or an orchestrator that fans work out + across sessions. Each session gets its own `git worktree` and a `zellij` + pane. +3. The agent develops, tests, and opens a PR from inside its worktree. +4. The SCM observer watches that PR and routes feedback back to the agent: a CI + failure, a requested change, or a merge conflict becomes a nudge to the agent + that owns the PR. +5. 
You inspect, attach a terminal, and merge from the CLI or the Electron app; + human attention is needed only where the loop can't resolve on its own. + +## Extensibility + +The backend is organized around inbound/outbound port contracts +(`backend/internal/ports/`) with swappable adapters under +`backend/internal/adapters/`: + +| Port | Implemented adapters | +| --------- | --------------------------------------------- | +| Agent | 23 harnesses (see above) | +| Runtime | `zellij` | +| Workspace | `git worktree` | +| SCM | GitHub | +| Tracker | GitHub (adapter present; no runtime loop yet) | +| Reviewer | `claude-code` | +| Notifier | port defined; no shipped adapter yet | + +See [`docs/STATUS.md`](docs/STATUS.md) for which lanes are live at runtime. + +## Quick start + +Requirements: Go 1.25+, [`zellij`](https://zellij.dev/) on `PATH` for the +runtime adapter, and `gh` (or `GITHUB_TOKEN`) if you want the SCM observer to +authenticate against GitHub. The SQLite driver is the pure-Go +`modernc.org/sqlite` — no system SQLite library is required. ```bash -npm install -g @aoagents/ao -``` - -> **Nightly builds** (latest `main`, daily Fri–Tue): `npm install -g @aoagents/ao@nightly` -> Back to stable: `npm install -g @aoagents/ao@latest` +cd backend +go build -o /tmp/ao ./cmd/ao -
-Permission denied? Install from source? +# Start the daemon and wait for /readyz. +/tmp/ao start -If `npm install -g` fails with EACCES, prefix with `sudo` or [fix your npm permissions](https://docs.npmjs.com/resolving-eacces-permissions-errors-when-installing-packages-globally). +# Register a local git repo as a project. The id defaults to the lowercased +# base of --path; pass --id explicitly when the directory name doesn't match. +/tmp/ao project add --path /path/to/your/repo --id your-repo --name your-repo \ + --worker-agent codex --orchestrator-agent codex -To install from source (for contributors): +# Spawn a worker session running the project's worker agent. +/tmp/ao spawn --project your-repo --prompt "Refactor the auth module" -```bash -git clone https://github.com/ComposioHQ/agent-orchestrator.git -cd agent-orchestrator && bash scripts/setup.sh +# Inspect what's running. +/tmp/ao status +/tmp/ao session ls ``` -
-### Zsh Completion +### Electron app (dev) -Generate the completion file from the installed CLI: +The desktop supervisor lives under `frontend/` and is started separately: ```bash -mkdir -p ~/.zsh/completions -ao completion zsh > ~/.zsh/completions/_ao -``` - -Then make sure the directory is on your `fpath` before `compinit` runs: - -```zsh -fpath=(~/.zsh/completions $fpath) -autoload -Uz compinit -compinit +cd frontend +npm install +npm run dev # electron-forge start ``` -For Oh My Zsh, install the same generated file into a custom plugin directory and add `ao` to your plugin list: - -```bash -mkdir -p "${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/ao" -ao completion zsh > "${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/ao/_ao" -``` - -If you are contributing from a source checkout, you can also symlink the repo copy at [`completions/_ao`](completions/_ao). - -### Start - -Point it at any repo — it clones, configures, and launches the dashboard in one command: - -```bash -ao start https://github.com/your-org/your-repo -``` - -Or from inside an existing local repo: - -```bash -cd ~/your-project && ao start -``` - -That's it. The dashboard opens at `http://localhost:3000` and the orchestrator agent starts managing your project. - -### Add more projects - -```bash -ao start ~/path/to/another-repo -``` - -## How It Works - -1. **You start** — `ao start` launches the dashboard and an orchestrator agent -2. **Orchestrator spawns workers** — each issue gets its own agent in an isolated git worktree -3. **Agents work autonomously** — they read code, write tests, create PRs -4. **Reactions handle feedback** — CI failures and review comments are automatically routed back to the agent -5. **You review and merge** — you only get pulled in when human judgment is needed - -The orchestrator agent uses the [AO CLI](docs/CLI.md) internally to manage sessions. You don't need to learn or use the CLI — the dashboard and orchestrator handle everything. 
+Heads-up: `npm run dev` does **not** start the daemon for you. Start it first +(`ao start`, see above) — the renderer attaches to the running daemon over +loopback (`127.0.0.1:3001` by default, the `AO_PORT` from the table below). +Without a daemon the app opens but shows its daemon-not-ready state. + +For renderer-only UI work without the Electron shell, use +`npm run dev:web` (Vite in a regular browser). + +## CLI surface + +The CLI is intentionally thin: every product command resolves to a daemon HTTP +route. Run `ao <command> --help` for the authoritative flag shape; the table +below groups what's on `main` today. + +| Lane | Command | Purpose | +| ------------ | ------------------------------------ | ---------------------------------------------------------------------------------- | +| Daemon | `ao start` | Start the daemon in the background and wait for `/readyz`. | +| Daemon | `ao stop` | Graceful shutdown via loopback `POST /shutdown`. | +| Daemon | `ao status` | Report PID/port/health/readiness from `running.json`. | +| Daemon | `ao daemon` | Hidden internal entrypoint used by `ao start`. | +| Project | `ao project add` | Register a local git repo as a project. | +| Project | `ao project ls` | List registered projects. | +| Project | `ao project get <id>` | Fetch one project. | +| Project | `ao project set-config <id>` | Update per-project config. | +| Project | `ao project rm <id>` | Remove a project. | +| Session | `ao spawn` | Spawn a worker session in a registered project. | +| Session | `ao session ls` | List sessions (filter by project, include terminated). | +| Session | `ao session get <id>` | Fetch one session. | +| Session | `ao session kill <id>` | Terminate a session. | +| Session | `ao session rename <id> <name>` | Rename a session. | +| Session | `ao session restore <id>` | Relaunch a terminated session. | +| Session | `ao session cleanup` | Reclaim eligible workspaces for terminated sessions. | +| Session | `ao session claim-pr <id> <pr>` | Attach an existing PR to a session. 
| +| Orchestrator | `ao orchestrator ls` | List orchestrator sessions. | +| Messaging | `ao send` | Send a message to a running agent session. | +| Preview | `ao preview [url]` | Open a URL (or the workspace `index.html`) in the session's desktop browser panel. | +| Utility | `ao doctor` | Local health checks (config, data dir, DB, `git`, `zellij`). | +| Utility | `ao completion <shell>` | Generate bash/zsh/fish/powershell completions. | +| Utility | `ao version` | Print build metadata. | +| Internal | `ao hooks ` | Hidden adapter hook callback. | + +See [`docs/cli/`](docs/cli/) for the daemon-control intent and command shape. ## Configuration -`ao start` auto-generates `agent-orchestrator.yaml` with sensible defaults. You can edit it afterwards to customize behavior: - -```yaml -# agent-orchestrator.yaml -$schema: https://raw.githubusercontent.com/ComposioHQ/agent-orchestrator/main/schema/config.schema.json -# Runtime data is auto-derived under ~/.agent-orchestrator/{hash}-{projectId}/ -port: 3000 - -defaults: - runtime: tmux # default on macOS / Linux; on Windows the default is `process` (ConPTY) - agent: claude-code - workspace: worktree - notifiers: [desktop] - -projects: - my-app: - repo: owner/my-app - path: ~/my-app - defaultBranch: main - sessionPrefix: app - -reactions: - ci-failed: - auto: true - action: send-to-agent - retries: 2 - changes-requested: - auto: true - action: send-to-agent - escalateAfter: 30m - approved-and-green: - auto: false # flip to true for auto-merge - action: notify -``` - -CI fails → agent gets the logs and fixes it. Reviewer requests changes → agent addresses them. PR approved with green CI → you get a notification to merge. - -Keep the `$schema` line so editors can autocomplete and validate against [`schema/config.schema.json`](schema/config.schema.json). +All configuration is env-driven; the daemon takes no config file. 
The bind +host is hard-coded to `127.0.0.1` — the daemon has no auth, CORS, or TLS, and +exposing it beyond loopback would be a security regression. -See [`agent-orchestrator.yaml.example`](agent-orchestrator.yaml.example) for the full reference, or run `ao config-help` for the complete schema. +| Var | Default | Purpose | +| --------------------- | ------------------------------------------------- | --------------------------------------------------------------------------- | +| `AO_PORT` | `3001` | Bind port; daemon fails fast if taken. | +| `AO_REQUEST_TIMEOUT` | `60s` | Per-request timeout (Go duration). | +| `AO_SHUTDOWN_TIMEOUT` | `10s` | Graceful-shutdown hard cap. | +| `AO_RUN_FILE` | `/agent-orchestrator/running.json` | PID + port handshake path. | +| `AO_DATA_DIR` | `/agent-orchestrator/data` | SQLite DB, WAL files, managed state. | +| `AO_AGENT` | `claude-code` | Compatibility agent adapter id validated at daemon startup. | +| `AO_SESSION_ID` | _(unset)_ | Set inside spawned sessions; read by `ao send` and `ao hooks`. | +| `GITHUB_TOKEN` | _(unset)_ | Used by the GitHub SCM and tracker adapters. Falls back to `gh auth token`. | -## Remote Access +Health check: -AO keeps your Mac awake while running, so you can access the dashboard remotely (e.g., via Tailscale from your phone) without the machine going to sleep. - -**How it works:** On macOS, AO automatically holds an idle-sleep prevention assertion using `caffeinate`. When AO exits, the assertion is released. - -```yaml -# agent-orchestrator.yaml -$schema: https://raw.githubusercontent.com/ComposioHQ/agent-orchestrator/main/schema/config.schema.json -power: - preventIdleSleep: true # Default on macOS; no-op on Linux and Windows +```bash +curl localhost:3001/healthz +curl localhost:3001/readyz ``` -Set to `false` if you want to allow idle sleep while AO runs. - -**Lid-close limitation:** macOS enforces lid-close sleep at the hardware level — no userspace assertion can override it. 
If you need remote access while traveling with the lid closed, use [clamshell mode](https://support.apple.com/en-us/102505) (external power + display + input device). - -**Linux / Windows:** AO does not currently hold a wake assertion on these platforms. On Linux, idle-sleep behaviour is governed by your desktop environment / `systemd-logind`; configure that directly. On Windows, set the OS power plan if remote access matters while idle. - -## Plugin Architecture - -Seven plugin slots. Lifecycle stays in core. +## Architecture -| Slot | Default | Alternatives | -| --------- | ----------- | ------------------------ | -| Runtime | tmux (macOS/Linux) / process (Windows) | process, docker | -| Agent | claude-code | codex, aider, cursor, opencode, kimicode | -| Workspace | worktree | clone | -| Tracker | github | linear, gitlab | -| SCM | github | gitlab | -| Notifier | desktop | slack, discord, composio, webhook, openclaw | -| Terminal | iterm2 | web | +The daemon is a long-running supervisor. Adapters observe external facts (PR +state, agent activity, runtime liveness); the lifecycle manager reduces those +into a small set of durable session facts (`activity_state`, `is_terminated`, +PR rows). Display status is _derived_ from those facts at read time — it is +never stored. SQLite triggers append every user-visible change to `change_log`, +and the CDC poller broadcasts those events to in-process subscribers and an +SSE stream. -All interfaces defined in [`packages/core/src/types.ts`](packages/core/src/types.ts). A plugin implements one interface and exports a `PluginModule`. That's it. +Full mental model and load-bearing rules: [`docs/architecture.md`](docs/architecture.md). +Package-by-package ownership: [`docs/backend-code-structure.md`](docs/backend-code-structure.md). -## Why Agent Orchestrator? +## Testing -Running one AI agent in a terminal is easy. Running 30 across different issues, branches, and PRs is a coordination problem. 
- -**Without orchestration**, you manually: create branches, start agents, check if they're stuck, read CI failures, forward review comments, track which PRs are ready to merge, clean up when done. - -**With Agent Orchestrator**, you: `ao start` and walk away. The system handles isolation, feedback routing, and status tracking. You review PRs and make decisions — the rest is automated. - -## Documentation - -| Doc | What it covers | -| ---------------------------------------- | ------------------------------------------------------------ | -| [Setup Guide](SETUP.md) | Detailed installation, configuration, and troubleshooting | -| [CLI Reference](docs/CLI.md) | All `ao` commands (mostly used by the orchestrator agent) | -| [Examples](examples/) | Config templates (GitHub, Linear, multi-project, auto-merge) | -| [Development Guide](docs/DEVELOPMENT.md) | Architecture, conventions, plugin pattern | -| [Contributing](CONTRIBUTING.md) | How to contribute, build plugins, PR process | - -## Development +The local gate is the backend Go build and race-enabled test suite: ```bash -pnpm install && pnpm build # Install and build all packages -pnpm test # Run tests (3,288 test cases) -pnpm dev # Start web dashboard dev server +cd backend && go build ./... && go test -race ./... ``` -See [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md) for code conventions and architecture details. +GitHub Actions is the authoritative pre-merge gate; mirror its commands here +when in doubt. See [`AGENTS.md`](AGENTS.md) for the regen workflow when +touching the daemon API surface (`npm run sqlc`, `npm run api`). -## Contributing +## Status and roadmap -Contributions welcome. The plugin system makes it straightforward to add support for new agents, runtimes, trackers, and notification channels. Every plugin is an implementation of a TypeScript interface — see [CONTRIBUTING.md](CONTRIBUTING.md) and the [Development Guide](docs/DEVELOPMENT.md) for the pattern. 
+Progress tracking lives in [`docs/STATUS.md`](docs/STATUS.md): what is shipped +on `main` today, what is still in flight, and the linked +[`rewrite`](https://github.com/aoagents/agent-orchestrator/milestone/1) +milestone on GitHub. -## License +## Contributing -MIT +Repo layout and the worker contract live in [`AGENTS.md`](AGENTS.md). Keep +changes surgical, follow the package boundaries documented in +[`docs/backend-code-structure.md`](docs/backend-code-structure.md), and prefer +adding daemon HTTP routes over leaking storage / runtime into the CLI. diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index f04b53f18e..0000000000 --- a/SECURITY.md +++ /dev/null @@ -1,228 +0,0 @@ -# Security Policy - -## Reporting Security Issues - -**Please do not report security vulnerabilities through public GitHub issues.** - -Instead, please report them via email to security@composio.dev. - -You should receive a response within 48 hours. If for some reason you do not, please follow up via email to ensure we received your original message. - -Please include the following information: - -- Type of issue (e.g., secret leak, code injection, authentication bypass) -- Full paths of source file(s) related to the issue -- Location of the affected source code (tag/branch/commit or direct URL) -- Step-by-step instructions to reproduce the issue -- Proof-of-concept or exploit code (if possible) -- Impact of the issue - -## Security Audit History - -### Known Issues - -#### OpenClaw Notifier Token (Resolved) - -**Status**: Removed from codebase -**Severity**: Medium -**Date**: 2026-02-15 -**Commit**: 0393ab70a83e090883895d2168aa39a76f997ec8 - -An OpenClaw notifier token (`1af5c4f...872` - redacted) was accidentally committed in `agent-orchestrator.yaml` and later removed. 
This token was: - -- Used for local development/testing only -- Never used in production -- Removed in subsequent commits -- Still present in git history - -**Action Required**: If this token is still in use, it should be rotated immediately. - -**Lesson**: All tokens and API keys must use environment variables. The `agent-orchestrator.yaml` file is now in `.gitignore` to prevent future accidental commits. - -## Security Measures - -### Automated Secret Scanning - -This repository uses [Gitleaks](https://github.com/gitleaks/gitleaks) to prevent accidental commits of secrets: - -1. **Pre-commit Hook** — Scans staged files before every commit -2. **CI Pipeline** — Scans full git history on every push/PR -3. **Scheduled Scans** — Weekly scans to catch new vulnerability patterns - -### Dependency Security - -- **Dependency Review** — GitHub Action scans PRs for vulnerable dependencies -- **npm audit** — Runs in CI to detect known vulnerabilities in dependencies -- **Automated Updates** — Dependabot (or similar) for security patches - -## Best Practices for Developers - -### Never Commit Secrets - -❌ **Bad** — Hardcoded secret: - -```yaml -notifiers: - slack: - webhook: https://hooks.slack.com/services/T123/B456/abc123 -``` - -✅ **Good** — Environment variable: - -```yaml -notifiers: - slack: - webhook: ${SLACK_WEBHOOK_URL} -``` - -### Use Environment Variables - -Store all secrets in environment variables: - -```bash -# .env.local (ignored by git) -LINEAR_API_KEY=lin_api_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -SLACK_WEBHOOK_URL=https://hooks.slack.com/services/... 
-``` - -Then reference in config: - -```yaml -notifiers: - slack: - webhook: ${SLACK_WEBHOOK_URL} -``` - -### Naming Conventions - -Use consistent environment variable names: - -- `*_API_KEY` — API keys (e.g., `LINEAR_API_KEY`) -- `*_TOKEN` — Authentication tokens (e.g., `GITHUB_TOKEN`) -- `*_SECRET` — Secret keys (e.g., `JWT_SECRET`) -- `*_URL` — URLs that may contain credentials (e.g., `DATABASE_URL`) - -### Example Config Files - -When creating example config files: - -1. Use placeholder values: `your-api-key-here`, `your-token-here` -2. Use environment variable references: `${ENV_VAR}` -3. Never copy real credentials, even "temporarily" -4. Document which environment variables are required - -### Files to Never Commit - -The `.gitignore` excludes these patterns: - -- `.env`, `.env.local`, `.env.*.local` -- `*.key`, `*.pem`, `*.p12`, `*.pfx` -- `secrets.yaml`, `credentials.json` -- `agent-orchestrator.yaml` (local config) - -### Checking for Secrets Locally - -Before committing: - -```bash -# Scan current files -gitleaks detect --no-git - -# Scan staged files (automatic in pre-commit hook) -gitleaks protect --staged - -# Scan full git history -gitleaks detect -``` - -### What to Do If You Commit a Secret - -If you accidentally commit a secret: - -1. **Rotate the secret immediately** — Assume it's compromised -2. **Remove from git history** — Use `git filter-repo` or similar (dangerous!) -3. **Update `.gitleaks.toml`** — Add pattern to prevent similar leaks -4. **Report internally** — Document in SECURITY.md - -**Never** just delete the file and commit — the secret remains in git history! - -### Code Review - -When reviewing PRs: - -- ✅ Check for hardcoded tokens, passwords, API keys -- ✅ Verify environment variables are documented but not hardcoded -- ✅ Ensure example configs use placeholders -- ✅ Confirm CI security check passed - -## Best Practices for Users - -### Secure Configuration - -When setting up Agent Orchestrator: - -1. 
**Copy example config**: `cp agent-orchestrator.yaml.example agent-orchestrator.yaml` -2. **Add real secrets**: Edit `agent-orchestrator.yaml` with your actual tokens -3. **Never commit local config**: It's in `.gitignore` — keep it there! -4. **Use secret management**: Consider 1Password, AWS Secrets Manager, etc. - -### Required Secrets - -Agent Orchestrator may require these secrets: - -| Service | Environment Variable | Where to Get | -| --------- | -------------------- | ---------------------------------------- | -| GitHub | `GITHUB_TOKEN` | https://github.com/settings/tokens | -| Linear | `LINEAR_API_KEY` | https://linear.app/settings/api | -| Slack | `SLACK_WEBHOOK_URL` | https://api.slack.com/messaging/webhooks | -| Anthropic | `ANTHROPIC_API_KEY` | https://console.anthropic.com/ | - -### Setting Environment Variables - -**macOS/Linux**: - -```bash -# In ~/.zshrc or ~/.bashrc -export GITHUB_TOKEN="ghp_xxxxx" -export LINEAR_API_KEY="lin_api_xxxxx" -``` - -**Or use `.env.local`**: - -```bash -# In your project directory -echo 'GITHUB_TOKEN=ghp_xxxxx' >> .env.local -echo 'LINEAR_API_KEY=lin_api_xxxxx' >> .env.local -``` - -### Protecting Your Secrets - -- ✅ Use strong, unique tokens for each service -- ✅ Rotate tokens regularly (every 90 days) -- ✅ Use minimal permissions (read-only when possible) -- ✅ Store in a password manager -- ❌ Never share tokens in chat, email, or screenshots -- ❌ Never commit to git (public or private repos) -- ❌ Never hardcode in shell scripts - -## Supported Versions - -| Version | Supported | -| ------- | ------------------ | -| 0.1.x | :white_check_mark: | - -Security updates are provided for the latest version only. 
- -## Security Tools - -This project uses: - -- [Gitleaks](https://github.com/gitleaks/gitleaks) — Secret scanning -- [GitHub Dependency Review](https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review) — Dependency vulnerability scanning -- [npm audit](https://docs.npmjs.com/cli/v8/commands/npm-audit) — Dependency vulnerability detection -- [Husky](https://typicode.github.io/husky/) — Git hooks for pre-commit validation - -## License - -This security policy is part of the Agent Orchestrator project and is licensed under the MIT License. diff --git a/SETUP.md b/SETUP.md deleted file mode 100644 index 08402f8325..0000000000 --- a/SETUP.md +++ /dev/null @@ -1,859 +0,0 @@ -# Agent Orchestrator Setup Guide - -Comprehensive guide to installing, configuring, and troubleshooting Agent Orchestrator. - -## Prerequisites - -### Required - -- **Node.js 20+** - Runtime for the orchestrator and CLI - - ```bash - node --version # Should be v20.0.0 or higher - ``` - -- **Git 2.25+** - For repository management and worktrees - - ```bash - git --version - ``` - -- **Terminal runtime** — varies by OS: - - **On macOS / Linux:** `tmux` is required (it's the default runtime). - - ```bash - tmux -V - - # Install on macOS - brew install tmux - - # Install on Ubuntu/Debian - sudo apt install tmux - - # Install on Fedora/RHEL - sudo dnf install tmux - ``` - - **On Windows:** tmux is **not** required. AO uses native ConPTY via the `runtime-process` plugin (the default on Windows). PowerShell 7+ is recommended; if you have Git Bash and prefer bash semantics for shell-out commands, set `AO_SHELL=bash` in your environment. WSL is not required. 
- -- **GitHub CLI** (for GitHub integration) - Required for PR creation, issue management - - ```bash - gh --version - - # Install on macOS - brew install gh - - # Install on Linux - # See: https://github.com/cli/cli/blob/trunk/docs/install_linux.md - ``` - -### Optional - -- **Linear API Key** - If using Linear for issue tracking - - Get it from: https://linear.app/settings/api - - Set environment variable: `export LINEAR_API_KEY="lin_api_..."` - -- **Slack Webhook** - If using Slack notifications - - Create incoming webhook: https://api.slack.com/messaging/webhooks - - Set environment variable: `export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/..."` - -- **Public dashboard URL** - If running AO behind a reverse proxy (e.g. inside a remote dev container, on a VPS fronted by Caddy/nginx/Traefik) - - Set `AO_PUBLIC_URL` to the externally-reachable URL of the dashboard - - All console output, `ao open` browser launches, and orchestrator-prompt session links use this URL instead of `http://localhost:<port>` - - Example: `export AO_PUBLIC_URL="https://ao.example.com"` - - When the dashboard is served on a standard port (HTTPS 443 / HTTP 80) the dashboard JS connects the mux WebSocket to `/ao-terminal-mux` on the same hostname. Your proxy needs to forward that path to the direct terminal server (`DIRECT_TERMINAL_PORT`, default 14801) — its upgrade handler accepts both `/mux` and `/ao-terminal-mux`. For custom paths set `TERMINAL_WS_PATH=/your/path`. - - **`AO_PATH_BASED_MUX=1`** (opt-in) — if your proxy can only forward one hostname:port pair (e.g. Cloudflare Tunnel pointed at a single `service:` URL with no path-based ingress), set this and `ao start` will run a small bundled HTTP/WS proxy on `PORT` that demultiplexes: HTTP forwards to Next.js (shifted to `PORT + 1000`, override with `NEXT_INTERNAL_PORT`), and `wss://hostname/ao-terminal-mux` is tunneled to `DIRECT_TERMINAL_PORT/mux`. 
Tradeoff: an extra Node process and one extra hop per HTTP request, in exchange for a one-line proxy config on the operator side. - -## Installation - -### Install via npm (recommended) - -```bash -npm install -g @aoagents/ao - -# Verify -ao --version -``` - -This installs the `ao` CLI globally along with all default plugins and the web dashboard. - -**Permission denied (EACCES)?** This is common on macOS. Three options: - -```bash -# Option 1: Use sudo -sudo npm install -g @aoagents/ao - -# Option 2: Use npx (no global install needed) -npx @aoagents/ao start - -# Option 3: Fix npm permissions permanently (recommended) -mkdir -p ~/.npm-global -npm config set prefix '~/.npm-global' -echo 'export PATH=~/.npm-global/bin:$PATH' >> ~/.zshrc -source ~/.zshrc -npm install -g @aoagents/ao -``` - -### Build from Source (for contributors) - -If you want to develop or contribute to Agent Orchestrator: - -```bash -# Clone the repository -git clone https://github.com/ComposioHQ/agent-orchestrator -cd agent-orchestrator - -# Run the setup script (installs deps, builds, links CLI) -bash scripts/setup.sh - -# Verify -ao --version -``` - -The setup script handles pnpm installation, dependency resolution, building all packages, and linking the `ao` command globally (with automatic permission handling on macOS). - -## First-Time Setup - -### `ao start` — the only command you need - -`ao start` handles everything: auto-detecting your project, generating config, and launching the dashboard + orchestrator. There are three ways to use it: - -**From a URL (fastest for any repo):** - -```bash -ao start https://github.com/your-org/your-repo -``` - -This clones the repo, auto-detects language/framework/branch, generates `agent-orchestrator.yaml`, and starts everything. 
Supports GitHub, GitLab, and Bitbucket (HTTPS and SSH): - -```bash -ao start https://github.com/owner/repo -ao start https://gitlab.com/org/project -ao start git@github.com:owner/repo.git -``` - -**From a local repo (zero prompts):** - -```bash -cd ~/your-project -ao start -``` - -Auto-detects git remote, default branch, language, and available agent runtimes. Generates config and starts. - -**Adding more projects:** - -```bash -ao start ~/path/to/another-repo -``` - -If a config already exists, the new project is appended. If not, one is created first. - -### What `ao start` detects automatically - -- **Git remote** — parses `owner/repo` from origin -- **Default branch** — checks symbolic-ref, GitHub API, then common names (main/master) -- **Project type** — language, framework, test runner, package manager -- **Agent runtime** — which AI agents are installed (Claude Code, Codex, Aider, OpenCode) -- **Free port** — if configured port is busy, auto-finds the next available -- **tmux** — warns if not installed (skipped on Windows; AO uses ConPTY there and tmux is not required) -- **GitHub CLI** — checks `gh auth status` - -### Manual Configuration - -If you prefer to write the config by hand: - -```bash -cp agent-orchestrator.yaml.example agent-orchestrator.yaml -nano agent-orchestrator.yaml -``` - -Or start from an example: - -```bash -cp examples/simple-github.yaml agent-orchestrator.yaml -nano agent-orchestrator.yaml -``` - -## Configuration Reference - -### Minimal Configuration - -The absolute minimum needed (everything else has sensible defaults): - -```yaml -projects: - my-app: - repo: owner/my-app - path: ~/my-app - defaultBranch: main -``` - -`ao start` generates this automatically — you only need to write it manually if you want full control. - -### Full Configuration Schema - -See [agent-orchestrator.yaml.example](./agent-orchestrator.yaml.example) for a fully commented example with all options. 
- -### Plugin Slots - -Agent Orchestrator has 8 plugin slots. All are swappable: - -| Slot | Purpose | Default | Alternatives | -| ------------- | -------------------- | ------------- | ----------------------------------------------- | -| **Runtime** | How sessions run | `tmux` (macOS/Linux) / `process` (Windows; ConPTY via node-pty) | `process`, `docker`, `kubernetes`, `ssh`, `e2b` | -| **Agent** | AI coding assistant | `claude-code` | `codex`, `aider`, `goose`, custom | -| **Workspace** | Workspace isolation | `worktree` | `clone`, `copy` | -| **Tracker** | Issue tracking | `github` | `linear`, `jira`, custom | -| **SCM** | Source control | `github` | GitLab, Bitbucket (future) | -| **Notifier** | Notifications | `desktop` | `slack`, `discord`, `webhook`, `email` | -| **Terminal** | Terminal integration | `iterm2` | `web`, custom | -| **Lifecycle** | Session lifecycle | (core) | Non-pluggable | - -### Reactions - -Reactions are auto-responses to events. Configure how the orchestrator handles common scenarios: - -#### CI Failed - -```yaml -reactions: - ci-failed: - auto: true # Enable auto-handling - action: send-to-agent # Send failure logs to agent - retries: 2 # Retry up to 2 times - escalateAfter: 2 # Notify human after 2 failures -``` - -#### Changes Requested (Review Comments) - -```yaml -reactions: - changes-requested: - auto: true - action: send-to-agent - escalateAfter: 30m # Notify human if not resolved in 30 minutes -``` - -#### Approved and Green (Auto-merge) - -```yaml -reactions: - approved-and-green: - auto: true # Enable auto-merge - action: auto-merge # Merge when approved + CI passes - priority: action # Notification priority -``` - -**Warning:** Only enable auto-merge if you trust your CI pipeline and agents! 
- -#### Agent Stuck - -```yaml -reactions: - agent-stuck: - threshold: 10m # Consider stuck after 10 minutes of inactivity - action: notify - priority: urgent -``` - -### Notification Routing - -Route notifications by priority: - -```yaml -notificationRouting: - urgent: [desktop, slack] # Agent stuck, needs input, errored - action: [desktop, slack] # PR ready to merge - warning: [slack] # Auto-fix failed - info: [slack] # Summary, all done -``` - -### Agent Rules - -Inline rules included in every agent prompt: - -```yaml -projects: - my-app: - agentRules: | - Always run tests before pushing. - Use conventional commits (feat:, fix:, chore:). - Link issue numbers in commit messages. -``` - -Or reference an external file: - -```yaml -projects: - my-app: - agentRulesFile: .agent-rules.md -``` - -### Per-Project Overrides - -Override defaults per project: - -```yaml -projects: - frontend: - runtime: tmux # default on macOS/Linux; on Windows use `process` - agent: claude-code - workspace: worktree - - backend: - runtime: docker # Use Docker for backend - agent: codex # Use Codex instead of Claude -``` - -## Integration Guides - -### GitHub Issues - -**Authentication:** - -```bash -gh auth login -``` - -**Required scopes:** - -- `repo` - Full repository access -- `read:org` - Read organization membership (for team mentions) - -**Verification:** - -```bash -gh auth status -``` - -### Linear - -**Setup:** - -1. Get your API key: https://linear.app/settings/api -2. Add to environment: - - ```bash - echo 'export LINEAR_API_KEY="lin_api_..."' >> ~/.zshrc - source ~/.zshrc - ``` - -3. Find your team ID: - - Go to https://linear.app/settings/api - - Click "Create new key" or use existing key - - Team ID is visible in your Linear workspace URL or via API - -4. 
Configure in `agent-orchestrator.yaml`: - ```yaml - projects: - my-app: - tracker: - plugin: linear - teamId: "your-team-id" - ``` - -**Branch names:** On `ao spawn <issue>` with the Linear tracker, AO **prefers** Linear’s branch name (same as **Copy git branch name**, API field `branchName`). If that value is missing, it **falls back** to the previous convention: `feat/<issue-id>` (e.g. `feat/INT-123`). To change how Linear generates `branchName`, use **Linear → Settings → Integrations → GitHub → Branch format**. - -**Verification:** - -```bash -echo $LINEAR_API_KEY # Should print your key -``` - -### Slack - -**Setup:** - -1. Create incoming webhook: https://api.slack.com/messaging/webhooks -2. Add to environment: - - ```bash - echo 'export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/..."' >> ~/.zshrc - source ~/.zshrc - ``` - -3. Configure in `agent-orchestrator.yaml`: - ```yaml - notifiers: - slack: - plugin: slack - webhook: ${SLACK_WEBHOOK_URL} - channel: "#agent-updates" - ``` - -**Verification:** - -```bash -# Send test message -curl -X POST -H 'Content-type: application/json' \ - --data '{"text":"Agent Orchestrator test"}' \ - $SLACK_WEBHOOK_URL -``` - -### Custom Trackers - -To add a custom tracker (Jira, Asana, etc.), create a plugin: - -1. See plugin examples in `packages/plugins/tracker-*/` -2. Implement the `Tracker` interface from `@aoagents/ao-core` -3. Register your plugin in the config - -See [Development Guide](./docs/DEVELOPMENT.md) for plugin development guidelines. - -## Troubleshooting - -### Run `ao doctor` - -Use the built-in doctor before debugging a broken install by hand: - -```bash -ao doctor -ao doctor --fix -``` - -`ao doctor` reports deterministic PASS/WARN/FAIL checks for PATH and launcher resolution, required binaries, terminal-runtime health (tmux on Unix; PowerShell / `runtime-process` on Windows), GitHub CLI health, stale AO temp files, config support directories, and core build/runtime sanity. It runs and is supported on Windows. 
`--fix` only applies safe fixes such as creating missing AO support directories, refreshing the local launcher link, and removing stale AO temp files. - -### Run `ao update` - -When you installed AO from this repository and want to refresh that local install: - -```bash -git switch main -ao update -``` - -`ao update` is intentionally conservative: it requires a clean working tree on `main`, fast-forwards from `origin/main`, reinstalls dependencies, clean-rebuilds the critical core/CLI/web packages, refreshes the launcher with `npm link`, and runs CLI smoke tests. Works on macOS, Linux, and Windows (Windows uses the bundled `ao-update.ps1` script automatically). Use `ao update --skip-smoke` to stop after rebuild, or `ao update --smoke-only` to rerun just the smoke checks. - -### "No agent-orchestrator.yaml found" - -**Problem:** The orchestrator can't find your config file. - -**Solution:** - -```bash -# ao start auto-creates the config if none exists -ao start - -# Or copy an example and edit manually -cp examples/simple-github.yaml agent-orchestrator.yaml -``` - -### "tmux not found" - -**Problem:** tmux is not installed (required for the tmux runtime — the default on macOS and Linux). - -**Solution:** - -```bash -# macOS -brew install tmux - -# Ubuntu/Debian -sudo apt install tmux - -# Fedora/RHEL -sudo dnf install tmux -``` - -**On Windows:** this error should not appear in normal use. If it does, your config has `runtime: tmux` set explicitly. Switch to `runtime: process` (or remove the override — `process` is the Windows default), and AO will use ConPTY natively without tmux. - -### "gh auth failed" - -**Problem:** GitHub CLI is not authenticated. - -**Solution:** - -```bash -gh auth login - -# Select: -# - GitHub.com (not Enterprise) -# - HTTPS (recommended) -# - Authenticate with browser -# - Include repo scope -``` - -**Verify:** - -```bash -gh auth status -``` - -### "LINEAR_API_KEY not found" - -**Problem:** Linear API key is not set in environment. 
- -**Solution:** - -```bash -# Get your key from: https://linear.app/settings/api - -# Add to shell profile -echo 'export LINEAR_API_KEY="lin_api_..."' >> ~/.zshrc -source ~/.zshrc - -# Verify -echo $LINEAR_API_KEY -``` - -### "Port already in use" - -**Problem:** Another service is using the dashboard port (default 3000). - -**Note:** `ao start` automatically finds the next free port if the configured port is busy. You'll see a message like "Port 3000 is busy — using 3001 instead." If you still need to fix it manually: - -```bash -# Option 1: Change port in agent-orchestrator.yaml -port: 3001 - -# Option 2: Find and kill the process using the port -lsof -ti:3000 | xargs kill -``` - -### "Workspace creation failed" - -**Problem:** Orchestrator can't create worktrees or clones. - -**Solution:** - -```bash -# AO stores runtime data under ~/.agent-orchestrator/ -ls -la ~/.agent-orchestrator - -# Create the base directory if missing -mkdir -p ~/.agent-orchestrator - -# Check disk space -df -h -``` - -### "Session not found" - -**Problem:** Session ID doesn't exist or was already destroyed. - -**Solution:** - -```bash -# List active sessions -ao session ls - -# Check status dashboard -ao status -``` - -### "Agent not responding" - -**Problem:** Agent session is stuck or frozen. - -**Solution:** - -```bash -# Check session status -ao status - -# Attach to session to investigate -ao open - -# Send message to agent -ao send "Please report your current status" - -# Kill and respawn if necessary -ao session kill -ao spawn -``` - -### "Permission denied" when spawning - -**Problem:** Agent doesn't have permissions for git operations. - -**Solution:** - -```bash -# Check SSH keys are added -ssh -T git@github.com - -# Add SSH key if needed -ssh-add ~/.ssh/id_ed25519 - -# Or use HTTPS and authenticate gh CLI -gh auth login -``` - -### "YAML parse error" - -**Problem:** Syntax error in `agent-orchestrator.yaml`. 
- -**Solution:** - -```bash -# Validate YAML syntax online: https://www.yamllint.com/ - -# Common issues: -# - Incorrect indentation (use 2 spaces, not tabs) -# - Missing quotes around strings with special characters -# - Typo in field names -``` - -### "Node version too old" - -**Problem:** Node.js version is below 20. - -**Solution:** - -```bash -# Check version -node --version - -# Upgrade with nvm (recommended) -nvm install 20 -nvm use 20 -nvm alias default 20 - -# Or download from: https://nodejs.org/ -``` - -## Advanced Configuration - -### Multi-Project Setup - -Manage multiple repositories: - -```yaml -projects: - frontend: - repo: org/frontend - path: ~/frontend - sessionPrefix: fe - - backend: - repo: org/backend - path: ~/backend - sessionPrefix: api - - docs: - repo: org/docs - path: ~/docs - sessionPrefix: doc -``` - -See [examples/multi-project.yaml](./examples/multi-project.yaml) for full example. - -### Custom Plugin Development - -Create custom plugins for: - -- Different runtimes (Docker, Kubernetes, SSH, cloud VMs) -- Different agents (custom AI assistants) -- Different trackers (Jira, Asana, custom systems) -- Different notifiers (email, webhooks, custom integrations) - -See [Development Guide](./docs/DEVELOPMENT.md) for plugin development guidelines. - -### Docker Runtime - -Run agents in Docker containers: - -```yaml -defaults: - runtime: docker - -# Plugin will use official images or build from Dockerfile -``` - -### Kubernetes Runtime - -Run agents in Kubernetes pods: - -```yaml -defaults: - runtime: kubernetes - -# Requires kubectl configured with cluster access -``` - -### Custom Notifiers - -Send notifications to custom webhooks: - -```yaml -notifiers: - webhook: - plugin: webhook - url: https://your-service.com/webhook - method: POST - headers: - Authorization: "Bearer ${WEBHOOK_TOKEN}" -``` - -## FAQ - -### What's a session? - -A session is an isolated workspace where an agent works on a single issue. 
Each session has: - -- Its own git worktree or clone -- Its own runtime session — a tmux session on macOS/Linux, a ConPTY pty-host process on Windows (or a Docker container, etc.) -- Its own metadata (branch, PR, status) -- Its own event log - -Sessions are ephemeral — they're created for an issue and destroyed when merged. - -### What's a worktree vs clone? - -**Worktree** (default): - -- Shares `.git` directory with main repo -- Fast to create (no cloning) -- Efficient disk usage -- Best for local development - -**Clone**: - -- Full independent repository clone -- Slower to create -- More disk space -- Better for isolation, remote work - -### How do reactions work? - -Reactions are event handlers that run automatically: - -1. Event occurs (CI fails, review comment added, PR approved) -2. Orchestrator checks reaction config -3. If `auto: true`, performs the action automatically -4. If escalation threshold reached, notifies human - -Actions can be: - -- `send-to-agent` - Forward event to agent to handle -- `auto-merge` - Merge PR automatically -- `notify` - Send notification to human - -### When should I enable auto-merge? - -Enable auto-merge if: - -- ✅ You have comprehensive CI/CD tests -- ✅ You require code review approval -- ✅ You trust your agents to write correct code -- ✅ You want maximum automation - -Don't enable auto-merge if: - -- ❌ You have incomplete test coverage -- ❌ You want manual review of every change -- ❌ You're still evaluating agent quality -- ❌ You work on critical systems (finance, healthcare, etc.) - -Start with `auto: false` and enable after building confidence. - -### How do I add custom agent rules? - -**Inline:** - -```yaml -projects: - my-app: - agentRules: | - Always run tests before pushing. - Use conventional commits. -``` - -**External file:** - -```yaml -projects: - my-app: - agentRulesFile: .agent-rules.md -``` - -Rules are included in every agent prompt for that project. - -### Can I use multiple trackers? - -Yes! 
Different projects can use different trackers: - -```yaml -projects: - frontend: - tracker: - plugin: github - - backend: - tracker: - plugin: linear - teamId: "..." -``` - -### How do I monitor agent progress? - -Three ways: - -1. **Dashboard** - `ao start` then visit http://localhost:3000 (or your configured `port:`) -2. **CLI status** - `ao status` (text-based dashboard) -3. **Attach to session** - `ao open ` (live terminal) - -### What if an agent gets stuck? - -```bash -# Check status -ao status - -# Send message -ao send "What's your current status?" - -# Attach to investigate -ao open - -# Kill and respawn if necessary -ao session kill -ao spawn -``` - -Agents also send "stuck" notifications automatically after inactivity threshold. - -### How do I clean up old sessions? - -```bash -# List all sessions -ao session ls - -# Kill specific session -ao session kill - -# Cleanup script (example) -ao session ls --json --include-terminated | jq -r '.data[] | select(.status == "merged") | .id' | xargs -I{} ao session kill {} -``` - -> **Note:** `ao session ls --json` and `ao status --json` emit `{ data: [...], meta: { hiddenTerminatedCount } }`. By default terminated sessions (`killed`, `terminated`, `done`, `merged`, `errored`, `cleanup`) are hidden — pass `--include-terminated` to include them in `data`. - -### Can I run multiple orchestrators? - -Yes! Each orchestrator instance should have: - -- Different dashboard port (`port`) — e.g., 3000 for project A, 3001 for project B -- Different config location or project paths - -AO derives runtime directories from the config location, so separate config locations already produce separate hash-scoped runtime paths under `~/.agent-orchestrator/`. Terminal WebSocket ports are auto-detected by default, so you typically only need to set `port:` differently. If you need explicit control, you can also set `terminalPort:` and `directTerminalPort:` per config. 
- -Useful for: - -- Separating projects -- Different teams -- Testing new configs - -## Next Steps - -1. **Start the orchestrator** — `ao start` (auto-creates config on first run) -2. **Spawn an agent** — `ao spawn 123` (project auto-detected from cwd) -3. **Monitor progress** — `ao status` or dashboard at http://localhost:3000 -4. **Read [Development Guide](./docs/DEVELOPMENT.md)** — Code conventions and architecture -5. **Explore examples** — See [examples/](./examples/) for more configs -6. **Join the community** — Report issues, share configs, contribute plugins - ---- - -**Need help?** Open an issue at: https://github.com/ComposioHQ/agent-orchestrator/issues diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md deleted file mode 100644 index c8ef0b7e30..0000000000 --- a/TROUBLESHOOTING.md +++ /dev/null @@ -1,61 +0,0 @@ -# Troubleshooting - -## DirectTerminal: posix_spawnp failed error - -**Symptom**: Terminal in browser shows "Connected" but blank. WebSocket logs show: - -``` -[DirectTerminal] Failed to spawn PTY: Error: posix_spawnp failed. -``` - -**Root Cause**: node-pty prebuilt binaries are incompatible with your system. 
- -**Fix**: Rebuild node-pty from source: - -```bash -# From the repository root -cd node_modules/.pnpm/node-pty@1.1.0/node_modules/node-pty -npx node-gyp rebuild -``` - -**Verification**: - -```bash -# Test node-pty works -node -e "const pty = require('./node_modules/.pnpm/node-pty@1.1.0/node_modules/node-pty'); \ - const shell = pty.spawn('/bin/zsh', [], {name: 'xterm-256color', cols: 80, rows: 24, \ - cwd: process.env.HOME, env: process.env}); \ - shell.onData((d) => console.log('✅ OK')); \ - setTimeout(() => process.exit(0), 1000);" -``` - -**When this happens**: - -- After `pnpm install` (uses cached prebuilts) -- After copying the repo to a new location -- On some macOS configurations with Homebrew Node - -**Permanent fix**: The postinstall hook automatically rebuilds node-pty: - -```bash -pnpm install # Automatically rebuilds node-pty via postinstall hook -``` - -If you need to manually rebuild: - -```bash -cd node_modules/.pnpm/node-pty@1.1.0/node_modules/node-pty -npx node-gyp rebuild -``` - -## Other Issues - -### Config file not found - -**Symptom**: API returns 500 with "No agent-orchestrator.yaml found" - -**Fix**: Ensure config exists in the directory where you run `ao start`, or symlink it: - -```bash -ln -s /path/to/agent-orchestrator.yaml packages/web/agent-orchestrator.yaml -``` diff --git a/agent-orchestrator.yaml.example b/agent-orchestrator.yaml.example deleted file mode 100644 index eecf3dd490..0000000000 --- a/agent-orchestrator.yaml.example +++ /dev/null @@ -1,174 +0,0 @@ -# Agent Orchestrator Configuration -# Copy to agent-orchestrator.yaml and customize. -$schema: https://raw.githubusercontent.com/ComposioHQ/agent-orchestrator/main/schema/config.schema.json - -# Runtime data directories are auto-derived from this config location under: -# ~/.agent-orchestrator/{hash}-{projectId}/ -# You usually do not need to configure paths manually. 
- -# Web dashboard port -port: 3000 - -# Terminal server ports (defaults: 14800/14801 — chosen to avoid conflicts with dev tools) -# Override when running multiple dashboards to avoid EADDRINUSE -# terminalPort: 14800 -# directTerminalPort: 14801 - -# Power management — controls system sleep while AO is running -# power: -# preventIdleSleep: true # Default on macOS, no-op on Linux -# # Keeps Mac awake for remote dashboard access (e.g., via Tailscale) -# # Uses caffeinate -i -w — auto-releases when AO exits -# # Note: lid-close sleep is enforced by hardware and cannot be prevented - -# Lifecycle — controls how AO cleans up sessions after their PRs merge -# lifecycle: -# autoCleanupOnMerge: true # Default. When a PR is detected as merged, tear down -# # the tmux session, remove the worktree, and archive -# # metadata so `ao status` stays clean. Set false if -# # you want merged worktrees preserved for inspection. -# mergeCleanupIdleGraceMs: 300000 # Grace window (ms) before forcing cleanup on an agent -# # that is still active at merge time. Default 5 min. 
- -# Default plugins (these are the defaults — you can omit this section) -# runtime defaults to 'tmux' on Linux/macOS, 'process' on Windows -defaults: - # runtime: tmux # tmux (Linux/macOS default) | process (Windows default) - agent: claude-code # claude-code | codex | aider | opencode | cursor | kimicode - # orchestrator: - # agent: claude-code - # worker: - # agent: codex - workspace: worktree # worktree | clone - notifiers: [desktop] # desktop | slack | discord | webhook | composio | openclaw - -# Installer-managed external plugins (optional) -# plugins: -# - name: owasp-auditor -# source: registry # registry | npm | local -# package: "@ao-plugins/owasp-auditor" -# version: "^0.1.0" -# enabled: true -# -# - name: local-dev-plugin -# source: local -# path: ./plugins/local-dev-plugin -# enabled: true - -# Projects — at minimum, specify repo and path -projects: - my-app: - name: My App - repo: org/my-app - path: ~/my-app - defaultBranch: main - sessionPrefix: app - - # Issue tracker (defaults to github issues) - # tracker: - # plugin: linear - # teamId: "your-team-id" - - # SCM webhook acceleration (optional) - # scm: - # plugin: github - # webhook: - # path: /api/webhooks/github - # secretEnvVar: GITHUB_WEBHOOK_SECRET - # signatureHeader: x-hub-signature-256 - # eventHeader: x-github-event - # deliveryHeader: x-github-delivery - # maxBodyBytes: 1048576 - - # Per-project environment variables forwarded into worker session runtimes. - # Useful for scoping per-project tokens (e.g. pinning gh auth via GH_TOKEN). - # AO-internal vars (AO_SESSION, AO_PROJECT_ID, etc.) always take precedence. 
- # env: - # GH_TOKEN: ghp_xxx - - # Files to symlink into workspaces - # symlinks: [.env, .claude] - - # Commands to run after workspace creation - # postCreate: - # - "pnpm install" - - # Agent-specific config - # agentConfig: - # permissions: skip # --dangerously-skip-permissions - # model: opus - - # Optional role-specific agent overrides - # orchestrator: - # agent: claude-code - # agentConfig: - # model: claude-sonnet-4-5 - # worker: - # agent: codex - # agentConfig: - # model: gpt-5-codex - - # Inline rules included in every agent prompt for this project - # agentRules: | - # Always run tests before pushing. - # Use conventional commits (feat:, fix:, chore:). - - # Path to a rules file (relative to project path) - # agentRulesFile: .agent-rules.md - - # Rules for the orchestrator agent (reserved for future use) - # orchestratorRules: | - # Prefer to batch-spawn related issues together. - - # OpenCode issue session strategy (only for agent: opencode) - # opencodeIssueSessionStrategy: reuse # reuse | delete | ignore - - # Per-project reaction overrides - # reactions: - # approved-and-green: - # auto: true # enable auto-merge for this project - -# Notification channels -# notifiers: -# slack: -# plugin: slack -# webhook: ${SLACK_WEBHOOK_URL} -# channel: "#agent-updates" -# -# openclaw: -# plugin: openclaw -# url: http://127.0.0.1:18789/hooks/agent # Use https:// for remote (non-localhost) deployments -# token: ${OPENCLAW_HOOKS_TOKEN} -# retries: 3 -# retryDelayMs: 1000 -# wakeMode: now - -# Notification routing by priority -# notificationRouting: -# urgent: [desktop, slack] # agent stuck, needs input, errored -# action: [desktop, slack] # PR ready to merge -# warning: [slack] # auto-fix failed -# info: [slack] # summary, all done - -# Reactions — auto-responses to events (these are the defaults) -# reactions: -# ci-failed: -# auto: true -# action: send-to-agent -# retries: 2 -# escalateAfter: 2 -# -# changes-requested: -# auto: true -# action: send-to-agent -# 
escalateAfter: 30m -# -# approved-and-green: -# auto: false # set to true for auto-merge -# action: notify -# priority: action -# -# agent-stuck: -# threshold: 10m -# action: notify -# priority: urgent diff --git a/artifacts/architecture-design.md b/artifacts/architecture-design.md deleted file mode 100644 index 0f279e403e..0000000000 --- a/artifacts/architecture-design.md +++ /dev/null @@ -1,784 +0,0 @@ -# Architecture Design — Agent Orchestrator - -_Compiled: 2026-02-13_ - -## Core Philosophy - -**Push, not pull.** The human never polls. The human never checks a dashboard wondering "what's happening?" The system pushes notifications to the human exactly when their attention is needed — and stays silent otherwise. - -The dashboard is a **drill-down tool** you open after receiving a notification, not something you sit and watch. The **Notifier is the primary interface.** - -### Interaction Model - -``` -Human spawns 20 agents → walks away → lives their life - │ - ┌───────────────────────────────┘ - │ - ▼ - Orchestrator runs autonomously: - ├── Agents work on issues - ├── CI fails? → auto-send fix to agent → resolved silently - ├── Review comments? → auto-send to agent → resolved silently - ├── Agent stuck? → NOTIFY HUMAN - ├── Agent needs input? → NOTIFY HUMAN - ├── PR ready to merge? → NOTIFY HUMAN (or auto-merge if configured) - ├── Agent errored? → NOTIFY HUMAN - └── All done? → NOTIFY HUMAN with summary - -Human only intervenes when notified. Everything else is handled. -``` - -### Design Principles - -1. **Push, not pull**: Notifications are the primary interface. Dashboard is secondary drill-down. -2. **Server-centric**: One central daemon (`ao start`) manages every registered project, and all agents report to it. Each project gets its own orchestrator agent — one orchestrator per project, never a single orchestrator spanning all projects. -3. **Plugin everything**: 8 pluggable abstraction slots. Swap any component. -4. 
**Works out of the box**: Default config (tmux + claude-code + worktree + github) requires zero setup beyond `npx agent-orchestrator init`. -5. **Silence by default, loud when needed**: Auto-handle routine issues (CI failures, review comments). Only notify the human when their judgment or action is truly required. -6. **Runtime agnostic**: tmux is just one way to run agents. Docker, K8s, cloud, SSH, child processes — all through the same interface. - ---- - -## Nomenclature - -| Term | Definition | Examples | -| ---------------- | ------------------------------------------ | -------------------------------- | -| **Orchestrator (daemon)** | The central server process that manages **all** registered projects | `ao start` + the Next.js app | -| **Orchestrator agent** | A per-project agent session that spawns and supervises workers — **one per project** | `my-app-orchestrator`, `backend-api-orchestrator` | -| **Project** | A configured repository to work on | `my-app`, `backend-api` | -| **Session** | A running agent instance working on a task | `my-app-1`, `my-app-2` | -| **Runtime** | Where/how the session executes | tmux, docker, k8s, process | -| **Agent** | The AI coding tool being used | claude-code, codex, aider | -| **Workspace** | Isolated code copy for a session | git worktree, clone, volume | -| **Tracker** | Issue/task tracking system | github, linear, jira | -| **SCM** | Source code management platform | github, gitlab, bitbucket | -| **Notifier** | Communication/alert channel | slack, discord, desktop, webhook | -| **Terminal** | Human interaction interface | iterm2, web terminal, none | - ---- - -## System Architecture - -``` - ┌──────────────────────────────────────┐ - CLI ───REST───► │ Orchestrator Server │ - │ (Next.js) │ - Web ───REST/───► │ │ - SSE │ ┌────────────┐ ┌────────────────┐ │ - │ │ Session │ │ Plugin │ │ - Agents ────────► │ │ Manager │ │ Registry │ │ - (heartbeat/ │ └──────┬─────┘ └───────┬────────┘ │ - webhook) │ │ │ │ - │ ┌──────┴─────┐ 
┌───────┴────────┐ │ - │ │ Lifecycle │ │ Config │ │ - │ │ Manager │ │ Manager │ │ - │ └──────┬─────┘ └────────────────┘ │ - │ │ │ - │ ┌──────┴──────────────────────────┐ │ - │ │ Event Bus │ │ - │ │ (pub/sub + persistence) │ │ - │ └──┬──────┬──────┬──────┬────────┘ │ - └─────┼──────┼──────┼──────┼──────────┘ - │ │ │ │ - ┌───────┘ │ │ └───────┐ - ▼ ▼ ▼ ▼ - ┌─────────┐ ┌────────┐ ┌────────┐ ┌─────────┐ - │ SSE → │ │Notifier│ │Reaction│ │ Event │ - │ Web UI │ │Plugins │ │ Engine │ │ Log │ - └─────────┘ └────────┘ └────────┘ └─────────┘ -``` - -### Data Flow - -1. **Agent → Server**: Heartbeats, status updates, "need input" signals -2. **Server → Dashboard**: SSE stream of session state changes -3. **Server → Notifiers**: Alerts when human attention is needed -4. **Server → Agents**: Commands via runtime-specific channels (tmux send-keys, docker exec, HTTP POST, etc.) -5. **CLI → Server**: REST API calls for spawn, kill, send, status -6. **SCM → Server**: PR state, CI checks, review comments (polled or webhooks) - ---- - -## The 8 Plugin Slots - -### 1. 
Runtime — Where sessions execute - -```typescript -interface Runtime { - readonly name: string; - - // Lifecycle - create(session: SessionConfig): Promise; - destroy(handle: RuntimeHandle): Promise; - - // Communication - sendMessage(handle: RuntimeHandle, message: string): Promise; - getOutput(handle: RuntimeHandle, lines?: number): Promise; - - // Health - isAlive(handle: RuntimeHandle): Promise; - getMetrics(handle: RuntimeHandle): Promise; - - // Optional: interactive access - attach?(handle: RuntimeHandle): Promise; -} -``` - -| Implementation | How it works | Best for | -| ---------------- | ------------------------------ | ------------------------------ | -| `tmux` (default) | tmux sessions + send-keys | Local development, interactive | -| `process` | Child processes + stdin/stdout | Headless, CI/CD, scripting | -| `docker` | Docker containers + exec | Isolation, reproducibility | -| `kubernetes` | K8s pods/jobs | Scale, enterprise | -| `ssh` | SSH to remote + tmux/process | Remote machines | -| `e2b` | E2B SDK (Firecracker microVMs) | Cloud sandboxes | -| `fly` | Fly.io Machines API | Cost-effective cloud | -| `modal` | Modal Sandboxes | GPU, autoscaling | - -### 2. 
Agent — AI coding tool - -```typescript -interface Agent { - readonly name: string; - readonly processName: string; // for detection - - // Launch - getLaunchCommand(session: SessionConfig, project: ProjectConfig): string; - getEnvironment(session: SessionConfig): Record; - - // Activity detection - detectActivity(session: Session): Promise; - isProcessRunning(runtimeHandle: RuntimeHandle): Promise; - - // Introspection - introspect(session: Session): Promise; - - // Optional - postLaunchSetup?(session: Session): Promise; - estimateCost?(session: Session): Promise; -} -``` - -| Implementation | Launch command | Activity detection | -| ----------------------- | --------------------------------------- | -------------------------- | -| `claude-code` (default) | `claude --dangerously-skip-permissions` | JSONL mtime + process tree | -| `claude-headless` | `claude -p --output-format stream-json` | stdout parsing | -| `codex` | `codex` | Process detection | -| `aider` | `aider --no-auto-commits` | Process detection | -| `goose` | `goose session` | Process detection | -| `custom` | User-defined command | Configurable | - -### 3. Workspace — Code isolation - -```typescript -interface Workspace { - readonly name: string; - - create(project: ProjectConfig, session: SessionConfig): Promise; - destroy(path: WorkspacePath): Promise; - list(project: ProjectConfig): Promise; - - // Optional hooks - postCreate?(path: WorkspacePath, project: ProjectConfig): Promise; -} -``` - -| Implementation | How | Tradeoff | -| -------------------- | ------------------------ | ---------------------------------------- | -| `worktree` (default) | `git worktree add` | Fast, shared objects, requires same repo | -| `clone` | `git clone` | Full isolation, slower, more disk | -| `copy` | `cp -r` | No git dependency, heaviest | -| `volume` | Docker/K8s volume mounts | For container runtimes | - -### 4. 
Tracker — Issue/task tracking - -```typescript -interface Tracker { - readonly name: string; - - getIssue(identifier: string): Promise; - isCompleted(identifier: string): Promise; - issueUrl(identifier: string): string; - branchName(identifier: string): string; - generatePrompt(identifier: string, project: ProjectConfig): string; - - // Optional - listIssues?(filters?: IssueFilters): Promise; - updateIssue?(identifier: string, update: IssueUpdate): Promise; - createIssue?(input: CreateIssueInput): Promise; -} -``` - -| Implementation | API | Auth | -| ------------------ | ----------- | -------------- | -| `github` (default) | `gh` CLI | GitHub token | -| `linear` | GraphQL API | Linear API key | -| `jira` | REST API | Jira token | -| `plain` | Local files | None | - -### 5. SCM — Source code platform (PR, CI, Reviews) - -```typescript -interface SCM { - readonly name: string; - - // PR lifecycle - detectPR(session: Session): Promise; - getPRState(pr: PRInfo): Promise; - createPR(session: Session, title: string, body: string): Promise; - mergePR(pr: PRInfo, method?: MergeMethod): Promise; - closePR(pr: PRInfo): Promise; - - // CI tracking - getCIChecks(pr: PRInfo): Promise; - getCISummary(pr: PRInfo): Promise; - - // Review tracking - getReviews(pr: PRInfo): Promise; - getReviewDecision(pr: PRInfo): Promise; - getPendingComments(pr: PRInfo): Promise; - getAutomatedComments(pr: PRInfo): Promise; - - // Merge readiness - getMergeability(pr: PRInfo): Promise; -} -``` - -| Implementation | API | Features | -| ------------------ | ------------------- | -------------------------- | -| `github` (default) | `gh` CLI + REST API | Full PR/CI/review support | -| `gitlab` | REST API | MR/pipeline/review support | -| `bitbucket` | REST API | PR/pipeline support | - -### 6. Notifier — THE PRIMARY INTERFACE - -The notifier is not a nice-to-have — it is the primary way the system communicates with humans. The human walks away after spawning agents. 
Notifications bring them back only when needed. - -```typescript -interface Notifier { - readonly name: string; - - // Core: push a notification to the human - notify(event: OrchestratorEvent): Promise; - - // Optional: actionable notifications (buttons/links) - notifyWithActions?(event: OrchestratorEvent, actions: NotifyAction[]): Promise; - - // Optional: richer communication (post to channel) - post?(message: string, context?: NotifyContext): Promise; -} - -// Notifications can include actions the human can take directly -interface NotifyAction { - label: string; // "Merge PR", "Open Dashboard", "Kill Session" - url?: string; // Deep link to dashboard action - callback?: string; // API endpoint to call -} -``` - -| Implementation | Channel | Best for | Actionable? | -| ------------------- | ---------------------------- | ------------------- | ----------------------------- | -| `desktop` (default) | OS notifications (clickable) | Solo developer | Click → opens dashboard | -| `slack` | Slack messages with buttons | Teams | Buttons → merge, review, kill | -| `discord` | Discord messages | Communities | Links | -| `webhook` | HTTP POST | Custom integrations | Custom | -| `email` | Email digest | Async | Links | - -**Multiple notifiers can be active simultaneously.** E.g., desktop for immediate alerts + Slack for team visibility + email for daily digest. - -### 7. Terminal — Human interaction interface - -```typescript -interface Terminal { - readonly name: string; - - openSession(session: Session): Promise; - openAll(sessions: Session[]): Promise; - - // Optional - isSessionOpen?(session: Session): Promise; -} -``` - -| Implementation | How | Platform | -| ---------------- | ------------------------- | ------------- | -| `auto` (default) | Detect best available | Any | -| `iterm2` | AppleScript API | macOS | -| `web` | xterm.js in browser | Any | -| `tmux-attach` | `tmux attach` in terminal | Any with tmux | -| `none` | Headless | CI/CD | - -### 8. 
Lifecycle Manager (Core — not pluggable) - -The Lifecycle Manager is the orchestrator's brain. It: - -- Polls SCM + Agent plugins on configurable intervals -- Maintains state machine per session -- Emits events on state transitions -- Runs configured reactions -- Feeds real-time data to dashboard via SSE - ---- - -## Session Lifecycle State Machine - -``` - ┌──────────┐ - │ SPAWNING │ - └────┬─────┘ - │ runtime.create() + agent launched - ▼ - ┌──────────┐ - ┌─────│ WORKING │◄─────────────────────────┐ - │ └────┬─────┘ │ - │ │ PR detected │ - │ ▼ │ - │ ┌──────────────┐ │ - │ │ PR_OPEN │ │ - │ └────┬─────────┘ │ - │ │ │ - │ ┌────┴────────────┐ │ - │ ▼ ▼ │ - │ ┌──────────┐ ┌─────────────────┐ │ - │ │ CI_FAILED│ │ REVIEW_PENDING │ │ - │ └────┬─────┘ └────┬────────────┘ │ - │ │ │ │ - │ │ ┌──────────┴──────┐ │ - │ │ ▼ ▼ │ - │ │ ┌──────────────┐ ┌──────────┐ │ - │ │ │CHANGES_REQ'D │ │ APPROVED │ │ - │ │ └──────┬───────┘ └────┬─────┘ │ - │ │ │ │ │ - │ └────────┼───────────────┘ │ - │ │ agent fixes │ - │ └──────────────────────────┘ - │ - │ When approved + CI green + no conflicts: - │ ▼ - │ ┌──────────┐ - │ │MERGEABLE │──► auto-merge or notify human - │ └────┬─────┘ - │ │ - │ ▼ - │ ┌──────────┐ - │ │ MERGED │ - │ └────┬─────┘ - │ │ - │ ▼ - │ ┌──────────┐ - │ │ CLEANUP │──► destroy workspace + archive metadata - │ └──────────┘ - │ - │ At any point: - │ ┌───────────────┐ - ├────►│ NEEDS_INPUT │──► notify human - │ └───────────────┘ - │ ┌───────────────┐ - ├────►│ STUCK/IDLE │──► notify human after threshold - │ └───────────────┘ - │ ┌───────────────┐ - ├────►│ ERRORED │──► notify human - │ └───────────────┘ - │ ┌───────────────┐ - └────►│ KILLED │──► cleanup - └───────────────┘ -``` - ---- - -## Human Attention Optimization - -**The system notifies the human. The human never polls.** - -The orchestrator operates on a simple principle: handle everything you can automatically, and push a notification to the human only when their judgment or action is truly required. 
The human spawns agents, walks away, and gets notified. - -### Two-Tier Event Handling - -**Tier 1: Auto-handled (human never sees these)** -The orchestrator resolves these silently. The human is only notified if auto-resolution fails. - -| Event | Auto-Response | Escalation | -| ---------------------- | -------------------------------- | -------------------------------- | -| CI failed | Send fix prompt to agent | Notify after 2 failed attempts | -| Review comments | Send "address comments" to agent | Notify if unresolved after 30min | -| Bugbot/linter comments | Send fix prompt to agent | Notify if unresolved after 30min | -| Merge conflicts | Send "rebase" to agent | Notify if unresolved after 15min | - -**Tier 2: Notify human (requires human judgment)** -These always push a notification. The human's phone buzzes, Slack pings, etc. - -| Event | Priority | Notification | -| -------------------------------------------------------------- | -------- | ----------------------------------------------------- | -| **Agent needs input** (permission, question, stuck) | URGENT | "Session X needs your input" + deep link | -| **Agent errored** (crashed, unrecoverable) | URGENT | "Session X crashed" + error context | -| **PR ready to merge** (approved + CI green) | ACTION | "PR #42 ready to merge" + merge button | -| **Agent idle too long** (no PR, no progress) | WARNING | "Session X idle for 15min, may need help" | -| **Auto-fix failed** (CI fix failed 2x, comments not addressed) | WARNING | "Session X couldn't resolve CI/review — needs you" | -| **All work complete** | INFO | "All 20 sessions done. 18 PRs merged, 2 need review." | - -### Escalation Chains - -Events start at auto-handle and escalate through notification tiers: - -``` -Event detected - │ - ▼ -Can auto-handle? ──yes──► Auto-respond (send to agent) - │ │ - no Resolved? 
──yes──► Done (silent) - │ │ - ▼ no (retry N times) -NOTIFY HUMAN │ - │ ▼ - │ NOTIFY HUMAN - │ "Tried to auto-fix, couldn't resolve" - ▼ -Human acts via: - ├── Notification action button (merge, kill, open) - ├── Dashboard deep link - ├── CLI command - └── Direct tmux attach -``` - -### Notification Channels (Priority-Based Routing) - -Different priorities route to different channels: - -```yaml -notifications: - routing: - urgent: [desktop, slack, sms] # Agent stuck, errored, needs input - action: [desktop, slack] # PR ready to merge - warning: [slack] # Auto-fix failed, idle too long - info: [slack] # Summary, all done -``` - -### Reactions (configurable auto-responses) - -```yaml -# agent-orchestrator.yaml -reactions: - ci-failed: - auto: true - action: send-to-agent - message: "CI is failing. Run `gh pr checks` to see failures, fix them, and push." - retries: 2 - escalate-after: 2 # notify human after 2 failed auto-fix attempts - - changes-requested: - auto: true - action: send-to-agent - message: "Review comments on your PR. Check with `gh pr view --comments` and address each one." - escalate-after: 30m - - bugbot-comments: - auto: true - action: send-to-agent - message: "Automated review comments found. Fix the issues flagged by the bot." - escalate-after: 30m - - merge-conflicts: - auto: true - action: send-to-agent - message: "Your branch has merge conflicts. Rebase on the default branch and resolve them." 
- escalate-after: 15m - - approved-and-green: - auto: false # require human confirmation by default - action: notify - priority: action - message: "PR is ready to merge" - # Set auto: true + action: auto-merge for full automation - - agent-stuck: - threshold: 10m - action: notify - priority: urgent - - agent-needs-input: - action: notify - priority: urgent - - agent-exited: - action: notify - priority: urgent - - all-complete: - action: notify - priority: info - message: "All sessions complete" - include-summary: true # PRs merged, pending, failed - - agent-idle-no-pr: - threshold: 30m # working for 30min with no PR - action: notify - priority: warning - message: "Agent has been working for 30min without creating a PR" -``` - -### Dashboard (Secondary — Drill-Down Tool) - -The dashboard exists for when you get a notification and need to drill down. It's organized by attention priority: - -- **Red zone** (top): URGENT — sessions needing human input RIGHT NOW -- **Orange zone**: ACTION — PRs ready to merge, decisions needed -- **Yellow zone**: WARNING — auto-fix failed, agents idle too long -- **Green zone**: Sessions working normally (collapsed by default) -- **Grey zone**: Completed/merged (collapsed by default) - -Clicking a notification deep-links directly to the relevant session/PR in the dashboard. 
- ---- - -## Configuration - -### Minimal Config (works out of the box) - -```yaml -# agent-orchestrator.yaml -projects: - my-app: - repo: org/repo - path: ~/my-app -``` - -Everything else uses sensible defaults: - -- Runtime: tmux -- Agent: claude-code -- Workspace: worktree -- Tracker: github (inferred from repo) -- SCM: github (inferred from repo) -- Notifier: desktop -- Terminal: auto-detect - -### Full Config - -```yaml -# agent-orchestrator.yaml -dataDir: ~/.agent-orchestrator # metadata storage -worktreeDir: ~/.worktrees # workspace root -port: 3000 # web dashboard port - -defaults: - runtime: tmux - agent: claude-code - workspace: worktree - notifiers: [desktop] - -projects: - my-app: - name: My App - repo: org/repo - path: ~/my-app - defaultBranch: main - sessionPrefix: app - - # Override defaults per project - agent: claude-code - runtime: tmux - - # Issue tracker - tracker: - plugin: linear - teamId: "abc-123" - - # SCM (usually inferred from repo) - scm: - plugin: github - - # Files to symlink into workspaces - symlinks: [.env, .claude] - - # Commands to run after workspace creation - postCreate: - - "pnpm install" - - "claude mcp add rube --transport http https://rube.app/mcp" - - # Agent-specific config - agentConfig: - permissions: skip # --dangerously-skip-permissions - model: opus - - # Reaction overrides - reactions: - approved-and-green: - auto: true # enable auto-merge for this project - -# Notification channels -notifiers: - slack: - plugin: slack - webhook: ${SLACK_WEBHOOK_URL} - channel: "#agent-updates" - desktop: - plugin: desktop - -# Reaction defaults (can be overridden per project) -reactions: - ci-failed: - auto: true - retries: 2 - escalate-after: 2 - changes-requested: - auto: true - escalate-after: 30m - approved-and-green: - auto: false - agent-stuck: - threshold: 10m - agent-needs-input: - priority: urgent -``` - ---- - -## Tech Stack - -| Segment | Choice | Why | -| ------------------- | --------------------------------------- | 
---------------------------------------------------- | -| **Core library** | TypeScript | Shared types across all packages | -| **Web + API** | Next.js 15 (App Router) | SSR + API routes in one process | -| **Styling** | Tailwind CSS | Dark theme, responsive | -| **Real-time** | Server-Sent Events | One-way push, auto-reconnect, simpler than WebSocket | -| **CLI** | TypeScript + Commander.js | Shares types with core | -| **Config** | YAML + Zod validation | Human-readable, type-safe | -| **State** | Flat metadata files + Event log (JSONL) | Stateless orchestrator, crash recovery | -| **Package manager** | pnpm workspaces | Fast, monorepo-native | -| **Distribution** | npm (`npx agent-orchestrator`) | Zero install | - -### Why TypeScript Throughout - -1. **One language** — Plugin authors only need TypeScript/JavaScript -2. **Shared types** — No serialization boundaries between core, web, CLI, plugins -3. **npm distribution** — `npx agent-orchestrator` works everywhere -4. **Next.js** — Web + API server in one process, great DX -5. **Largest ecosystem** — More packages on npm than any other registry -6. **Performance is fine** — Bottleneck is AI agents, not orchestrator. We shell out to tmux/git/docker anyway. 
- ---- - -## Directory Structure - -``` -agent-orchestrator/ -├── package.json -├── pnpm-workspace.yaml -├── tsconfig.base.json -├── agent-orchestrator.yaml.example -│ -├── packages/ -│ ├── core/ # @aoagents/ao-core -│ │ └── src/ -│ │ ├── types.ts # All interfaces + types -│ │ ├── config.ts # YAML config loader + Zod validation -│ │ ├── session-manager.ts # Session CRUD -│ │ ├── lifecycle-manager.ts # State machine + reactions -│ │ ├── event-bus.ts # Pub/sub + JSONL persistence -│ │ ├── plugin-registry.ts # Plugin discovery + loading -│ │ ├── metadata.ts # Flat-file read/write -│ │ └── index.ts -│ │ -│ ├── cli/ # @aoagents/ao-cli → `ao` binary -│ │ └── src/ -│ │ ├── index.ts # Commander.js setup -│ │ └── commands/ -│ │ ├── init.ts # ao init -│ │ ├── status.ts # ao status -│ │ ├── spawn.ts # ao spawn [issue] -│ │ ├── batch-spawn.ts # ao batch-spawn -│ │ ├── session.ts # ao session [ls|kill|cleanup] -│ │ ├── send.ts # ao send -│ │ ├── review-check.ts # ao review-check [project] -│ │ ├── dashboard.ts # ao dashboard (starts web) -│ │ └── open.ts # ao open [session|all] -│ │ -│ ├── web/ # @aoagents/ao-web -│ │ ├── next.config.ts -│ │ └── src/ -│ │ ├── app/ -│ │ │ ├── layout.tsx -│ │ │ ├── page.tsx # Dashboard (attention-prioritized) -│ │ │ └── sessions/[id]/ -│ │ │ └── page.tsx # Session detail -│ │ ├── api/ -│ │ │ ├── sessions/ # CRUD + actions -│ │ │ ├── spawn/ # POST spawn -│ │ │ ├── events/ # SSE stream -│ │ │ └── health/ # Server health -│ │ └── components/ -│ │ ├── SessionCard.tsx -│ │ ├── AttentionZone.tsx -│ │ ├── PRStatus.tsx -│ │ ├── CIBadge.tsx -│ │ └── Terminal.tsx # xterm.js -│ │ -│ └── plugins/ # Built-in plugins -│ ├── runtime-tmux/ -│ ├── runtime-process/ -│ ├── runtime-docker/ -│ ├── agent-claude-code/ -│ ├── agent-codex/ -│ ├── agent-aider/ -│ ├── workspace-worktree/ -│ ├── workspace-clone/ -│ ├── tracker-github/ -│ ├── tracker-linear/ -│ ├── scm-github/ -│ ├── notifier-desktop/ -│ ├── notifier-slack/ -│ ├── terminal-iterm2/ -│ └── terminal-web/ -│ 
-├── artifacts/ # Research + design docs -│ ├── competitive-research.md -│ └── architecture-design.md -│ -├── scripts/ # Original bash scripts (reference) -│ -└── CLAUDE.md -``` - ---- - -## Implementation Phases - -### Phase 1: Foundation (Dog-food ready) - -- Monorepo scaffolding -- Core types + interfaces -- Config loader -- Session manager + lifecycle manager + event bus -- tmux runtime, claude-code agent, worktree workspace -- GitHub SCM (PR/CI/review tracking) -- GitHub tracker -- Desktop notifier -- CLI (init, status, spawn, session, send, dashboard) -- Web dashboard with attention-prioritized view -- SSE real-time updates -- Reaction engine (CI failed, changes requested, agent stuck) - -### Phase 2: Multi-Runtime + More Plugins - -- Process runtime (headless claude -p) -- Docker runtime -- Codex + Aider agent adapters -- Linear + Jira trackers -- Slack notifier -- Web terminal (xterm.js) - -### Phase 3: Cloud + Scale - -- Kubernetes runtime -- E2B / Fly.io runtimes -- Cost tracking -- Webhook-triggered spawning - -### Phase 4: Team + Enterprise - -- Dashboard auth -- Role-based access -- Remote session support -- Audit log diff --git a/artifacts/competitive-research.md b/artifacts/competitive-research.md deleted file mode 100644 index 981cb2ef48..0000000000 --- a/artifacts/competitive-research.md +++ /dev/null @@ -1,432 +0,0 @@ -# Competitive Research — Agent Orchestration Tools - -_Compiled: 2026-02-13_ - -## Overview - -Research into 16+ projects that orchestrate AI coding agents. The goal: understand abstractions, architectures, and gaps to build the best, most extensible agent orchestrator. 
- ---- - -## Tier 1: Direct Competitors (Multi-Agent Orchestrators) - -### Gas Town (Steve Yegge) - -- **GitHub**: https://github.com/steveyegge/gastown -- **Stack**: Go 1.23+ (~189K LOC), SQLite3, Git 2.25+, tmux 3.0+ -- **Stars**: Growing rapidly (released Jan 2026) - -**Architecture — MEOW Stack (Molecular Expression of Work):** - -| Layer | What | How | -| ----------------------------- | ------------------------ | ------------------------------------------------------------------------------ | -| **Beads** | Atomic work units | JSONL files tracked in Git. IDs like `gt-abc12`. Universal data/control plane. | -| **Epics** | Hierarchical collections | Organize beads into tree structures for parallel/sequential execution | -| **Molecules** | Workflow graphs | Sequenced beads with dependencies, gates, loops | -| **Protomolecules & Formulas** | Reusable templates | TOML format workflow definitions | - -**Agent Roles (7 roles, 2 scopes):** - -| Role | Scope | Purpose | -| ------------ | ----- | --------------------------------------------------------- | -| **Mayor** | Town | Chief AI coordinator with full workspace context | -| **Deacon** | Town | Health daemon running patrol loops | -| **Dogs** | Town | Maintenance helpers | -| **Crew** | Rig | Named, persistent agents for sustained design/review work | -| **Polecats** | Rig | Ephemeral "cattle" workers spawned for specific tasks | -| **Refinery** | Rig | Merge queue manager handling conflicts | -| **Witness** | Rig | Supervises polecats, unblocks stuck work | - -**Other Abstractions:** - -- **Town** — Workspace directory (`~/gt/`) housing all projects -- **Rigs** — Project containers wrapping git repositories -- **Hooks** — Git worktree-based persistent storage surviving crashes -- **Convoys** — Work-tracking bundles grouping multiple beads for an agent -- **GUPP** — Agents must execute work on their hooks; scheduling persists across restarts - -**Runtime Backends:** claude, gemini, codex, cursor, auggie, amp 
(per-rig config) - -**Communication/Isolation:** - -- Git worktrees for filesystem isolation per agent -- Beads/Hooks for coordination (external state, not shared context windows) -- GUPP: deterministic handoffs through version control, not LLM-judged phase gates - -**Strengths:** Most architecturally ambitious. Crash recovery via git-backed Beads. Role-based agent hierarchy. Multi-agent support. -**Weaknesses:** ~$100/hr token burn, auto-merged failing tests, agents causing unexpected deletions. Go-only ecosystem. No web dashboard. Optimized for autonomous, not human-in-the-loop. - ---- - -### Par (Coplane) - -- **GitHub**: https://github.com/coplane/par -- **Stack**: Python 3.12+ -- **Closest to our current approach** - -**Key Abstractions:** - -- **Sessions**: Single-repo isolated branches via git worktrees + tmux sessions -- **Workspaces**: Multi-repo synchronized development contexts -- **Control Center**: Unified tmux session with windows for each context -- **Labels**: Globally unique, human-readable names - -**Features:** - -- `par start my-feature` — creates worktree + branch + tmux session -- `par send