diff --git a/web/components/docs/DocsNav.tsx b/web/components/docs/DocsNav.tsx index fba44e6ee..9622f67f8 100644 --- a/web/components/docs/DocsNav.tsx +++ b/web/components/docs/DocsNav.tsx @@ -1,11 +1,12 @@ 'use client'; -import type { ComponentType } from 'react'; -import { useEffect, useRef } from 'react'; +import type { ComponentType, ReactElement } from 'react'; +import { useEffect, useRef, useState } from 'react'; import { usePathname } from 'next/navigation'; import { Activity, Bot, + ChevronRight, Cloud, Clock3, Compass, @@ -30,7 +31,7 @@ import { PiBroadcastFill, PiLockKeyDuotone } from 'react-icons/pi'; import { RiLayout5Line } from 'react-icons/ri'; import { SiClaude, SiPython, SiTypescript } from 'react-icons/si'; -import { docsNav } from '../../lib/docs-nav'; +import { docsNav, type NavItem } from '../../lib/docs-nav'; import styles from './docs.module.css'; type NavIcon = ComponentType<{ className?: string; 'aria-hidden'?: boolean | 'true' | 'false' }>; @@ -125,22 +126,78 @@ export function DocsNav({ variant = 'sidebar' }: { variant?: 'sidebar' | 'mobile

{group.title}

))} ); } + +function isLinkActive(slug: string, pathname: string): boolean { + return pathname === `/docs/${slug}` || (slug === 'introduction' && pathname === '/docs'); +} + +function containsActive(item: NavItem, pathname: string): boolean { + if (isLinkActive(item.slug, pathname)) return true; + return item.children?.some((child) => containsActive(child, pathname)) ?? false; +} + +function NavItemRow({ item, pathname }: { item: NavItem; pathname: string }): ReactElement { + const href = `/docs/${item.slug}`; + const isActive = isLinkActive(item.slug, pathname); + const Icon = navIcons[item.slug]; + const hasChildren = Boolean(item.children && item.children.length > 0); + + // Collapsed by default; auto-expanded if the current page is in this + // item's subtree so users don't lose their bearings when navigating. + const activeInSubtree = hasChildren && containsActive(item, pathname); + const [open, setOpen] = useState(activeInSubtree); + + // Re-sync open state when the pathname changes (e.g. nav click). + useEffect(() => { + if (activeInSubtree) setOpen(true); + }, [activeInSubtree]); + + if (!hasChildren) { + return ( +
  • + + {Icon && +
  • + ); + } + + const childListId = `nav-children-${item.slug}`; + return ( +
  • +
    + + {Icon && + +
    + {open && ( + + )} +
  • + ); +} diff --git a/web/components/docs/docs.module.css b/web/components/docs/docs.module.css index 3fbc699ad..1f671dad2 100644 --- a/web/components/docs/docs.module.css +++ b/web/components/docs/docs.module.css @@ -211,6 +211,49 @@ } .navList { list-style: none; padding: 0; margin: 0; } +.navChildren { padding-left: 0.9rem; margin-top: 0.1rem; border-left: 1px solid rgba(255, 255, 255, 0.08); } + +.navLinkRow { + display: flex; + align-items: center; + gap: 0.15rem; +} + +.navLinkRow .navLink { + flex: 1; + min-width: 0; +} + +.navToggle { + display: inline-flex; + align-items: center; + justify-content: center; + width: 1.35rem; + height: 1.35rem; + padding: 0; + background: transparent; + border: 0; + border-radius: 0.3rem; + color: var(--fg-muted); + cursor: pointer; + opacity: 0.65; + transition: transform 0.15s ease, opacity 0.15s, background 0.15s; +} + +.navToggle:hover { + opacity: 1; + background: rgba(255, 255, 255, 0.06); +} + +.navToggleOpen { + transform: rotate(90deg); + opacity: 1; +} + +.navToggleIcon { + width: 0.85rem; + height: 0.85rem; +} .navLink { display: flex; diff --git a/web/content/docs/cli-cloud-commands.mdx b/web/content/docs/cli-cloud-commands.mdx index 13dc0dab2..ce4d24a9c 100644 --- a/web/content/docs/cli-cloud-commands.mdx +++ b/web/content/docs/cli-cloud-commands.mdx @@ -47,6 +47,59 @@ agent-relay cloud sync - `logs` streams workflow or per-agent output. - `sync` downloads the generated patch and applies it locally. +## `--sync-code`: uploading your local repo + +`--sync-code` tarballs your current working copy and ships it to the cloud sandbox as the starting point for the run. Without it, the sandbox starts with **no code on disk** — it doesn't clone from `origin`, it just has an empty `$HOME`. Any workflow that reads your source needs `--sync-code`. 
+ +**In almost every case, you want `--sync-code`.** Running a workflow against an empty sandbox is rarely what you mean; you almost always want your local worktree state there. + +### What gets uploaded + +The tarball is built from `git ls-files` — the **tracked** paths — and `tar` reads their current working-tree contents. No `git clone` anywhere. + +| State | Synced? | +|---|---| +| Committed, unmodified | ✅ Working-tree version | +| Committed, then modified | ✅ Working-tree version (your edits go too) | +| Committed, then modified + `git add`ed | ✅ Working-tree version | +| New file + `git add`ed (not committed) | ✅ — once added, the file is tracked | +| New file, never added | ❌ Untracked → excluded | +| `.gitignore`d path | ❌ Excluded | + +**Rule of thumb:** `git add` whatever the run needs. Commit is NOT required — staging is enough because `git ls-files` returns indexed paths. You don't need to push either. + +If you're not in a git repo at all, there's a fallback: the packer walks the filesystem and uses `.gitignore` as the exclude list. + +### When NOT to use `--sync-code` + +Rare but real: + +- You're running a cloud-managed workflow that doesn't touch local code (a fully `config`-typed workflow). +- You've set up a workflow whose `setup-branch` step explicitly `git clone`s something and doesn't care about local state. + +### Typical flow + +```bash +# Edit your workflow locally +vim workflows/fix-bug.ts + +# Stage everything the run needs — `git add` is enough; commit is optional. +# Untracked files would be silently excluded otherwise. +git add workflows/fix-bug.ts + +# Ship your working tree to the cloud sandbox and run +agent-relay cloud run workflows/fix-bug.ts --sync-code +# note the run ID printed... 
+ +# Stream logs as it runs +agent-relay cloud logs --follow + +# When complete, pull the produced diff back into your local worktree +agent-relay cloud sync +``` + +See [Workflows → Common mistakes](/docs/workflows-common-mistakes#output-and-exit-codes) for the "untracked files silently excluded" pitfall. + ## Inspect a patch before applying it ```bash diff --git a/web/content/docs/github-primitive.mdx b/web/content/docs/github-primitive.mdx new file mode 100644 index 000000000..879657236 --- /dev/null +++ b/web/content/docs/github-primitive.mdx @@ -0,0 +1,95 @@ +--- +title: 'GitHub primitive' +description: 'Workflow-specific primitive: a typed GitHub integration step. Runs through the local `gh` CLI or a cloud proxy.' +--- + +The GitHub primitive is a **workflow-specific primitive** — an integration step shaped for `workflow()`. It gives a workflow a typed GitHub surface (create issues, open PRs, read files, merge branches) with one call site that works the same locally via `gh` or in cloud via Nango / relay-cloud. + +It's **bundled with the SDK** — no separate install. If you've run `npm install @agent-relay/sdk`, you already have it. 
+ +## Usage inside a workflow + +Import `createGitHubStep` from the SDK's `/github` subpath and drop it in anywhere you'd use a regular `.step()`: + +```ts +import { workflow } from '@agent-relay/sdk/workflows'; +import { createGitHubStep } from '@agent-relay/sdk/github'; + +await workflow('ship-readme') + .agent('writer', { cli: 'claude' }) + + .step('read-readme', createGitHubStep({ + action: 'readFile', + repo: 'AgentWorkforce/relay', + params: { path: 'README.md' }, + output: { mode: 'data', format: 'text' }, + })) + + .step('edit', { + agent: 'writer', + dependsOn: ['read-readme'], + task: `Current README:\n{{steps.read-readme.output}}\n\nClean up the intro.`, + }) + + .step('open-pr', createGitHubStep({ + action: 'createPR', + repo: 'AgentWorkforce/relay', + params: { + head: 'docs/readme-cleanup', + base: 'main', + title: 'docs: clean up README intro', + body: 'Lightly edited for clarity.', + }, + })) + + .run({ cwd: process.cwd() }); +``` + +Under the hood each `createGitHubStep(...)` call produces a `type: 'integration'` step — the runner schedules it, applies verification, and captures output the same way as any other step. + +## Supported actions + +- **Repositories** — `listRepos`, `getRepo` +- **Issues** — `listIssues`, `createIssue`, `updateIssue`, `closeIssue` +- **Pull requests** — `listPRs`, `getPR`, `createPR`, `updatePR`, `mergePR` +- **Files** — `listFiles`, `readFile`, `createFile`, `updateFile`, `deleteFile` +- **Branches + commits** — `listBranches`, `createBranch`, `listCommits`, `createCommit` +- **Identity** — `getUser`, `listOrganizations` + +All actions work through the `action` + `params` shape. Outputs are typed — downstream steps can use `{{steps.<name>.output}}` to chain values through the workflow. 
+ +## Runtime selection + +The primitive auto-picks the right backend for the environment it's running in: + +| Mode | Triggered when | +|---|---| +| `local` (via `gh` CLI) | `gh auth status` succeeds and no cloud creds are set | +| `cloud` (via Nango) | `NANGO_SECRET_KEY` + `NANGO_GITHUB_CONNECTION_ID` + `NANGO_GITHUB_PROVIDER_CONFIG_KEY` are present | +| `cloud` (via relay-cloud) | `RELAY_CLOUD_API_URL` + `RELAY_CLOUD_API_TOKEN` + `WORKSPACE_ID` are present (fallback when Nango absent) | + +Pick the default (`runtime: 'auto'`) unless you need to pin one for testing. You can also set `runtime` per step via the `config` field — useful when the same workflow creates PRs across multiple tenants with different GitHub App installs. + +## Multi-tenant cloud routing + +Every cloud workspace can have its own GitHub App install — one Nango connection per tenant. `createGitHubStep` accepts a per-step `config` field so a single workflow can route different actions through different connections: + +```ts +createGitHubStep({ + action: 'createPR', + repo: 'AgentWorkforce/cloud', + params: { title, head, base, body }, + config: await githubConfigForRepo({ + repo: 'AgentWorkforce/cloud', + workspaceId: process.env.RELAY_WORKSPACE_ID, + }), +}); +``` + +The primitive itself stays tenant-unaware — it takes a `GitHubRuntimeConfig` and does what it's told. Tenant lookup lives in your app (typically a `connection-resolver` helper). Adding a new GitHub App install is a config row, not a code change. + +## See also + +- [Workflows introduction](/docs/workflows-introduction) — where this primitive shines. +- [Patterns](/docs/workflows-patterns) — canonical workflow shapes that commonly use GitHub steps (PR review loops, multi-repo shipping, etc.). +- [Authentication](/docs/authentication) — credentials model for the cloud runtime modes. 
diff --git a/web/content/docs/quickstart.mdx b/web/content/docs/quickstart.mdx index 241c384b4..160c183a5 100644 --- a/web/content/docs/quickstart.mdx +++ b/web/content/docs/quickstart.mdx @@ -125,10 +125,22 @@ asyncio.run(main()) ## CLI -If you want to run Agent Relay directly from the terminal instead of embedding the SDK in an app, install the CLI globally and start a local relay session: +If you want to run Agent Relay directly from the terminal instead of embedding the SDK in an app, install the CLI globally and start a local relay session. + +Install `agent-relay`. The install script is the preferred option — it pulls the native broker binary for your platform along with the CLI. + + +```bash install script (recommended) +curl -fsSL https://raw.githubusercontent.com/AgentWorkforce/relay/main/install.sh | bash +``` +```bash npm +npm install -g agent-relay +``` + + +Start a local broker and spawn agents: ```bash -npm i -g agent-relay agent-relay up agent-relay spawn planner claude "Break the work into steps" agent-relay spawn coder codex "Implement the approved plan" @@ -137,3 +149,26 @@ agent-relay who ``` See [CLI Overview](/docs/cli-overview) for the full command surface and [Broker Lifecycle](/docs/cli-broker-lifecycle) for running the local broker. + +### Let another agent orchestrate the team + +If you want a host agent (Claude, Codex, or any CLI) to autonomously spawn and coordinate sub-agents from inside its own session, install the `running-headless-orchestrator` skill. It wires the `agent-relay spawn` / `agent-relay dm` commands into the host's tool surface so the host can run the team without leaving its session. 
+ + +```bash prpm (recommended) +# Install for a specific host +npx prpm install @agent-relay/running-headless-orchestrator --as claude + +# Or install for multiple hosts at once +npx prpm install @agent-relay/running-headless-orchestrator --as claude,codex +``` +```bash skills +npx skills add https://github.com/agentworkforce/skills --skill running-headless-orchestrator +``` + + +Once installed, you can prompt the host like: + +> "Use the running-headless-orchestrator skill and spawn a claude agent called `reviewer`, DM it instructions, and wait for its verdict before you proceed." + +The host uses the skill's tools to run `agent-relay spawn reviewer claude ...`, send DMs via `agent-relay dm`, and read replies — all without you touching the shell. diff --git a/web/content/docs/workflows-common-mistakes.mdx b/web/content/docs/workflows-common-mistakes.mdx new file mode 100644 index 000000000..d081e56a6 --- /dev/null +++ b/web/content/docs/workflows-common-mistakes.mdx @@ -0,0 +1,114 @@ +--- +title: 'Common mistakes' +description: 'Every workflow author hits these once. Here is the list, so you can hit them zero times.' +--- + +Every one of these has bitten a real workflow. Copy the fix column. + +## General + +| Mistake | Fix | +|---------|-----| +| Using raw top-level `await` in a workflow file | Wrap in `async function main() { ... }` — executor-driven files sometimes behave like CJS. | +| `export default workflow(...)...build()` | No `.build()`. Chain ends with `.run()` — and the file MUST call `.run()`, not just export config. | +| `createWorkflowRenderer` | Does not exist. Use `.run({ cwd: process.cwd() })`. | +| Hardcoded model strings (`model: 'opus'`) | Use constants: `import { ClaudeModels } from '@agent-relay/config'` → `model: ClaudeModels.OPUS`. | +| Using `require()` in ESM projects | Check `package.json` for `"type": "module"` — use `import` if ESM. 
| +| Relative import `'../workflows/builder.js'` | Use `import { workflow } from '@agent-relay/sdk/workflows'`. | + +## Parallelism and waves + +| Mistake | Fix | +|---------|-----| +| Every step depends on the previous one | Only add `dependsOn` when there's a real data dependency. Independent steps with the same `dependsOn` parallelize. | +| One giant workflow per feature | Split into smaller workflows that can run in parallel waves (4-7× speedup). | +| `maxConcurrency: 16` with many parallel steps | Cap at 5-6. Broker times out at 10+. | +| Workers depending on lead step (DAG deadlock) | Both depend on the shared context step; downstream depends on the lead. | + +**DAG deadlock anti-pattern:** +```yaml +# WRONG +steps: + - name: coordinate + dependsOn: [context] # lead waits for WORKER_DONE... + - name: work-a + dependsOn: [coordinate] # ...but work-a can't start until coordinate finishes + +# RIGHT +steps: + - name: work-a + dependsOn: [context] # starts with lead + - name: coordinate + dependsOn: [context] # starts with workers + - name: merge + dependsOn: [work-a, coordinate] +``` + +Rule: if a lead step's task mentions downstream step names alongside waiting keywords, that's a deadlock. + +## Steps and verification + +| Mistake | Fix | +|---------|-----| +| Self-review step with no timeout | Set `timeout: 300_000` (5 min). Codex hangs in non-interactive review. | +| Adding exit instructions to tasks | Runner handles self-termination automatically. | +| Setting `timeoutMs` on agents/steps | Use global `.timeout()` only. | +| Using the `general` channel | Set `.channel('wf-name')` for isolation. | +| `{{steps.X.output}}` without `dependsOn: ['X']` | Output won't be available yet. | +| Requiring exact sentinel as only completion gate | Use `exit_code` or `file_exists` verification. | +| Writing 100-line task prompts | Split into lead + workers on a channel. | +| Single step editing 4+ files | Agents modify 1-2 then exit. 
Split to one file per step with verify gates. | +| Relying on agents to `git commit` | Agents emit markers without running git. Use deterministic commit step. | +| File-writing steps without `file_exists` verification | `exit_code` auto-passes even if no file written. Use `{ type: 'file_exists', value: 'path' }` for creation. | +| Non-interactive agent reading large files via tools | Pre-read in deterministic step, inject via `{{steps.X.output}}`. | + +**Verification token gotcha:** If the token (e.g. `STEP_COMPLETE`) appears in the task text, the runner requires it **twice** in output (once from task echo, once from agent). Prefer `exit_code` for code-editing steps to avoid this. + +Only these four verification types are valid: `exit_code`, `output_contains`, `file_exists`, `custom`. Invalid types are silently ignored and fall through to process-exit auto-pass. + +## Patterns + +| Mistake | Fix | +|---------|-----| +| `pattern('single')` on cloud runner | Not supported — use `dag`. | +| `pattern('supervisor')` with one agent | Same agent is owner + specialist. Use `dag`. | +| `pipeline` but expecting auto-supervisor | Only hub patterns auto-harden. Use `.pattern('supervisor')`. | +| `fan-out` / `hub-spoke` for simple parallel workers | Use `dag` instead. | +| Workers without `preset: 'worker'` in one-shot DAG lead+worker flows | Add preset for clean stdout when chaining `{{steps.X.output}}`. Not needed for interactive team patterns. | +| Separate reviewer agent from lead in interactive team | Merge lead + reviewer into one interactive Claude agent — reviews between rounds, fewer agents. | +| Chaining `{{steps.X.output}}` from interactive agents | PTY output is garbled. Use deterministic steps or `preset: 'worker'`. | + +## Shell and YAML + +| Mistake | Fix | +|---------|-----| +| Using `_` in YAML numbers (`timeoutMs: 1_200_000`) | YAML doesn't support `_` separators. | +| Workflow timeout under 30 min for complex workflows | Use `3600000` (1 hour) as default. 
| +| Raw fenced code blocks inside workflow task template literals | Avoid fenced examples inside template strings, or move to referenced files. Especially fragile with language tags like `swift` or `diff`. | +| `grep "foo\|bar\|baz"` (basic alternation) | Use `grep -Eq "foo\|bar\|baz"` — basic alternation can silently misbehave. | +| Cloud sandbox `/bin/sh` trying to parse `${PIPESTATUS[0]}` | Wrap in `bash -c '...'` (SINGLE quotes). `/bin/sh` on Daytona is dash; single quotes stop sh from expanding bash-only syntax. | +| `SIBLING_PATH="$RAW_VALUE"` (double-quoted shell assignments from user input) | Use single-quoted form: `SIBLING_PATH='...'`. Double quotes still expand `$(...)`, backticks, `\`. | + +## Cross-repo / worktrees + +| Mistake | Fix | +|---------|-----| +| Workflow ending without worktree + PR for cross-repo changes | Add `setup-worktree` at start and `push-and-pr` + `cleanup-worktree` at end. | +| Not printing PR URL after `gh pr create` | Add a final deterministic step: `echo "PR: $(cat pr-url.txt)"` or capture in the `gh pr create` command. | +| Touching the user's main `../other-repo` checkout | Create a sibling worktree instead: `git -C ../other-repo worktree add ../other-repo-<name> -b <branch>`. | +| Linking sibling packages via relative imports (`file:`) | Use `applySiblingLinks` from `@agent-relay/sdk/workflows`. `file:` paths end up in committed `package.json`, which is wrong. | +| Agent fabricates an interface via `declare module` | See [Setup helpers → applySiblingLinks](/docs/workflows-setup-helpers#applysiblinglinks). Link the real sibling so agents see head-of-main types. | + +## Output and exit codes + +| Mistake | Fix | +|---------|-----| +| Relying on `agent-relay run` exit code for sub-workflow status | The runner exits 0 even when the inner workflow fails. Check `$?` AND grep `"Workflow status: failed"` from the log. 
| +| Committing `.trajectories/`, `logs/`, or `package-lock.json` along with code | Stage only product files: `git add src/ test/` etc. Execution exhaust shouldn't be in PRs unless the user explicitly asks. | +| `--sync-code` leaves out a file I just created | Cloud tarball is built from `git ls-files` (tracked paths only) + working-tree contents. Untracked files are silently excluded. `git add` the file before `agent-relay cloud run --sync-code` — staging is enough, commit is optional. | + +## See also + +- [Patterns](/docs/workflows-patterns) — shapes that avoid most of these mistakes. +- [Setup helpers](/docs/workflows-setup-helpers) — conventions that compound (everyone benefits when the helper's right). +- [Builder API reference](/docs/reference-workflows) — the canonical API surface. diff --git a/web/content/docs/workflows-introduction.mdx b/web/content/docs/workflows-introduction.mdx new file mode 100644 index 000000000..e60f6def2 --- /dev/null +++ b/web/content/docs/workflows-introduction.mdx @@ -0,0 +1,180 @@ +--- +title: 'Workflows' +description: 'Orchestrate multi-step, multi-agent execution across Relay workers. Write once, run locally or in the cloud.' +--- + +Workflows are how Relay runs real work. A workflow **composes the Relay primitives** — [channels](/docs/channels), [DMs](/docs/dms), [threads](/docs/threads), [file sharing](/docs/file-sharing), [scheduling](/docs/scheduling) — into a repeatable, multi-step, multi-agent execution plan. You write them in TypeScript, Python, or YAML; you run them with `agent-relay run`. + +## Why workflows (vs. just spawning an agent) + +Spawning a single agent is fine for a one-off. Workflows earn their weight when you need: + +- **Multiple agents coordinating** — a lead that plans, workers that implement, a reviewer that gates. Wired together over channels + DMs. +- **Cross-repo changes** — edit two repos as part of one unit of work, open linked PRs. +- **Verification gates** — tests must pass before the commit. 
Build must pass before the push. Regressions must be zero before the PR opens. +- **Parallelism with barriers** — fan out independent work, wait at the right moments, move on. +- **Repeatability** — this is the point. A workflow file is an artifact. Commit it, re-run it next week, share it with the team, let a scheduler fire it nightly, resume it from a partial failure. Same shape every time. + +Workflows turn ad-hoc "I typed prompts into three Claude windows" coordination into something you can automate, review, and repeat. That's the whole point. + +## How workflows use primitives + +A workflow doesn't replace the [primitives](/docs/sending-messages) — it composes them: + +- **[Channels](/docs/channels)** are where agents in a workflow coordinate. Each workflow opens a dedicated channel (`.channel('wf-my-feature')`) so the team has a clean, isolated room. +- **[DMs](/docs/dms)** let a lead address a specific worker without broadcasting. +- **[Threads](/docs/threads)** keep sub-conversations organized when a workflow step has its own back-and-forth. +- **[File sharing](/docs/file-sharing)** is how agents hand artifacts to each other (diff payloads, review bundles, etc.) when they span processes. +- **[Scheduling](/docs/scheduling)** turns a workflow into a cron job via RelayCron — the same workflow file runs on a schedule, unchanged. + +If a primitive gives you atomic agent-to-agent capabilities, a workflow gives you the orchestration that makes them useful at scale. + +Workflows also ship their own **workflow-specific primitives** — e.g. the [GitHub primitive](/docs/github-primitive) — integration steps shaped specifically for `workflow()`. Most production workflows use it for PR creation, file reads, issue updates. + +## Mental model + +A workflow is a named, typed orchestration of **steps** run by the Relay runner. Step types: + +- **Agent** — spawn a CLI (`claude`, `codex`, `gemini`, etc.) with a task prompt. 
Agent reads the prompt, edits files, writes tests, posts on channels. +- **Deterministic** — a shell command. Reads, builds, tests, verifications, commits, pushes. No LLM. +- **Worktree** — create or clean up a git worktree for isolated work. +- **Integration** — call a registered external integration (e.g. a job runner) as a typed step. + +Steps declare dependencies (`dependsOn`). Steps with no shared dependency run in parallel up to `maxConcurrency`. + +### Patterns + +The default topology is a **DAG** (`.pattern('dag')`), but the SDK ships 25 swarm patterns. Pick the one that matches how work actually flows between agents: + +| Shape | Patterns | +|---|---| +| **DAG / trees** | `dag`, `pipeline`, `fan-out`, `scatter-gather`, `map-reduce`, `hierarchical`, `cascade` | +| **Hub + workers** | `hub-spoke`, `supervisor`, `handoff` | +| **Iterative loops** | `debate`, `reflection`, `review-loop`, `red-team`, `verifier` | +| **Non-fixed topology** | `mesh`, `swarm`, `blackboard` | +| **Selection / competition** | `auction`, `competitive`, `consensus` | +| **Error handling** | `circuit-breaker`, `saga`, `escalation` | + +When you use a hub pattern (`supervisor`, `hub-spoke`, `fan-out`), the runner auto-hardens it — a supervisor gets spawned to monitor workers and issue `OWNER_DECISION` if they stall. Plain `dag` and `pipeline` don't auto-harden; you wire supervision yourself if you want it. + +See [Patterns](/docs/workflows-patterns) for the canonical shapes most workflows actually use. 
+ +## A minimal example + +```typescript +import { workflow } from '@agent-relay/sdk/workflows'; + +async function main() { + const result = await workflow('add-tests-to-parser') + .description('Write tests for the parser and run them until green') + .pattern('dag') + .channel('wf-parser-tests') + + .agent('impl', { cli: 'codex', preset: 'worker' }) + .agent('tester', { cli: 'codex', preset: 'worker' }) + + .step('read-parser', { + type: 'deterministic', + command: 'cat src/parser.ts', + captureOutput: true, + }) + .step('write-tests', { + agent: 'tester', + dependsOn: ['read-parser'], + task: `Write vitest tests for the parser. Current contents: + +{{steps.read-parser.output}} + +Create test/parser.test.ts.`, + verification: { type: 'file_exists', value: 'test/parser.test.ts' }, + }) + .step('run-tests', { + type: 'deterministic', + dependsOn: ['write-tests'], + command: 'npx vitest run test/parser.test.ts', + failOnError: true, + }) + + .run({ cwd: process.cwd() }); + + console.log('Status:', result.status); + if (result.status !== 'completed') process.exit(1); +} + +main().catch((e) => { console.error(e); process.exit(1); }); +``` + +Run it: + +```bash +agent-relay run workflows/add-tests-to-parser.ts +``` + +See also: + +- [Quickstart](/docs/workflows-quickstart) — walks through a working example end-to-end. +- [Builder API reference](/docs/reference-workflows) — every method on `workflow()`. +- [Patterns](/docs/workflows-patterns) — 80-to-100, lead+workers, test-fix-rerun, cross-repo. +- [Setup helpers](/docs/workflows-setup-helpers) — `applySageRepoSetup`, `applySiblingLinks`, repo-setup conventions. +- [Common mistakes](/docs/workflows-common-mistakes) — the bugs every workflow author hits once. +- [Run from CLI](/docs/cli-workflows) — the `agent-relay run` command, dry-runs, resumes. + +## Two-minute concept guide + +**Patterns.** `dag` (default), `fan-out`, `pipeline`, `hub-spoke`, `supervisor`, `debate`, and a dozen more. 
Pick based on how work flows between agents. See [Patterns](/docs/workflows-patterns). + +**Verification gates.** Every step can declare `verification: { type: 'exit_code' }` / `file_exists` / `output_contains`. The runner won't mark a step complete without the gate passing. Use `exit_code` for code-editing steps; use `file_exists` for creation steps. + +**Step output chaining.** `{{steps.<name>.output}}` in a downstream task injects the upstream step's stdout. Works great for deterministic steps; fragile for interactive agents (their output has ANSI/spinner noise). + +**Completion signals.** Steps complete through a multi-signal pipeline: verification gate → `OWNER_DECISION` → evidence + clean exit → marker fast-path → process-exit fallback. You describe the deliverable; you don't have to add sentinels. + +**Cloud execution.** Submit the same workflow file to `agent-relay cloud` to run it in a Daytona sandbox. Files get committed + pushed; PRs open automatically if your master workflow is set up for it. + +```bash +# Almost always pass --sync-code — without it, the sandbox has NO +# code at all (there's no fallback clone from origin). The tarball +# is built from `git ls-files` + working-tree contents: tracked +# files (including staged-but-uncommitted) are synced; untracked +# files are NOT. `git add` is enough — commit is optional. +git add workflows/my-workflow.ts +agent-relay cloud run workflows/my-workflow.ts --sync-code +``` + +See [Cloud commands → `--sync-code`](/docs/cli-cloud-commands#-sync-code-uploading-your-local-repo) for the full sync semantics (what's in, what's out, edge cases). + +## Let an agent write your workflows + +If you want a host agent (Claude, Codex, etc.) to autonomously author workflow files — follow the canonical patterns, avoid the common pitfalls, get features to 100% instead of 80% — install the workflow-authoring skills. The agent reads them alongside your prompt and writes workflows that match repo conventions. 
+ + +```bash prpm (recommended) +# Install for a specific host +npx prpm install @agent-relay/writing-agent-relay-workflows --as claude +npx prpm install @agent-relay/relay-80-100-workflow --as claude + +# Or install both for multiple hosts at once +npx prpm install @agent-relay/writing-agent-relay-workflows @agent-relay/relay-80-100-workflow --as claude,codex +``` +```bash skills +npx skills add https://github.com/agentworkforce/skills --skill writing-agent-relay-workflows +npx skills add https://github.com/agentworkforce/skills --skill relay-80-100-workflow +``` + + +**The two skills:** + +- **`writing-agent-relay-workflows`** — the core authoring guide. Failure prevention rules, step sizing, pattern selection, parallelism/waves, common mistakes, multi-file edit patterns, DAG deadlock anti-patterns, lead+worker teams. +- **`relay-80-100-workflow`** — the validation-gate layer. Test-fix-rerun loop, verify gates after every edit, PGlite for in-memory Postgres testing, regression-check patterns. Takes features from "compiles" to "tested and proven-working before commit." + +Once installed, prompts like these just work: + +> "Write a workflow that adds a `pending` status to `src/types.ts` and the service layer, with tests. Use the 80-to-100 pattern so the commit only lands if tests pass." + +> "Split the migration work into two parallel waves. Cross-repo: sage + cloud. Open linked PRs at the end." + +## Next + +- Start with the [Quickstart](/docs/workflows-quickstart) for a working example you can run in 5 minutes. +- Already comfortable? Jump to [Patterns](/docs/workflows-patterns) for canonical multi-agent shapes. +- Fighting a bug? [Common mistakes](/docs/workflows-common-mistakes) has the usual suspects. 
diff --git a/web/content/docs/workflows-patterns.mdx b/web/content/docs/workflows-patterns.mdx new file mode 100644 index 000000000..8d694d2a4 --- /dev/null +++ b/web/content/docs/workflows-patterns.mdx @@ -0,0 +1,272 @@ +--- +title: 'Workflow patterns' +description: 'Canonical shapes for multi-agent workflows: test-fix-rerun, lead + workers, cross-repo, supervisor.' +--- + +These are the patterns that show up over and over in production workflows. Copy the shape, adapt the details. + +## Test-fix-rerun (the 80-to-100 pattern) + +Most workflows get features to ~80%: code written, types check, maybe a build passes. The gap to 100% is **running the tests inside the workflow and fixing failures before commit**. + +Three-step loop: + +```typescript +// 1. Run tests (don't fail the workflow — let the agent fix it) +.step('run-tests', { + type: 'deterministic', + dependsOn: ['write-tests'], + command: 'npx vitest run test/my-feature.test.ts 2>&1 | tail -60', + captureOutput: true, + failOnError: false, +}) + +// 2. Agent reads output, fixes, re-runs +.step('fix-tests', { + agent: 'tester', + dependsOn: ['run-tests'], + task: `Check the test output and fix any failures. + +Output: +{{steps.run-tests.output}} + +If all pass, do nothing. If there are failures, read the test + source, +fix, re-run via \`npx vitest run test/my-feature.test.ts\`. Keep iterating +until ALL tests pass.`, + verification: { type: 'exit_code' }, +}) + +// 3. Deterministic final run — this one MUST pass +.step('run-tests-final', { + type: 'deterministic', + dependsOn: ['fix-tests'], + command: 'npx vitest run test/my-feature.test.ts 2>&1', + failOnError: true, +}) +``` + +**Why three steps instead of one:** +- First run captures output for the agent to diagnose. +- Agent step can iterate (read errors, fix, re-run) multiple times. +- Final deterministic run is the gate — no agent judgment, pass/fail. 
+ +Follow the same shape for `npx tsc --noEmit`, `npm run build`, `npm test` — any deterministic check you want a gate behind. + +## Verify gates after every agent edit + +Never trust that an agent edited a file correctly. Add a deterministic verify step after every agent edit: + +```typescript +.step('edit-schema', { + agent: 'impl', + task: 'Edit packages/web/lib/db/schema.ts ...', + verification: { type: 'exit_code' }, +}) +.step('verify-schema', { + type: 'deterministic', + dependsOn: ['edit-schema'], + command: `if git diff --quiet packages/web/lib/db/schema.ts; then echo "NOT MODIFIED"; exit 1; fi +grep "my_new_table" packages/web/lib/db/schema.ts >/dev/null && echo OK`, + failOnError: true, +}) +``` + +What to verify: +- File was actually modified (`git diff --quiet` returns non-zero). +- Key content exists (grep for table names, function names, imports). +- For new files: `verification: { type: 'file_exists', value: '...' }`. + +What NOT to verify: +- Exact content (too brittle — agents format differently). +- Line counts (meaningless). + +## Lead + workers (interactive team) + +When a task touches multiple files and may need iteration, use a **lead + workers team on a shared channel** rather than a sequential DAG of one-shot agents. The lead coordinates, reviews, and posts feedback; workers implement and iterate. + +```typescript +.agent('lead', { + cli: 'claude', + preset: 'lead', + role: 'Architect and reviewer — assigns work, reviews diffs, posts feedback', +}) +.agent('impl-new', { + cli: 'codex', + role: 'Creates new files. Listens on channel for assignments + feedback.', + // No preset — interactive, receives channel messages +}) +.agent('impl-modify', { + cli: 'codex', + role: 'Edits existing files. Listens on channel.', +}) + +// All three share the same dependsOn — they start concurrently +.step('lead-coordinate', { + agent: 'lead', + dependsOn: ['context'], + task: `You are the lead on #channel. Workers: impl-new, impl-modify. +Post the plan. 
Assign files. Review their work. Post feedback. +Workers iterate. Exit when all files are correct.`, +}) +.step('impl-new-work', { + agent: 'impl-new', + dependsOn: ['context'], // same dep as lead = parallel start + task: 'You are impl-new on #channel. Wait for the lead\'s plan. Create files as assigned.', +}) +.step('impl-modify-work', { + agent: 'impl-modify', + dependsOn: ['context'], + task: 'You are impl-modify on #channel. Wait for plan. Edit as assigned.', +}) + +// Downstream gates on lead — lead exits when satisfied +.step('verify', { type: 'deterministic', dependsOn: ['lead-coordinate'], ... }) +``` + +**Key behaviors:** +- **Workers self-organize from channel context.** They read each other's completion messages and start dependent work without waiting for the lead to relay. +- **Lead-as-reviewer is more efficient than a separate reviewer agent.** The lead reads actual files and runs typecheck between rounds. +- **No feedback loop needed = fast path.** If workers get it right first try, the interactive pattern completes just as fast as one-shot. + +When to use interactive team vs one-shot DAG: + +| Scenario | Pattern | +|----------|---------| +| 4+ files, likely needs iteration | Interactive team | +| Simple edits, well-specified | One-shot DAG with `preset: 'worker'` | +| Cross-agent review feedback loop | Interactive team | +| Independent tasks, no coordination | Fan-out with non-interactive workers | + +## Multi-file edit pattern (one file per step) + +Agents reliably edit 1-2 files per step but fail on 4+. Split multi-file edits into **one agent step per file**, each with a deterministic verify gate: + +```typescript +.step('read-types', { + type: 'deterministic', + command: 'cat src/types.ts', + captureOutput: true, +}) +.step('edit-types', { + agent: 'impl', + dependsOn: ['read-types'], + task: `Edit src/types.ts. Current contents: +{{steps.read-types.output}} +Add 'pending' to the Status union. 
Only edit this one file.`, + verification: { type: 'exit_code' }, +}) +.step('verify-types', { + type: 'deterministic', + dependsOn: ['edit-types'], + command: `if git diff --quiet src/types.ts; then echo "NOT MODIFIED"; exit 1; fi; echo OK`, + failOnError: true, +}) + +.step('read-service', { + type: 'deterministic', + dependsOn: ['verify-types'], + command: 'cat src/service.ts', + captureOutput: true, +}) +.step('edit-service', { ... }) +.step('verify-service', { ... }) + +.step('commit', { + type: 'deterministic', + dependsOn: ['verify-service'], + command: 'git add src/types.ts src/service.ts && git commit -m "feat: add pending status"', +}) +``` + +**Rules:** +- Read the file in a deterministic step **right before** the edit (not all files upfront — agent sees stale state if main moved). +- Tell the agent "Only edit this one file" to prevent it touching others. +- Verify with `git diff --quiet` after each edit — fail fast if the agent didn't write. +- Always commit with a deterministic step, never an agent step. + +## Cross-repo workflows + +When the unit of work spans two repos (e.g. a feature that touches both `sage` and `cloud/infra`), the master creates a sibling worktree for the second repo and orchestrates work across both: + +```typescript +const CLOUD_WORKTREE = '../cloud-my-feature'; + +// Wave 0: ensure both repos are ready +.step('setup-cloud-worktree', { + type: 'deterministic', + command: `if [ ! 
-d "${CLOUD_WORKTREE}" ]; then + git -C ../cloud worktree add -B feat/my-change "${CLOUD_WORKTREE}" origin/main + fi`, +}) + +// Wave 1: parallel work on each repo +.step('wave1-cloud-change', { + type: 'deterministic', + dependsOn: ['setup-cloud-worktree'], + command: `cd ${CLOUD_WORKTREE} && agent-relay run $OLDPWD/workflows/01-cloud.ts`, +}) +.step('wave1-sage-change', { + type: 'deterministic', + dependsOn: ['install-deps'], + command: 'agent-relay run workflows/02-sage.ts', +}) + +// Final: cross-repo commit + push + two linked PRs +.step('open-cloud-pr', { + type: 'deterministic', + dependsOn: ['wave1-cloud-change'], + command: `cd ${CLOUD_WORKTREE} && gh pr create --title '...' --body '...'`, +}) +.step('open-sage-pr', { ... }) +``` + +**Gotchas:** +- Don't touch the user's main `../cloud` checkout — always create a sibling worktree. +- Sibling repos linked as dependencies? See [Setup helpers → applySiblingLinks](/docs/workflows-setup-helpers#applysiblinglinks) to avoid "agents see stale interfaces" bugs. +- Each repo gets its own PR; merge order depends on dependency direction (infra usually first). + +## Supervisor pattern + +Use `.pattern('supervisor')` (or `hub-spoke` / `fan-out`) when workers need oversight — the runner auto-assigns a supervisor agent as owner for worker steps. The supervisor monitors progress, nudges idle workers, and issues `OWNER_DECISION`. + +| Use case | Pattern | Why | +|----------|---------|-----| +| Sequential, no monitoring | `pipeline` | Simple, no overhead | +| Workers need oversight | `supervisor` | Auto-owner monitors | +| Local/small models | `supervisor` | Supervisor catches stuck workers | +| All non-interactive | `pipeline` or `dag` | No PTY = no supervision needed | + +Auto-hardening only activates for hub patterns (`supervisor`, `hub-spoke`, `fan-out`) — not `pipeline` or `dag`. + +## Parallelism: waves and fan-out + +Two workflows or two steps can run in parallel if they don't conflict on files. 
Heuristics: + +| Touch zone | Can parallelize? | +|---|---| +| Different `packages/*/src/` dirs | ✅ Yes | +| Different `app/` routes | ✅ Yes | +| Same package, different subdirs | ⚠️ Usually yes | +| Same files (shared config, root `package.json`) | ❌ No | +| Explicit dep | ❌ No — ordered waves | + +Fan out by having multiple steps share the same `dependsOn`: + +```typescript +// BAD — unnecessary sequential chain +.step('fix-component-a', { dependsOn: ['review'] }) +.step('fix-component-b', { dependsOn: ['fix-component-a'] }) // why wait? + +// GOOD — parallel fan-out, merge at the end +.step('fix-component-a', { agent: 'impl-1', dependsOn: ['review'] }) +.step('fix-component-b', { agent: 'impl-2', dependsOn: ['review'] }) // parallel +.step('verify-all', { agent: 'reviewer', dependsOn: ['fix-component-a', 'fix-component-b'] }) +``` + +Cap `maxConcurrency` at **4-6**. Spawning 10+ agents simultaneously causes broker timeouts. + +## See also + +- [Setup helpers](/docs/workflows-setup-helpers) — repo-setup conventions and `applySiblingLinks`. +- [Common mistakes](/docs/workflows-common-mistakes) — the bugs that break these patterns in practice. +- [Builder API reference](/docs/reference-workflows) — every method on `workflow()`. diff --git a/web/content/docs/workflows-quickstart.mdx b/web/content/docs/workflows-quickstart.mdx new file mode 100644 index 000000000..102e2c11c --- /dev/null +++ b/web/content/docs/workflows-quickstart.mdx @@ -0,0 +1,165 @@ +--- +title: 'Workflows quickstart' +description: 'A working workflow in 5 minutes: plan, implement, test, verify, commit.' +--- + +This page walks through building and running a real workflow end-to-end. By the end, you'll have a workflow that plans a small feature with a lead agent, implements it with a worker agent, runs tests, and commits — only if everything passes. 
+ +## Prerequisites + +- `@agent-relay/cli` installed globally: `npm i -g @agent-relay/cli` +- `@agent-relay/sdk` installed in the target repo: `npm i @agent-relay/sdk` +- One CLI installed and authenticated: `claude`, `codex`, `gemini`, `aider`, or `goose` + +Check: `agent-relay --version`. + +## 1. Create the workflow file + +```bash +mkdir -p workflows +``` + +`workflows/add-greeting.ts`: + +```typescript +import { workflow } from '@agent-relay/sdk/workflows'; +import { ClaudeModels, CodexModels } from '@agent-relay/config'; + +async function main() { + const result = await workflow('add-greeting') + .description('Add a greeting function + tests') + .pattern('dag') + .channel('wf-add-greeting') + .maxConcurrency(3) + .timeout(900_000) + + .agent('lead', { + cli: 'claude', + model: ClaudeModels.SONNET, + preset: 'lead', + role: 'Plans the shape of the greeting function and tests', + retries: 1, + }) + .agent('impl', { + cli: 'codex', + model: CodexModels.GPT_5_4, + role: 'Writes src/greeting.ts and its tests', + retries: 2, + }) + + .step('plan', { + agent: 'lead', + task: `Post a short plan for: + src/greeting.ts — export greet(name: string): string + returns "Hello, <name>!"; empty name -> "Hello, friend!" + test/greeting.test.ts — vitest covering both cases + +Keep plan to 5 bullets.`, + }) + + .step('write-code', { + agent: 'impl', + dependsOn: ['plan'], + task: 'Create src/greeting.ts per the lead plan. Only this file.', + verification: { type: 'file_exists', value: 'src/greeting.ts' }, + }) + + .step('write-tests', { + agent: 'impl', + dependsOn: ['write-code'], + task: 'Create test/greeting.test.ts per the lead plan. 
Only this file.', + verification: { type: 'file_exists', value: 'test/greeting.test.ts' }, + }) + + .step('run-tests', { + type: 'deterministic', + dependsOn: ['write-tests'], + command: 'npx vitest run test/greeting.test.ts 2>&1', + captureOutput: true, + failOnError: true, + }) + + .step('commit', { + type: 'deterministic', + dependsOn: ['run-tests'], + command: 'git add src/greeting.ts test/greeting.test.ts && git commit -m "feat: add greet()"', + failOnError: true, + }) + + .run({ cwd: process.cwd() }); + + console.log('Workflow status:', result.status); + if (result.status !== 'completed') process.exit(1); +} + +main().catch((e) => { console.error(e); process.exit(1); }); +``` + +## 2. Dry-run to validate + +Always dry-run before a real run — catches typos, missing dependencies, and invalid patterns before you spend agent time: + +```bash +agent-relay run --dry-run workflows/add-greeting.ts +``` + +You should see: + +``` +Validation: PASS (0 errors, 0 warnings) + +Execution Plan (5 steps, 5 waves): + Wave 1: plan (lead) + Wave 2: write-code (impl) + Wave 3: write-tests (impl) + Wave 4: run-tests (undefined) + Wave 5: commit (undefined) +``` + +## 3. Run it + +```bash +agent-relay run workflows/add-greeting.ts +``` + +You'll see the lead agent post a plan on the workflow channel, the worker implement each file in sequence, vitest run against the fresh code, and the commit land only after tests pass. + +## 4. What just happened + +- **`workflow('add-greeting')`** — named the run. Log files + trajectories use this name. +- **`.pattern('dag')`** — steps run in dependency order; independent steps parallelize. +- **`.agent(...)`** — declares the CLIs and their roles. The `preset: 'lead'` gives the lead an interactive shell so it can post to the channel; workers are non-interactive by default. +- **`.step(...)` with `agent: ...`** — spawns the named CLI with the task prompt. The agent runs in the workflow's cwd and can edit files. 
+- **`type: 'deterministic'`** — pure shell command. No LLM. Use these for reads, tests, builds, commits. +- **`verification`** — gates the step. `file_exists` for creation; `exit_code` for edits; `output_contains` for marker-based checks. +- **`failOnError: true`** — a failed deterministic step stops the whole workflow. + +## 5. Run it in the cloud + +The same workflow file runs unchanged in a Daytona sandbox: + +```bash +# IMPORTANT: `git add` the workflow file first. `--sync-code` tarballs +# git-tracked paths with working-tree contents. Untracked files are +# silently excluded. `git add` is enough — commit is optional. +git add workflows/add-greeting.ts +agent-relay cloud run workflows/add-greeting.ts --sync-code + +# ...note the run ID printed, then: +agent-relay cloud logs <run-id> --follow +agent-relay cloud sync <run-id> # pull the produced diff back locally +``` + +Almost every cloud run should pass `--sync-code` — without it the sandbox has no code at all (there's no fallback clone from `origin`). See [Cloud commands → `--sync-code`](/docs/cli-cloud-commands#-sync-code-uploading-your-local-repo) for the full sync semantics. + +## 6. Things to try next + +- Add a **reviewer step** that reads the diff and posts either `LGTM` or `BLOCKERS:`. Gate the commit behind it. +- Swap `pattern('dag')` for `pattern('supervisor')` and see the auto-spawned supervisor monitor the workers. +- Break `write-code` and `write-tests` into a **team** by giving them a shared channel and having the lead post feedback between rounds. See [Patterns → Lead + workers](/docs/workflows-patterns#lead--workers-interactive-team). + +## Full reference + +- [Builder API](/docs/reference-workflows) — every method on `workflow()`. +- [YAML alternative](/docs/reference-workflows#yaml-shape) — same concepts, config-driven. +- [Common mistakes](/docs/workflows-common-mistakes) — bugs everyone hits once. 
diff --git a/web/content/docs/workflows-setup-helpers.mdx b/web/content/docs/workflows-setup-helpers.mdx new file mode 100644 index 000000000..b918339a1 --- /dev/null +++ b/web/content/docs/workflows-setup-helpers.mdx @@ -0,0 +1,168 @@ +--- +title: 'Setup helpers' +description: 'Shared setup steps for workflows — branch checkout, install, build, sibling linking. Keep boilerplate out of each workflow file.' +--- + +Every workflow that produces code needs roughly the same prelude: check out a branch, install deps, maybe build a shared package or two. Without a shared helper, the first workflow that adds a new prerequisite (e.g. "build the platform package because its types point at `dist/`") only fixes itself — every other workflow silently misses it. + +This page covers two conventions that compose well: + +1. **Per-repo setup helper** — one file per consumer repo that adds `setup-branch` + `install-deps` steps. +2. **`applySiblingLinks`** — the SDK helper that links sibling-repo packages into the workflow's working directory so agents see the real, current interface instead of whatever's on npm. + +## Per-repo setup helper + +Put shared prelude in `workflows/lib/<repo>-setup.ts`. Every workflow in that repo calls it. + +```typescript +// workflows/lib/my-repo-setup.ts +export interface MyRepoSetupOptions { + branch: string; + committerName?: string; + extraSetupCommands?: string[]; + skipWorkspaceBuild?: boolean; +} + +interface StepChain { + step: (name: string, cfg: unknown) => StepChain; +} + +export function applyMyRepoSetup<T>(wf: T, opts: MyRepoSetupOptions): T { + const committerName = opts.committerName ?? 'My Workflow Bot'; + const setupBranchCommand = [ + 'set -e', + 'git config user.email "agent@my-org.local"', + `git config user.name ${JSON.stringify(committerName)}`, + `git checkout -B ${opts.branch}`, + ...(opts.extraSetupCommands ?? []), + ].join(' && '); + + const installCommand = opts.skipWorkspaceBuild + ? 
'npm install --legacy-peer-deps --no-audit --no-fund 2>&1 | tail -10' + : [ + 'npm install --legacy-peer-deps --no-audit --no-fund 2>&1 | tail -10', + 'npm run build --workspaces --if-present 2>&1 | tail -20', + ].join(' && '); + + const chain = wf as unknown as StepChain; + chain + .step('setup-branch', { + type: 'deterministic', + command: setupBranchCommand, + captureOutput: true, + failOnError: true, + }) + .step('install-deps', { + type: 'deterministic', + dependsOn: ['setup-branch'], + command: installCommand, + captureOutput: true, + failOnError: true, + }); + + return wf; +} +``` + +Usage: + +```typescript +import { workflow } from '@agent-relay/sdk/workflows'; +import { applyMyRepoSetup } from './lib/my-repo-setup'; + +const baseWf = workflow(NAME) + .description('...') + .pattern('dag') + .agent('impl', { ... }); + +const wf = applyMyRepoSetup(baseWf, { + branch: 'feat/my-change', + committerName: 'Feature X Bot', +}); + +await wf + .step('read-spec', { type: 'deterministic', dependsOn: ['install-deps'], ... }) + // ... rest of the workflow + .run({ cwd: process.cwd() }); +``` + +**Rules:** +- The helper lives in the **consumer repo**, not the SDK. Different repos have different languages, package managers, build graphs. +- Pre-build any workspace package whose `package.json` `main`/`types` point at a generated `dist/`. Fresh sandboxes don't have that `dist/` yet; agents will invent workarounds (e.g. `external-modules.d.ts` shims) rather than run the build. +- Every install step includes `--legacy-peer-deps --no-audit --no-fund 2>&1 | tail -10` (or equivalent) because full install output blows past `captureOutput` size limits. + +## `applySiblingLinks` + +Ships as part of `@agent-relay/sdk/workflows`. Adds one deterministic step that links sibling-repo packages into the workflow's working directory so agents see the real, head-of-main interface — not whatever `npm install` resolved. + +**Problem it solves:** a workflow consumes `@my-org/some-package`. 
The producer repo has new exports on its `main` branch that aren't published to npm yet. Without linking, agents writing workflow code hit `has no exported member 'newThing'` and — rather than stop — reach for `declare module` augmentations or fallback implementations. The workflow ships code that can't integrate with the real package once it publishes. + +```typescript +import { workflow, applySiblingLinks } from '@agent-relay/sdk/workflows'; + +const baseWf = workflow('my-feature').pattern('dag').agent('impl', { ... }); + +const wf = applySiblingLinks(baseWf, { + dependsOn: ['install-deps'], + links: [ + { + name: '@my-org/some-package', + path: '../some-repo/packages/some-package', + expect: ['newThing', 'anotherThing'], + }, + { + name: 'my_python_pkg', + path: '../py-repo/src/my_python_pkg', + expect: ['classify_signal'], + }, + ], +}); +``` + +**Auto-detect by manifest:** + +| Manifest in sibling | Mechanism | +|---|---| +| `package.json` | `npm link` (symlinks into `node_modules` — committed files untouched) | +| `pyproject.toml` / `setup.py` / `setup.cfg` | `uv pip install --system -e` (falls back to `pip` / `pip3`) | + +**Fail-fast on:** +- Missing sibling path. +- Unknown manifest (none of the supported files present). +- Link command failure. +- Missing expected export (post-link smoke test via `node --input-type=module` or `python3 -c`). + +The `expect` array is the final guardrail. Even if the link command succeeded, if the real interface doesn't match what the workflow expects, the step fails before any agent writes a line. + +### When to use it + +Use `applySiblingLinks` whenever your workflow consumes a package that: +- Lives in a sibling repo (monorepo-of-repos setup). +- Has changes on its `main` branch that may not be published yet. +- Your workflow targets head-of-main semantics, not published semantics. + +Don't use it for third-party packages (no sibling worktree). Use it for your own org's packages that you're iterating on concurrently. 
+ +### Options + +| Option | Default | Notes | +|---|---|---| +| `links` | (required) | Array of `{name, path, expect?}` | +| `stepName` | `'setup-sibling-links'` | Name of the emitted step | +| `dependsOn` | `['install-deps']` | Typically after install so `node_modules` exists | + +## Shell rules for setup helpers + +Setup helpers emit shell commands. A few hard-won rules from debugging them in Daytona sandboxes: + +- **Use `bash -c '...'` (single-quoted) for multi-line scripts.** Daytona's `/bin/sh` is dash, which doesn't support `${PIPESTATUS[0]}`. Single-quoting the whole script stops dash from expanding bash-only syntax before bash receives it. +- **Single-quote values in bash assignments** (`VAR='literal'`, not `VAR="literal"`). Double-quoted strings still expand `$VAR`, `$(cmd)`, and backticks — which is fine for tokens you control but dangerous for user-supplied paths / names. +- **`set -euo pipefail` at the top.** Exit on the first failing command, fail on unset vars, and fail if any stage of a pipeline fails. +- **`2>&1 | tail -N` on noisy installers.** Full npm/pip output blows past `captureOutput` size limits; tail keeps enough to diagnose. Without `pipefail` the pipeline exits with `tail`'s status, so pair the two whenever the step must fail when the installer fails. + +See the SDK source for [`applySiblingLinks`](https://github.com/AgentWorkforce/relay/blob/main/packages/sdk/src/workflows/sibling-links.ts) for a reference implementation that handles all of these. + +## See also + +- [Patterns](/docs/workflows-patterns) — how these helpers plug into real workflow shapes. +- [Common mistakes](/docs/workflows-common-mistakes) — shell-quoting pitfalls, hook-related failures. diff --git a/web/lib/docs-nav.ts b/web/lib/docs-nav.ts index 1dcc38027..cc8f2ae85 100644 --- a/web/lib/docs-nav.ts +++ b/web/lib/docs-nav.ts @@ -1,6 +1,12 @@ export interface NavItem { title: string; slug: string; + /** + * Optional nested items rendered as an indented sub-list beneath this + * item. Used to group related pages (e.g. all messaging primitives under + * "Message") without creating a separate top-level nav group. 
+ */ + children?: NavItem[]; } export interface NavGroup { @@ -14,28 +20,48 @@ export const docsNav: NavGroup[] = [ items: [ { title: 'Introduction', slug: 'introduction' }, { title: 'Quickstart', slug: 'quickstart' }, + { title: 'Spawning an agent', slug: 'spawning-an-agent' }, + { title: 'Event handlers', slug: 'event-handlers' }, ], }, { - title: 'Basics', + title: 'Primitives', items: [ - { title: 'Spawning an agent', slug: 'spawning-an-agent' }, - { title: 'Sending messages', slug: 'sending-messages' }, - { title: 'Event handlers', slug: 'event-handlers' }, - { title: 'Channels', slug: 'channels' }, - { title: 'DMs', slug: 'dms' }, - { title: 'Threads', slug: 'threads' }, - { title: 'Emoji reactions', slug: 'emoji-reactions' }, - { title: 'File sharing', slug: 'file-sharing' }, - { title: 'Authentication', slug: 'authentication' }, - { title: 'Permissions', slug: 'permissions' }, - { title: 'Scheduling', slug: 'scheduling' }, + { + title: 'Message', + slug: 'sending-messages', + children: [ + { title: 'Channels', slug: 'channels' }, + { title: 'DMs', slug: 'dms' }, + { title: 'Threads', slug: 'threads' }, + { title: 'Emoji reactions', slug: 'emoji-reactions' }, + ], + }, + { title: 'File', slug: 'file-sharing' }, + { + title: 'Auth', + slug: 'authentication', + children: [{ title: 'Permissions', slug: 'permissions' }], + }, + { title: 'Schedule', slug: 'scheduling' }, + ], + }, + { + title: 'Workflows', + items: [ + { title: 'Introduction', slug: 'workflows-introduction' }, + { title: 'Quickstart', slug: 'workflows-quickstart' }, + { title: 'Builder API', slug: 'reference-workflows' }, + { title: 'Patterns', slug: 'workflows-patterns' }, + { title: 'Setup helpers', slug: 'workflows-setup-helpers' }, + { title: 'GitHub primitive', slug: 'github-primitive' }, + { title: 'Common mistakes', slug: 'workflows-common-mistakes' }, + { title: 'Run from CLI', slug: 'cli-workflows' }, ], }, { title: 'Advanced', items: [ - { title: 'Workflows', slug: 'reference-workflows' 
}, { title: 'Cloud', slug: 'cloud' }, { title: 'Workforce', slug: 'workforce' }, ], @@ -54,7 +80,6 @@ export const docsNav: NavGroup[] = [ { title: 'Broker lifecycle', slug: 'cli-broker-lifecycle' }, { title: 'Agent management', slug: 'cli-agent-management' }, { title: 'Messaging', slug: 'cli-messaging' }, - { title: 'Run workflows', slug: 'cli-workflows' }, { title: 'Cloud commands', slug: 'cli-cloud-commands' }, { title: 'On the relay', slug: 'cli-on-the-relay' }, { title: 'CLI reference', slug: 'reference-cli' }, @@ -79,9 +104,19 @@ export const docsNav: NavGroup[] = [ }, ]; +/** Walk a NavItem tree and collect every slug (root + children). */ +function collectSlugs(items: NavItem[]): string[] { + const out: string[] = []; + for (const item of items) { + out.push(item.slug); + if (item.children) out.push(...collectSlugs(item.children)); + } + return out; +} + /** All doc slugs including hidden pages (for static generation + search) */ const ALL_SLUGS = [ - ...docsNav.flatMap((group) => group.items.map((item) => item.slug)), + ...docsNav.flatMap((group) => collectSlugs(group.items)), // Hidden from nav but still routable 'communicate', 'communicate-ai-sdk', @@ -94,7 +129,6 @@ const ALL_SLUGS = [ 'communicate-crewai', 'local-mode', 'reference-openclaw', - 'reference-workflows', ]; /** Flat list of all doc slugs for static generation */