From 90e13936c1914d5876a3bc4319368ae9f07de0d1 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 10 Apr 2026 20:24:49 +0200 Subject: [PATCH 1/2] feat: add meta workflows for --agent flag implementation Add 5 phase meta workflows for implementing the --agent flag feature: - Phase 1: SDK persona resolution (resolvePersonaByIdOrIntent, derivePreset, derivePattern) - Phase 2: Workflow generator module - Phase 3: CLI --agent flag integration - Phase 4: Unit and integration tests - Phase 5: Documentation Each phase includes spec and implementation workflow files. --- build-plans/00-meta-workflow-coordinator.ts | 248 +++ build-plans/01-sdk-persona-resolution.spec.md | 509 +++++ build-plans/01-sdk-persona-resolution.ts | 285 +++ build-plans/02-workflow-generator.spec.md | 975 +++++++++ build-plans/02-workflow-generator.ts | 341 +++ build-plans/03-cli-agent-flag.spec.md | 939 ++++++++ build-plans/03-cli-agent-flag.ts | 429 ++++ build-plans/04-tests.spec.md | 1916 +++++++++++++++++ build-plans/04-tests.ts | 410 ++++ build-plans/05-documentation.spec.md | 706 ++++++ build-plans/05-documentation.ts | 470 ++++ 11 files changed, 7228 insertions(+) create mode 100644 build-plans/00-meta-workflow-coordinator.ts create mode 100644 build-plans/01-sdk-persona-resolution.spec.md create mode 100644 build-plans/01-sdk-persona-resolution.ts create mode 100644 build-plans/02-workflow-generator.spec.md create mode 100644 build-plans/02-workflow-generator.ts create mode 100644 build-plans/03-cli-agent-flag.spec.md create mode 100644 build-plans/03-cli-agent-flag.ts create mode 100644 build-plans/04-tests.spec.md create mode 100644 build-plans/04-tests.ts create mode 100644 build-plans/05-documentation.spec.md create mode 100644 build-plans/05-documentation.ts diff --git a/build-plans/00-meta-workflow-coordinator.ts b/build-plans/00-meta-workflow-coordinator.ts new file mode 100644 index 000000000..75e80eb84 --- /dev/null +++ b/build-plans/00-meta-workflow-coordinator.ts @@ -0,0 +1,248 @@ +/** 
+ * Meta Workflow Coordinator: a Claude agent writes the Phase 1-5 specs for the --agent flag feature
+ * and a Codex agent implements one workflow file per spec. Paths resolve against RELAY_WORKFLOWS_ROOT
+ * (default: the current directory); the design plan is read from a sibling `workforce` checkout.
+ */
+
+import { workflow } from '@agent-relay/sdk/workflows';
+
+const REPO_ROOT = process.env.RELAY_WORKFLOWS_ROOT ?? process.cwd();
+const WORKFLOWS_DIR = 'workflows/meta-agent-flag';
+/** Builds the coordinator pipeline, runs it from REPO_ROOT, and logs the final status. */
+async function main() {
+  const result = await workflow('meta-workflow-coordinator')
+    .description('Coordinate creation of --agent flag meta workflows')
+    .pattern('pipeline')
+    .channel('wf-meta-workflow-coordinator')
+    .maxConcurrency(1)
+    .timeout(3_600_000)
+
+    .agent('claude-designer', {
+      cli: 'claude',
+      preset: 'worker',
+      role: 'Writes specification documents for workflow implementation phases',
+      retries: 1,
+    })
+
+    .agent('codex-implementer', {
+      cli: 'codex',
+      preset: 'worker',
+      role: 'Implements TypeScript workflow files from specifications',
+      retries: 2,
+    })
+
+    // Design steps: each spec depends on the previous one, so the five specs are written in order.
+    .step('design-phase-1', {
+      agent: 'claude-designer',
+      task: `Write a specification document for Phase 1 of the --agent flag feature.
+
+Read the design plan at: ${REPO_ROOT}/../workforce/workflows/investigation/design-plan.md
+
+Write the spec to: ${REPO_ROOT}/${WORKFLOWS_DIR}/01-sdk-persona-resolution.spec.md
+
+The spec should include:
+- Goal: Add resolvePersonaByIdOrIntent(), derivePreset(), derivePattern() to SDK
+- Files to create: packages/sdk/src/workflows/persona-utils.ts, packages/sdk/src/workflows/__tests__/persona-utils.test.ts
+- Key implementation details from the design plan
+- TypeScript interfaces for WorkflowGeneratorInput, PersonaSelection
+- Dependencies: none (Phase 1)
+
+Format as markdown with code examples.`,
+      verification: { type: 'exit_code' },
+      retries: 1,
+    })
+
+    // Phase 2: Design Phase 2 spec
+    .step('design-phase-2', {
+      agent: 'claude-designer',
+      dependsOn: ['design-phase-1'],
+      task: `Write a specification document for Phase 2 of the --agent flag feature.
+
+Read the design plan at: ${REPO_ROOT}/../workforce/workflows/investigation/design-plan.md
+
+Write the spec to: ${REPO_ROOT}/${WORKFLOWS_DIR}/02-workflow-generator.spec.md
+
+The spec should include:
+- Goal: Create workflow generator module in packages/sdk/src/
+- Files to create: packages/sdk/src/workflow-generator.ts, __tests__/workflow-generator.test.ts
+- Key functions: generateWorkflow(), emitBootstrapPhase(), emitSkillPhase(), emitContextPhase(), emitTaskPhase(), emitVerificationPhase(), emitFinalPhase()
+- WorkflowGeneratorInput interface
+- Dependencies: Phase 1 (persona-utils)
+
+Format as markdown with code examples.`,
+      verification: { type: 'exit_code' },
+      retries: 1,
+    })
+
+    // Phase 3: Design Phase 3 spec
+    .step('design-phase-3', {
+      agent: 'claude-designer',
+      dependsOn: ['design-phase-2'],
+      task: `Write a specification document for Phase 3 of the --agent flag feature.
+
+Read the design plan at: ${REPO_ROOT}/../workforce/workflows/investigation/design-plan.md
+
+Write the spec to: ${REPO_ROOT}/${WORKFLOWS_DIR}/03-cli-agent-flag.spec.md
+
+The spec should include:
+- Goal: Wire --agent flag into CLI
+- Files to modify: packages/sdk/src/workflows/cli.ts
+- CLI flag parsing for --agent, --profile, --tier, --dry-run, --context, --verify, --output, --concurrency, --timeout
+- Default context file heuristics per intent
+- ResolvePersonaByIdOrIntent integration
+- Dependencies: Phase 2 (workflow-generator)
+
+Format as markdown with code examples.`,
+      verification: { type: 'exit_code' },
+      retries: 1,
+    })
+
+    // Phase 4: Design Phase 4 spec
+    .step('design-phase-4', {
+      agent: 'claude-designer',
+      dependsOn: ['design-phase-3'],
+      task: `Write a specification document for Phase 4 of the --agent flag feature.
+
+Read the design plan at: ${REPO_ROOT}/../workforce/workflows/investigation/design-plan.md
+
+Write the spec to: ${REPO_ROOT}/${WORKFLOWS_DIR}/04-tests.spec.md
+
+The spec should include:
+- Goal: Unit and integration tests for all new SDK functions
+- Files to create: packages/sdk/src/workflows/__tests__/persona-utils.test.ts, workflow-generator.test.ts, workflow-generator.integration.test.ts
+- Test cases for resolvePersonaByIdOrIntent with all 13 persona IDs and intents
+- Test cases for derivePreset and derivePattern
+- Snapshot tests comparing generated workflows against reference workflows
+- Dependencies: Phase 3 (CLI integration)
+
+Format as markdown with code examples.`,
+      verification: { type: 'exit_code' },
+      retries: 1,
+    })
+
+    // Phase 5: Design Phase 5 spec
+    .step('design-phase-5', {
+      agent: 'claude-designer',
+      dependsOn: ['design-phase-4'],
+      task: `Write a specification document for Phase 5 of the --agent flag feature.
+
+Read the design plan at: ${REPO_ROOT}/../workforce/workflows/investigation/design-plan.md
+
+Write the spec to: ${REPO_ROOT}/${WORKFLOWS_DIR}/05-documentation.spec.md
+
+The spec should include:
+- Goal: Documentation for the --agent flag feature
+- Files to modify: README.md, CLI help text in cli.ts
+- Usage examples for:
+  1. Basic: agent-relay run "task" --agent <persona>
+  2. With context: agent-relay run "task" --agent <persona> --context file1 --context file2
+  3. Dry run: agent-relay run "task" --agent <persona> --dry-run
+- SDK exports documentation
+- Dependencies: Phase 4 (tests)
+
+Format as markdown with code examples.`,
+      verification: { type: 'exit_code' },
+      retries: 1,
+    })
+
+    // Implementation steps: each waits for its own spec and for the previous phase's workflow file.
+    .step('implement-phase-1', {
+      agent: 'codex-implementer',
+      dependsOn: ['design-phase-1'],
+      task: `Implement the Phase 1 meta workflow from the spec.
+
+Read the spec at: ${REPO_ROOT}/${WORKFLOWS_DIR}/01-sdk-persona-resolution.spec.md
+
+Create the file: ${REPO_ROOT}/${WORKFLOWS_DIR}/01-sdk-persona-resolution.ts
+
+This should be a complete agent-relay workflow file that:
+1. Uses the workflow builder API: const { workflow } = require('@agent-relay/sdk/workflows')
+2. Creates packages/sdk/src/workflows/persona-utils.ts with the SDK functions
+3. Creates unit tests
+4. Follows the existing SDK code patterns
+
+Declare agents with .agent(); use .step() for both deterministic steps (type: 'deterministic') and agent steps (agent: '<name>').`,
+      verification: { type: 'exit_code' },
+      retries: 2,
+    })
+
+    .step('implement-phase-2', {
+      agent: 'codex-implementer',
+      dependsOn: ['design-phase-2', 'implement-phase-1'],
+      task: `Implement the Phase 2 meta workflow from the spec.
+
+Read the spec at: ${REPO_ROOT}/${WORKFLOWS_DIR}/02-workflow-generator.spec.md
+
+Create the file: ${REPO_ROOT}/${WORKFLOWS_DIR}/02-workflow-generator.ts
+
+This should be a complete agent-relay workflow file that creates:
+1. packages/sdk/src/workflow-generator.ts - the main generator module
+2. Template functions for each phase (bootstrap, skills, context, task, verification, final)
+
+The generator should output complete workflow .ts files.`,
+      verification: { type: 'exit_code' },
+      retries: 2,
+    })
+
+    .step('implement-phase-3', {
+      agent: 'codex-implementer',
+      dependsOn: ['design-phase-3', 'implement-phase-2'],
+      task: `Implement the Phase 3 meta workflow from the spec.
+
+Read the spec at: ${REPO_ROOT}/${WORKFLOWS_DIR}/03-cli-agent-flag.spec.md
+
+Create the file: ${REPO_ROOT}/${WORKFLOWS_DIR}/03-cli-agent-flag.ts
+
+This should be a complete agent-relay workflow file that:
+1. Modifies packages/sdk/src/workflows/cli.ts to add --agent flag parsing
+2. Adds resolvePersonaByIdOrIntent integration
+3. Adds default context heuristics
+4. Handles --dry-run mode`,
+      verification: { type: 'exit_code' },
+      retries: 2,
+    })
+
+    .step('implement-phase-4', {
+      agent: 'codex-implementer',
+      dependsOn: ['design-phase-4', 'implement-phase-3'],
+      task: `Implement the Phase 4 meta workflow from the spec.
+
+Read the spec at: ${REPO_ROOT}/${WORKFLOWS_DIR}/04-tests.spec.md
+
+Create the file: ${REPO_ROOT}/${WORKFLOWS_DIR}/04-tests.ts
+
+This should be a complete agent-relay workflow file that:
+1. Creates unit tests for persona-utils
+2. Creates unit tests for workflow-generator
+3. Creates integration tests`,
+      verification: { type: 'exit_code' },
+      retries: 2,
+    })
+
+    .step('implement-phase-5', {
+      agent: 'codex-implementer',
+      dependsOn: ['design-phase-5', 'implement-phase-4'],
+      task: `Implement the Phase 5 meta workflow from the spec.
+
+Read the spec at: ${REPO_ROOT}/${WORKFLOWS_DIR}/05-documentation.spec.md
+
+Create the file: ${REPO_ROOT}/${WORKFLOWS_DIR}/05-documentation.ts
+
+This should be a complete agent-relay workflow file that:
+1. Updates README.md with --agent flag documentation
+2. Adds usage examples
+3. Updates CLI help text`,
+      verification: { type: 'exit_code' },
+      retries: 2,
+    })
+
+    .onError('fail-fast')
+    .run({ cwd: REPO_ROOT });
+
+  console.log('Result:', result.status);
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});
diff --git a/build-plans/01-sdk-persona-resolution.spec.md b/build-plans/01-sdk-persona-resolution.spec.md
new file mode 100644
index 000000000..176b3c91f
--- /dev/null
+++ b/build-plans/01-sdk-persona-resolution.spec.md
@@ -0,0 +1,509 @@
+# Phase 1 Specification: SDK Persona Resolution Utilities
+
+> Add `resolvePersonaByIdOrIntent()`, `derivePreset()`, and `derivePattern()` to the relay SDK workflows package.
+
+**Phase:** 1 of 5
+**Dependencies:** None (foundational phase)
+**Target files:**
+
+- `packages/sdk/src/workflows/persona-utils.ts` (new)
+- `packages/sdk/src/workflows/__tests__/persona-utils.test.ts` (new)
+
+---
+
+## Goal
+
+Provide a self-contained persona resolution layer inside `@agent-relay/sdk/workflows` that the `--agent` CLI flag and the workflow generator (Phase 2) can consume. The module must:
+
+1. Resolve a `--agent <ref>` CLI value to a concrete intent, preset, and swarm pattern
+2. Map persona IDs to intents (reverse lookup) and intents to persona IDs (forward lookup)
+3. 
Derive an `AgentPreset` (`'worker'` | `'analyst'`) from any intent string +4. Derive a `SwarmPattern` (`'dag'` | `'pipeline'`) from any intent string +5. Operate without external dependencies -- pure functions + an in-memory registry + +--- + +## TypeScript Interfaces + +### WorkflowGeneratorInput + +This is the primary input to the Phase 2 workflow generator, defined here so that Phase 1 types are forward-compatible. + +```ts +import type { SwarmPattern, AgentPreset } from './types.js'; + +/** + * Complete input for the workflow generator (Phase 2). + * Phase 1 produces the persona-related fields; CLI parsing fills the rest. + */ +export interface WorkflowGeneratorInput { + // From CLI parsing + taskDescription: string; + workflowName: string; // slugified from taskDescription + + // From persona resolution (Phase 1) + persona: PersonaProfile; + selection: PersonaResolution; // includes intent, preset, pattern + skillPlan: SkillMaterializationPlan; + + // From CLI flags or defaults + contextFiles: ContextFileSpec[]; + verifications: VerificationSpec[]; + outputFile?: string; + maxConcurrency: number; // default: 4 + timeout: number; // default: 3_600_000 +} + +export interface ContextFileSpec { + /** Step name in the generated workflow, e.g. 'read-publish-yml' */ + stepName: string; + /** Shell command to capture output, e.g. 'cat .github/workflows/publish.yml' */ + command: string; +} + +export interface VerificationSpec { + /** Step name in the generated workflow, e.g. 'verify-no-npm-token' */ + stepName: string; + /** Shell command that must exit 0, e.g. 'grep -q "id-token" ...' */ + command: string; +} + +export interface SkillMaterializationPlan { + installs: Array<{ skillId: string; command: string }>; + manifestPath?: string; +} +``` + +### PersonaSelection + +```ts +/** + * Input for resolving a persona reference from the CLI. + */ +export interface PersonaSelection { + /** The --agent value: either a persona ID or an intent string. 
*/ + ref: string; + /** Optional profile hint for disambiguation when multiple personas share an intent. */ + profile?: PersonaProfile; + /** Optional context for resolution. */ + context?: { + workflowType?: string; + taskType?: string; + }; +} +``` + +### PersonaResolution + +```ts +/** + * Result of persona resolution -- everything downstream needs to generate a workflow. + */ +export interface PersonaResolution { + /** The resolved persona profile, if found in the registry. */ + persona?: PersonaProfile; + /** The resolved intent string (always populated). */ + intent: string; + /** Derived preset: 'worker' or 'analyst'. */ + preset: AgentPreset; + /** Derived swarm pattern: 'dag' or 'pipeline'. */ + pattern: SwarmPattern; + /** Whether a concrete persona was found in the registry. */ + resolved: boolean; + /** How the ref was interpreted. */ + resolutionType: 'intent' | 'persona_id' | 'derived'; +} +``` + +### PersonaProfile + +```ts +/** + * A persona profile describes an agent's role, capabilities, and defaults. + */ +export interface PersonaProfile { + id: string; + name: string; + description?: string; + /** Primary intent this persona serves (e.g., 'review', 'debugging'). */ + intent?: string; + /** Default preset for agents with this persona. */ + preset?: AgentPreset; + /** Preferred swarm pattern for multi-agent coordination. */ + pattern?: SwarmPattern; + /** Skills this persona has. */ + skills?: string[]; + /** Additional metadata. */ + metadata?: Record<string, unknown>; +} +``` + +### PersonaRegistry + +```ts +/** + * In-memory registry of known persona profiles. + * Built at SDK init time; supports O(1) lookups by ID and intent. 
+ */ +export interface PersonaRegistry { + byId: Map<string, PersonaProfile>; + byIntent: Map<string, string[]>; + register(profile: PersonaProfile): void; + getById(id: string): PersonaProfile | undefined; + getByIntent(intent: string): string[]; + buildReverseMap(): Map<string, string>; +} +``` + +--- + +## Constant Definitions + +### Analyst intents + +Intents that map to `preset: 'analyst'` (read-only analysis, no code modification): + +```ts +export const ANALYST_INTENTS = [ + 'review', + 'architecture-plan', + 'requirements-analysis', + 'security-review', + 'verification', + 'test-strategy', +] as const; +``` + +### Pipeline intents + +Intents that map to `pattern: 'pipeline'` (inherently sequential processing): + +```ts +export const PIPELINE_INTENTS = ['requirements-analysis', 'documentation', 'tdd-enforcement'] as const; +``` + +### Preset derivation table + +All 13 production persona intents from the design plan: + +| Intent | Preset | Rationale | +| ------------------------------- | --------- | ------------------------------ | +| `implement-frontend` | `worker` | Modifies UI code | +| `review` | `analyst` | Read-only analysis | +| `architecture-plan` | `analyst` | Produces plans, not code | +| `requirements-analysis` | `analyst` | Read-only analysis | +| `debugging` | `worker` | Modifies code to fix bugs | +| `security-review` | `analyst` | Read-only analysis | +| `documentation` | `worker` | Creates/modifies doc files | +| `verification` | `analyst` | Read-only evidence checking | +| `test-strategy` | `analyst` | Produces strategy, not code | +| `tdd-enforcement` | `worker` | May create/modify test files | +| `flake-investigation` | `worker` | Modifies code to fix flakes | +| `opencode-workflow-correctness` | `worker` | Modifies config/code | +| `npm-provenance` | `worker` | Modifies workflow/config files | + +**Rule:** If intent is in `ANALYST_INTENTS`, return `'analyst'`. Otherwise return `'worker'`. 
+ +### Pattern derivation table + +| Intent | Pattern | Rationale | +| ----------------------- | ---------- | ------------------------------------------------------ | +| `requirements-analysis` | `pipeline` | Sequential: read -> analyze -> produce -> verify | +| `documentation` | `pipeline` | Sequential: read code -> write docs -> verify | +| `tdd-enforcement` | `pipeline` | Sequential red-green-refactor cycles | +| All other intents | `dag` | Parallel context reads + convergent analysis/execution | + +**Rule:** If intent is in `PIPELINE_INTENTS`, return `'pipeline'`. Otherwise return `'dag'`. + +--- + +## Function Specifications + +### `derivePreset(intent: string): AgentPreset` + +Pure function. Case-insensitive. Returns `'analyst'` for analyst intents, `'worker'` otherwise. + +```ts +export function derivePreset(intent: string): AgentPreset { + const normalized = intent.toLowerCase().trim(); + if (ANALYST_INTENTS.includes(normalized as AnalystIntent)) { + return 'analyst'; + } + return 'worker'; +} +``` + +### `derivePattern(intent: string): SwarmPattern` + +Pure function. Case-insensitive. Returns `'pipeline'` for pipeline intents, `'dag'` otherwise. + +```ts +export function derivePattern(intent: string): SwarmPattern { + const normalized = intent.toLowerCase().trim(); + if (PIPELINE_INTENTS.includes(normalized as PipelineIntent)) { + return 'pipeline'; + } + return 'dag'; +} +``` + +### `resolvePersonaByIdOrIntent(ref: string, profile?: PersonaProfile): PersonaResolution` + +Two-step lookup with derivation fallback: + +1. **Try as intent** -- check `personaRegistry.getByIntent(ref)`. If found, resolve using the first matching persona (or the profile hint if it matches). +2. **Try as persona ID** -- check `personaRegistry.getById(ref)`. If found, extract the intent from the persona. +3. **Fallback** -- treat `ref` as the intent, derive preset and pattern, set `resolved: false`. 
+ +```ts +export function resolvePersonaByIdOrIntent(ref: string, profile?: PersonaProfile): PersonaResolution { + const normalized = ref.toLowerCase().trim(); + + // Step 1: Try as intent + const intentPersonaIds = personaRegistry.getByIntent(normalized); + if (intentPersonaIds.length > 0) { + const personaId = profile?.id && intentPersonaIds.includes(profile.id) ? profile.id : intentPersonaIds[0]; + const persona = personaRegistry.getById(personaId); + const intent = persona?.intent || normalized; + return { + persona, + intent, + preset: persona?.preset || derivePreset(intent), + pattern: persona?.pattern || derivePattern(intent), + resolved: true, + resolutionType: 'intent', + }; + } + + // Step 2: Try as persona ID + const persona = personaRegistry.getById(normalized); + if (persona) { + const intent = persona.intent || getPersonaIdToIntentMap().get(normalized) || normalized; + return { + persona, + intent, + preset: persona.preset || derivePreset(intent), + pattern: persona.pattern || derivePattern(intent), + resolved: true, + resolutionType: 'persona_id', + }; + } + + // Step 3: Fallback derivation + return { + persona: undefined, + intent: normalized, + preset: derivePreset(normalized), + pattern: derivePattern(normalized), + resolved: false, + resolutionType: 'derived', + }; +} +``` + +### Helper functions + +```ts +export function isAnalystIntent(intent: string): boolean; +export function isPipelineIntent(intent: string): boolean; +``` + +### Registry management + +```ts +export function initPersonaRegistry(profiles: PersonaProfile[]): void; +export function resetPersonaRegistry(): void; +export function getPersonaIdToIntentMap(): Map<string, string>; +export const personaRegistry: PersonaRegistry; +``` + +--- + +## Default persona profiles + +The module initializes with 10 default profiles on import. 
These cover the standard workforce personas: + +| ID | Name | Intent | Preset | Pattern | +| ------------------------- | ----------------------- | ----------------------- | --------- | ---------- | +| `reviewer-v1` | Code Reviewer | `review` | `analyst` | `dag` | +| `reviewer-v2` | Senior Reviewer | `review` | `analyst` | `dag` | +| `architect-v1` | Architecture Planner | `architecture-plan` | `analyst` | `dag` | +| `requirements-analyst-v1` | Requirements Analyst | `requirements-analysis` | `analyst` | `pipeline` | +| `security-reviewer-v1` | Security Reviewer | `security-review` | `analyst` | `dag` | +| `verifier-v1` | Verification Specialist | `verification` | `analyst` | `dag` | +| `test-strategist-v1` | Test Strategist | `test-strategy` | `analyst` | `dag` | +| `docs-writer-v1` | Documentation Writer | `documentation` | `worker` | `pipeline` | +| `tdd-coach-v1` | TDD Coach | `tdd-enforcement` | `worker` | `pipeline` | +| `code-worker-v1` | Code Worker | `code-gen` | `worker` | `dag` | + +The registry is extensible -- consumers can call `personaRegistry.register()` to add custom profiles or `initPersonaRegistry()` to replace all defaults. 
+ +--- + +## File: `packages/sdk/src/workflows/persona-utils.ts` + +### Structure + +``` +persona-utils.ts + ├── Intent constants (ANALYST_INTENTS, PIPELINE_INTENTS) + ├── Type exports (AnalystIntent, PipelineIntent) + ├── Interface definitions (PersonaProfile, PersonaSelection, PersonaResolution, PersonaRegistry) + ├── Forward-compatible types (WorkflowGeneratorInput, ContextFileSpec, VerificationSpec, SkillMaterializationPlan) + ├── Registry implementation (_registry singleton) + ├── Registry management (init, reset, getPersonaIdToIntentMap) + ├── Derivation functions (derivePreset, derivePattern) + ├── Query helpers (isAnalystIntent, isPipelineIntent) + ├── Resolution function (resolvePersonaByIdOrIntent) + ├── Convenience wrapper (resolvePersonaSelection) + ├── Default profiles (DEFAULT_PERSONA_PROFILES) + └── Auto-init (initPersonaRegistry called on import) +``` + +### Imports + +```ts +import type { AgentPreset } from './types.js'; +import type { SwarmPattern } from './types.js'; +``` + +Only depends on types already in the SDK's `workflows/types.ts`. No external dependencies. 
+ +--- + +## File: `packages/sdk/src/workflows/__tests__/persona-utils.test.ts` + +### Test structure (vitest) + +```ts +import { describe, it, expect, beforeEach } from 'vitest'; +import { + derivePreset, + derivePattern, + resolvePersonaByIdOrIntent, + isAnalystIntent, + isPipelineIntent, + resetPersonaRegistry, + initPersonaRegistry, + DEFAULT_PERSONA_PROFILES, + ANALYST_INTENTS, + PIPELINE_INTENTS, + type PersonaProfile, +} from '../persona-utils.js'; +``` + +### Test cases + +#### `derivePreset` + +| Test | Input | Expected | +| ------------------ | ---------------------------------------------------------------------------------------------------------------------- | ---------------- | +| Analyst intents | `'review'`, `'architecture-plan'`, `'requirements-analysis'`, `'security-review'`, `'verification'`, `'test-strategy'` | `'analyst'` each | +| Case insensitivity | `'REVIEW'`, `'Security-Review'` | `'analyst'` each | +| Worker intents | `'code-gen'`, `'refactor'`, `'documentation'` | `'worker'` each | +| Unknown intents | `'unknown-intent'`, `''` | `'worker'` each | + +#### `derivePattern` + +| Test | Input | Expected | +| ------------------ | ----------------------------------------------------------------- | ----------------- | +| Pipeline intents | `'requirements-analysis'`, `'documentation'`, `'tdd-enforcement'` | `'pipeline'` each | +| Case insensitivity | `'DOCUMENTATION'`, `'TDD-Enforcement'` | `'pipeline'` each | +| DAG intents | `'review'`, `'code-gen'`, `'architecture-plan'` | `'dag'` each | +| Unknown intents | `'unknown-intent'`, `''` | `'dag'` each | + +#### `isAnalystIntent` / `isPipelineIntent` + +- All `ANALYST_INTENTS` entries return `true` for `isAnalystIntent` +- Non-analyst intents return `false` +- All `PIPELINE_INTENTS` entries return `true` for `isPipelineIntent` +- Non-pipeline intents return `false` + +#### `resolvePersonaByIdOrIntent` + +Each test uses `beforeEach` to reset and reinitialize the registry with 
`DEFAULT_PERSONA_PROFILES`. + +| Test group | Test | Input | Assertions | +| ------------------------- | --------------------------------- | -------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | +| **Intent resolution** | Resolve `'review'` | `resolvePersonaByIdOrIntent('review')` | `resolved: true`, `resolutionType: 'intent'`, `persona.id: 'reviewer-v1'`, `preset: 'analyst'`, `pattern: 'dag'` | +| | Resolve `'security-review'` | `resolvePersonaByIdOrIntent('security-review')` | `resolved: true`, `persona.id: 'security-reviewer-v1'`, `preset: 'analyst'` | +| | Resolve `'requirements-analysis'` | `resolvePersonaByIdOrIntent('requirements-analysis')` | `pattern: 'pipeline'`, `preset: 'analyst'` | +| | Resolve `'documentation'` | `resolvePersonaByIdOrIntent('documentation')` | `pattern: 'pipeline'`, `preset: 'worker'` | +| **Persona ID resolution** | Resolve `'reviewer-v2'` | `resolvePersonaByIdOrIntent('reviewer-v2')` | `resolved: true`, `resolutionType: 'persona_id'`, `persona.id: 'reviewer-v2'` | +| | Resolve `'architect-v1'` | `resolvePersonaByIdOrIntent('architect-v1')` | `intent: 'architecture-plan'`, `preset: 'analyst'` | +| **Fallback derivation** | Unknown ref | `resolvePersonaByIdOrIntent('unknown-persona')` | `resolved: false`, `resolutionType: 'derived'`, `preset: 'worker'`, `pattern: 'dag'` | +| **Profile hint** | Disambiguate | `resolvePersonaByIdOrIntent('review', { id: 'reviewer-v2', name: 'Senior Reviewer', intent: 'review' })` | `persona.id: 'reviewer-v2'` (not `reviewer-v1`) | +| **Case handling** | Uppercase persona ID | `resolvePersonaByIdOrIntent('REVIEWER-V1')` | `resolved: true`, `persona.id: 'reviewer-v1'` | +| | Mixed-case intent | `resolvePersonaByIdOrIntent('Security-Review')` | `resolved: true`, `resolutionType: 'intent'` | + +#### `DEFAULT_PERSONA_PROFILES` + +- All 10 
expected persona IDs are present +- All profiles have valid `preset` values (`'lead' | 'worker' | 'reviewer' | 'analyst'`) +- All profiles have valid `pattern` values (any `SwarmPattern` value) + +--- + +## SDK Export Changes + +The new file must be re-exported from the workflows index. Add to `packages/sdk/src/workflows/index.ts`: + +```ts +export { + derivePreset, + derivePattern, + resolvePersonaByIdOrIntent, + resolvePersonaSelection, + isAnalystIntent, + isPipelineIntent, + personaRegistry, + initPersonaRegistry, + resetPersonaRegistry, + getPersonaIdToIntentMap, + DEFAULT_PERSONA_PROFILES, + ANALYST_INTENTS, + PIPELINE_INTENTS, + type PersonaProfile, + type PersonaSelection, + type PersonaResolution, + type PersonaRegistry, + type AnalystIntent, + type PipelineIntent, + type WorkflowGeneratorInput, + type ContextFileSpec, + type VerificationSpec, + type SkillMaterializationPlan, +} from './persona-utils.js'; +``` + +--- + +## Implementation Notes + +1. **No external dependencies.** The module only imports `AgentPreset` and `SwarmPattern` from the existing `types.ts` in the same package. No npm packages, no network calls, no filesystem access. + +2. **Registry is a module-level singleton.** The `_registry` object is created once on module load. `DEFAULT_PERSONA_PROFILES` is auto-registered via `initPersonaRegistry()` at the bottom of the file. Tests use `resetPersonaRegistry()` + `initPersonaRegistry()` in `beforeEach` for isolation. + +3. **Lazy reverse map.** The `personaIdToIntent` reverse map is built lazily on first `getPersonaIdToIntentMap()` call and cached. `resetPersonaRegistry()` clears the cache. + +4. **Case-insensitive matching.** All ref/intent values are normalized to lowercase via `.toLowerCase().trim()` before lookup. + +5. **Forward-compatible types.** `WorkflowGeneratorInput`, `ContextFileSpec`, `VerificationSpec`, and `SkillMaterializationPlan` are defined here but consumed in Phase 2. 
This prevents a Phase 2 dependency back into Phase 1 types. + +6. **Pattern follows existing codebase conventions.** The file uses ES module imports with `.js` extensions, `type` keyword for type-only imports, and JSDoc comments for exported functions -- matching the patterns in `builder.ts` and `types.ts`. + +--- + +## Acceptance Criteria + +- [ ] `derivePreset()` returns `'analyst'` for all 6 analyst intents and `'worker'` for all others +- [ ] `derivePattern()` returns `'pipeline'` for all 3 pipeline intents and `'dag'` for all others +- [ ] `resolvePersonaByIdOrIntent()` resolves all 10 default persona IDs correctly +- [ ] `resolvePersonaByIdOrIntent()` resolves all registered intents correctly +- [ ] `resolvePersonaByIdOrIntent()` falls back to derivation for unknown refs (does not throw) +- [ ] Profile hint disambiguates when multiple personas share an intent +- [ ] All lookups are case-insensitive +- [ ] `resetPersonaRegistry()` + `initPersonaRegistry()` provides test isolation +- [ ] All tests pass via `vitest` +- [ ] No new external dependencies introduced +- [ ] Types are exported and available to Phase 2 consumers diff --git a/build-plans/01-sdk-persona-resolution.ts b/build-plans/01-sdk-persona-resolution.ts new file mode 100644 index 000000000..3bb578185 --- /dev/null +++ b/build-plans/01-sdk-persona-resolution.ts @@ -0,0 +1,285 @@ +import { createRequire } from 'node:module'; +// Load the SDK through createRequire instead of a static import. +const require = createRequire(import.meta.url); +const { workflow } = require('@agent-relay/sdk/workflows'); +// REPO_ROOT may be overridden via the environment; the fallback is the original author's checkout. +const REPO_ROOT = process.env.REPO_ROOT || '/Users/khaliqgant/Projects/AgentWorkforce/relay-workflows'; +const SPEC_PATH = 'workflows/meta-agent-flag/01-sdk-persona-resolution.spec.md'; // NOTE(review): this patch adds the spec under build-plans/ -- confirm this path exists in REPO_ROOT +const SDK_WORKFLOWS_DIR = 'packages/sdk/src/workflows'; +const PERSONA_UTILS_PATH = `${SDK_WORKFLOWS_DIR}/persona-utils.ts`; +const PERSONA_TEST_PATH = `${SDK_WORKFLOWS_DIR}/__tests__/persona-utils.test.ts`; +const WORKFLOWS_INDEX_PATH = `${SDK_WORKFLOWS_DIR}/index.ts`; +/** Declare the Phase 1 workflow (three codex agents plus deterministic read/verify/test steps), run it with cwd = REPO_ROOT, and set a failing exit code unless the run reports 'completed'. */ +async function main() { + const wf 
= workflow('phase-1-sdk-persona-resolution') + .description('Implement SDK persona resolution utilities and tests from the Phase 1 meta-agent flag spec') + .pattern('dag') + .channel('wf-phase-1-sdk-persona-resolution') + .maxConcurrency(4) + .timeout(3_600_000) + .agent('sdk-implementer', { + cli: 'codex', + preset: 'worker', + role: 'Focused TypeScript SDK implementer for workflow utilities', + retries: 2, + }) + .agent('test-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused vitest test author for SDK workflow utilities', + retries: 2, + }) + .agent('sdk-reviewer', { + cli: 'codex', + preset: 'reviewer', + role: 'Reviews SDK utility implementation for spec conformance and regression risk', + retries: 1, + }); + // Abort before any work when the current git branch is main. + wf.step('guard-not-main', { + type: 'deterministic', + command: [ + 'branch="$(git branch --show-current)"', + 'if [ "$branch" = "main" ]; then echo "Refusing to run Phase 1 workflow on main"; exit 1; fi', + 'echo "Running on branch: ${branch:-detached}"', + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + // The read-* steps capture file contents that later agent prompts interpolate via {{steps.NAME.output}}. + wf.step('read-spec', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${SPEC_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-sdk-types', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `sed -n '1,180p' ${SDK_WORKFLOWS_DIR}/types.ts`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-workflows-index', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${WORKFLOWS_INDEX_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-test-patterns', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `sed -n '1,180p' ${SDK_WORKFLOWS_DIR}/__tests__/template-resolver.test.ts`, + `sed -n '1,120p' ${SDK_WORKFLOWS_DIR}/__tests__/verification.test.ts`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('implement-persona-utils', { + agent: 
'sdk-implementer', + dependsOn: ['read-spec', 'read-sdk-types'], + task: ` +You are implementing Phase 1 of the --agent flag feature. Do not use Relaycast MCP tools or spawn sub-agents. + +Create ${PERSONA_UTILS_PATH} from the Phase 1 spec. + +Spec: +{{steps.read-spec.output}} + +Relevant existing workflow SDK types: +{{steps.read-sdk-types.output}} + +Requirements: +1. Import only \`AgentPreset\` and \`SwarmPattern\` from \`./types.js\` using type imports. +2. Export ANALYST_INTENTS, PIPELINE_INTENTS, DEFAULT_PERSONA_PROFILES, all specified interfaces/types, registry helpers, derivation helpers, resolvePersonaByIdOrIntent(), and resolvePersonaSelection(). +3. Use a module-level in-memory PersonaRegistry with case-insensitive by-id and by-intent lookups. +4. Implement lazy getPersonaIdToIntentMap() cache invalidation on register/init/reset. +5. Auto-initialize the registry with the 10 default profiles on import. + +Only edit ${PERSONA_UTILS_PATH}. End your output with PERSONA_UTILS_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'PERSONA_UTILS_DONE' }, + retries: 2, + }); + // Grep-based check that the expected exports and persona IDs are present before dependent steps run. + wf.step('verify-persona-utils-file', { + type: 'deterministic', + dependsOn: ['implement-persona-utils'], + command: [ + `test -f ${PERSONA_UTILS_PATH}`, + `grep -q "export function derivePreset" ${PERSONA_UTILS_PATH}`, + `grep -q "export function derivePattern" ${PERSONA_UTILS_PATH}`, + `grep -q "export function resolvePersonaByIdOrIntent" ${PERSONA_UTILS_PATH}`, + `grep -q "export function resolvePersonaSelection" ${PERSONA_UTILS_PATH}`, + `grep -q "DEFAULT_PERSONA_PROFILES" ${PERSONA_UTILS_PATH}`, + `grep -q "reviewer-v1" ${PERSONA_UTILS_PATH}`, + `grep -q "code-worker-v1" ${PERSONA_UTILS_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('export-persona-utils', { + agent: 'sdk-implementer', + dependsOn: ['read-spec', 'read-workflows-index', 'verify-persona-utils-file'], + task: ` +You are exporting the Phase 1 persona utility public API. Do not use Relaycast MCP tools or spawn sub-agents. + +Update ${WORKFLOWS_INDEX_PATH} to re-export the persona-utils public API. + +Spec export block: +{{steps.read-spec.output}} + +Current index: +{{steps.read-workflows-index.output}} + +Requirements: +1. Preserve every existing export in ${WORKFLOWS_INDEX_PATH}. +2. Add the explicit export block from the "SDK Export Changes" section of the spec. +3. Export from \`./persona-utils.js\`. +4. Do not modify any other file. + +End your output with INDEX_EXPORT_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'INDEX_EXPORT_DONE' }, + retries: 2, + }); + + wf.step('verify-index-export', { + type: 'deterministic', + dependsOn: ['export-persona-utils'], + command: [ + `grep -q "persona-utils.js" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "resolvePersonaByIdOrIntent" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "WorkflowGeneratorInput" ${WORKFLOWS_INDEX_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('write-persona-utils-tests', { + agent: 'test-writer', + dependsOn: ['read-spec', 'read-test-patterns', 'verify-persona-utils-file'], + task: ` +You are writing Phase 1 persona utility tests. Do not use Relaycast MCP tools or spawn sub-agents. + +Create ${PERSONA_TEST_PATH} with focused vitest coverage for persona-utils. + +Spec: +{{steps.read-spec.output}} + +Existing test style examples: +{{steps.read-test-patterns.output}} + +Requirements: +1. Import from \`../persona-utils.js\` and use vitest describe/it/expect/beforeEach. +2. Cover derivePreset(), derivePattern(), isAnalystIntent(), and isPipelineIntent(). +3. Cover intent, persona ID, fallback, profile hint, uppercase ID, and mixed-case intent resolution. +4. Reset and initialize DEFAULT_PERSONA_PROFILES in beforeEach. +5. Validate all 10 default persona IDs plus preset/pattern validity. + +Only edit ${PERSONA_TEST_PATH}. End your output with PERSONA_TESTS_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'PERSONA_TESTS_DONE' }, + retries: 2, + }); + + wf.step('verify-test-file', { + type: 'deterministic', + dependsOn: ['write-persona-utils-tests'], + command: [ + `test -f ${PERSONA_TEST_PATH}`, + `grep -q "derivePreset" ${PERSONA_TEST_PATH}`, + `grep -q "derivePattern" ${PERSONA_TEST_PATH}`, + `grep -q "resolvePersonaByIdOrIntent" ${PERSONA_TEST_PATH}`, + `grep -q "reviewer-v2" ${PERSONA_TEST_PATH}`, + `grep -q "requirements-analysis" ${PERSONA_TEST_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + // Runs only the new test file, and only after both the index-export and test-file checks have passed. + wf.step('run-persona-utils-tests', { + type: 'deterministic', + dependsOn: ['verify-index-export', 'verify-test-file'], + command: `npx vitest run ${PERSONA_TEST_PATH} --reporter=verbose`, + captureOutput: true, + failOnError: true, + }); + + wf.step('typecheck-sdk', { + type: 'deterministic', + dependsOn: ['run-persona-utils-tests'], + command: 'cd packages/sdk && npx tsc -p tsconfig.json --noEmit', + captureOutput: true, + failOnError: true, + }); + + wf.step('review-phase-1', { + agent: 'sdk-reviewer', + dependsOn: ['run-persona-utils-tests', 'typecheck-sdk'], + task: ` +You are reviewing Phase 1 persona resolution artifacts. Do not use Relaycast MCP tools or spawn sub-agents. + +Review the Phase 1 persona resolution implementation. Do not edit files. + +Spec: +{{steps.read-spec.output}} + +Check: +1. ${PERSONA_UTILS_PATH} has no external dependencies or filesystem access. +2. derivePreset(), derivePattern(), registry helpers, and resolvePersonaByIdOrIntent() match the spec. +3. ${WORKFLOWS_INDEX_PATH} re-exports the full persona-utils public API. +4. ${PERSONA_TEST_PATH} covers the required behaviors and isolation reset. +5. Focused vitest and SDK typecheck passed: +{{steps.run-persona-utils-tests.output}} +{{steps.typecheck-sdk.output}} + +Output REVIEW_PASS if the implementation is acceptable; otherwise output REVIEW_FAIL with concrete blockers. 
+`.trim(), + verification: { type: 'output_contains', value: 'REVIEW_PASS' }, + retries: 1, + }); + + wf.step('summarize-artifacts', { + type: 'deterministic', + dependsOn: ['review-phase-1'], + command: [ + `echo "Phase 1 persona resolution workflow completed."`, + `echo "Artifacts:"`, + `echo "- ${PERSONA_UTILS_PATH}"`, + `echo "- ${PERSONA_TEST_PATH}"`, + `echo "- ${WORKFLOWS_INDEX_PATH}"`, + `git diff -- ${PERSONA_UTILS_PATH} ${PERSONA_TEST_PATH} ${WORKFLOWS_INDEX_PATH} | sed -n '1,220p'`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + // Run with cwd = REPO_ROOT using the 'retry' error strategy (maxRetries 2, 10 s delay). + const result = await wf.onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }).run({ + cwd: REPO_ROOT, + }); + // A result without a status field is reported as a dry run and is not treated as a failure. + if ('status' in result) { + console.log(`Result: ${result.status}`); + } else { + console.log('Dry run completed.'); + return; + } + + if (result.status !== 'completed') { + process.exitCode = 1; + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/build-plans/02-workflow-generator.spec.md b/build-plans/02-workflow-generator.spec.md new file mode 100644 index 000000000..c284e1fa8 --- /dev/null +++ b/build-plans/02-workflow-generator.spec.md @@ -0,0 +1,975 @@ +# Phase 2 Specification: Workflow Generator Module + +> Generate complete, runnable workflow TypeScript files from a `WorkflowGeneratorInput` using the SDK `WorkflowBuilder` API. + +**Phase:** 2 of 5 +**Dependencies:** Phase 1 (persona-utils — provides `WorkflowGeneratorInput`, `PersonaResolution`, `ContextFileSpec`, `VerificationSpec`, `SkillMaterializationPlan`) +**Target files:** + +- `packages/sdk/src/workflows/workflow-generator.ts` (new) +- `packages/sdk/src/workflows/__tests__/workflow-generator.test.ts` (new) + +--- + +## Goal + +Create a workflow generator module that transforms a `WorkflowGeneratorInput` (produced by Phase 1 persona resolution + CLI flag parsing) into a complete, self-contained TypeScript workflow file. 
The generated file uses the SDK's `WorkflowBuilder` API (`workflow()`, `.agent()`, `.step()`, `.run()`) and follows the six-phase execution model: + +1. **Bootstrap** — set up the workflow, declare agents, configure the swarm pattern +2. **Skills** — install any required skills/tools for the agent persona +3. **Context** — read context files via deterministic shell steps +4. **Task** — execute the main agent task, referencing context outputs via `{{steps.X.output}}` +5. **Verification** — run verification commands to validate results +6. **Final** — complete the workflow with a summary step + +The generator must produce valid TypeScript that can be written to disk and executed directly via `npx tsx <file>` or `agent-relay run`. + +--- + +## Architecture Overview + +``` +WorkflowGeneratorInput + │ + ▼ + generateWorkflow() + │ + ├── emitBootstrapPhase() → imports, workflow(), .pattern(), .channel(), agents + ├── emitSkillPhase() → deterministic steps for skill installs + ├── emitContextPhase() → deterministic steps to capture file contents + ├── emitTaskPhase() → agent step(s) with {{steps.X.output}} chaining + ├── emitVerificationPhase()→ deterministic steps with exit_code verification + └── emitFinalPhase() → summary step, .onError(), .run() + │ + ▼ + GeneratedWorkflow + (source: string, metadata: WorkflowMetadata) +``` + +--- + +## TypeScript Interfaces + +### GeneratedWorkflow + +```ts +/** + * Output of the workflow generator. + */ +export interface GeneratedWorkflow { + /** Complete TypeScript source code for the workflow file. */ + source: string; + /** Metadata about the generated workflow for tooling/logging. */ + metadata: WorkflowMetadata; +} +``` + +### WorkflowMetadata + +```ts +/** + * Metadata about a generated workflow — used for logging, dry-run reports, + * and trajectory recording. + */ +export interface WorkflowMetadata { + /** Workflow name (slugified from task description). */ + name: string; + /** Swarm pattern used. 
*/ + pattern: SwarmPattern; + /** Agent preset used. */ + preset: AgentPreset; + /** Number of agents declared. */ + agentCount: number; + /** Total number of steps generated. */ + stepCount: number; + /** Breakdown of steps by phase. */ + phases: { + bootstrap: number; + skills: number; + context: number; + task: number; + verification: number; + final: number; + }; + /** Whether the workflow was generated with skill installs. */ + hasSkills: boolean; + /** Whether the workflow includes verification steps. */ + hasVerification: boolean; + /** Estimated execution waves (for dry-run preview). */ + estimatedWaves: number; +} +``` + +### WorkflowGeneratorOptions + +```ts +/** + * Options that control code generation behavior (not the workflow itself). + */ +export interface WorkflowGeneratorOptions { + /** Indent style: 'spaces' (default) or 'tabs'. */ + indent?: 'spaces' | 'tabs'; + /** Number of spaces per indent level (default: 2). */ + indentSize?: number; + /** Include inline comments in generated code (default: true). */ + comments?: boolean; + /** Include a header comment with generation metadata (default: true). */ + header?: boolean; +} +``` + +### Re-exported from Phase 1 + +The following types are consumed directly from `persona-utils.ts` (Phase 1): + +- `WorkflowGeneratorInput` — primary input +- `PersonaResolution` — resolved persona with intent, preset, pattern +- `PersonaProfile` — persona details +- `ContextFileSpec` — `{ stepName, command }` for context-gathering steps +- `VerificationSpec` — `{ stepName, command }` for verification steps +- `SkillMaterializationPlan` — `{ installs, manifestPath }` for skill setup + +--- + +## Function Specifications + +### `generateWorkflow(input: WorkflowGeneratorInput, options?: WorkflowGeneratorOptions): GeneratedWorkflow` + +Main entry point. Orchestrates all `emit*` functions and concatenates their output into a complete TypeScript source string. 
+ +```ts +import type { WorkflowGeneratorInput } from './persona-utils.js'; + +/** + * Generate a complete workflow TypeScript file from a WorkflowGeneratorInput. + * + * @param input - Resolved persona, task description, context files, and verifications + * @param options - Code generation options (indent style, comments, etc.) + * @returns Generated workflow source code and metadata + */ +export function generateWorkflow( + input: WorkflowGeneratorInput, + options?: WorkflowGeneratorOptions +): GeneratedWorkflow { + const opts = resolveOptions(options); + const lines: string[] = []; + + lines.push(...emitBootstrapPhase(input, opts)); + lines.push(...emitSkillPhase(input, opts)); + lines.push(...emitContextPhase(input, opts)); + lines.push(...emitTaskPhase(input, opts)); + lines.push(...emitVerificationPhase(input, opts)); + lines.push(...emitFinalPhase(input, opts)); + + const source = lines.join('\n'); + const metadata = computeMetadata(input); + + return { source, metadata }; +} +``` + +**Behavior:** + +1. Resolves default options via `resolveOptions()` +2. Calls each `emit*` function in order, collecting lines +3. Joins lines with newlines +4. Computes metadata from the input +5. Returns `{ source, metadata }` + +--- + +### `emitBootstrapPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[]` + +Generates the file header, imports, `main()` function opening, `workflow()` call, `.description()`, `.pattern()`, `.channel()`, `.maxConcurrency()`, `.timeout()`, and all `.agent()` declarations. + +```ts +/** + * Emit the bootstrap phase: imports, workflow declaration, agent definitions. + */ +export function emitBootstrapPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[] { + // ... 
+} +``` + +**Generated code structure:** + +```ts +/** + * Auto-generated workflow: {workflowName} + * Persona: {persona.name} ({selection.intent}) + * Pattern: {selection.pattern} | Preset: {selection.preset} + * Generated: {ISO timestamp} + */ +import { workflow } from '@agent-relay/sdk/workflows'; + +async function main() { + const result = await workflow('{workflowName}') + .description('{taskDescription}') + .pattern('{selection.pattern}') + .channel('wf-{workflowName}') + .maxConcurrency({maxConcurrency}) + .timeout({timeout}) + + .agent('{agentName}', { + cli: 'claude', + preset: '{selection.preset}', + role: '{persona.description || taskDescription}', + retries: 2, + }) +``` + +**Agent naming rules:** + +- Single agent workflows use `{intent}-agent` as the agent name (e.g., `review-agent`, `debugging-agent`) +- The agent's `cli` defaults to `'claude'`; can be overridden by persona metadata +- The `role` is derived from `persona.description` or falls back to the task description +- `preset` comes directly from `selection.preset` + +--- + +### `emitSkillPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[]` + +Generates deterministic steps for each skill install in the `SkillMaterializationPlan`. Emitted only if `input.skillPlan.installs.length > 0`. + +```ts +/** + * Emit skill installation steps (deterministic shell commands). + * Skipped entirely if no skills need to be installed. + */ +export function emitSkillPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[] { + // ... 
+} +``` + +**Generated code structure (per install):** + +```ts + .step('install-skill-{skillId}', { + type: 'deterministic', + command: '{install.command}', + failOnError: true, + }) +``` + +**Behavior:** + +- Each install in `skillPlan.installs` becomes a deterministic step +- Step names are slugified: `install-skill-{skillId}` +- All skill steps are independent (no `dependsOn` between them) so they can run in parallel +- If `skillPlan.manifestPath` is set, an additional step reads the manifest for verification + +--- + +### `emitContextPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[]` + +Generates deterministic steps to capture context files. Each `ContextFileSpec` becomes a step whose stdout output is available to downstream agent steps via `{{steps.{stepName}.output}}`. + +```ts +/** + * Emit context-gathering steps (deterministic shell commands). + * Each step captures a file or command output for downstream agent consumption. + */ +export function emitContextPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[] { + // ... +} +``` + +**Generated code structure (per context file):** + +```ts + .step('{contextFile.stepName}', { + type: 'deterministic', + command: '{contextFile.command}', + captureOutput: true, + dependsOn: [{...skillStepNames}], // depend on skill installs if any + }) +``` + +**Behavior:** + +- Each `ContextFileSpec` in `input.contextFiles` produces one deterministic step +- Steps use `captureOutput: true` so their stdout is available via `{{steps.X.output}}` +- If skill steps exist, context steps depend on all skill steps (skills must install before context reads) +- Context steps are independent of each other (parallelizable within the DAG) +- Empty `contextFiles` array produces no steps + +--- + +### `emitTaskPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[]` + +Generates the main agent step(s) that perform the actual task. 
The task prompt is composed from the original `taskDescription` augmented with references to context step outputs. + +```ts +/** + * Emit the main task execution step(s). + * For DAG patterns: a single agent step referencing all context outputs. + * For pipeline patterns: sequential agent steps with chained outputs. + */ +export function emitTaskPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[] { + // ... +} +``` + +**Generated code structure (DAG pattern — single task step):** + +```ts + .step('execute-task', { + agent: '{agentName}', + task: `{taskDescription} + +Context: +{{steps.{context1.stepName}.output}} +{{steps.{context2.stepName}.output}} +...`, + dependsOn: [{...contextStepNames}], + verification: { type: 'exit_code', value: '0' }, + retries: 2, + }) +``` + +**Generated code structure (pipeline pattern — sequential steps):** + +```ts + .step('analyze-{n}', { + agent: '{agentName}', + task: `Phase {n}: {subTask} + +Input: +{{steps.{previousStep}.output}}`, + dependsOn: ['{previousStep}'], + verification: { type: 'exit_code', value: '0' }, + }) +``` + +**Behavior:** + +- **DAG pattern**: Generates a single `execute-task` step that depends on all context steps. The task prompt includes all context outputs interpolated via `{{steps.X.output}}`. +- **Pipeline pattern**: Generates sequential agent steps where each step depends on the previous one. The first step references context outputs; subsequent steps reference the output of the preceding step. 
+- The agent name matches the one declared in the bootstrap phase +- Verification defaults to `exit_code` check +- Retries default to 2 for the primary task step + +**Task prompt composition:** + +```ts +function composeTaskPrompt(taskDescription: string, contextStepNames: string[]): string { + if (contextStepNames.length === 0) { + return taskDescription; + } + + const contextRefs = contextStepNames.map((name) => `{{steps.${name}.output}}`).join('\n'); + + return `${taskDescription}\n\nContext:\n${contextRefs}`; +} +``` + +--- + +### `emitVerificationPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[]` + +Generates deterministic verification steps. Each `VerificationSpec` produces a step that runs a shell command and asserts `exit_code === 0`. + +```ts +/** + * Emit verification steps (deterministic shell commands with exit_code checks). + * Skipped entirely if no verifications are specified. + */ +export function emitVerificationPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[] { + // ... +} +``` + +**Generated code structure (per verification):** + +```ts + .step('{verification.stepName}', { + type: 'deterministic', + command: '{verification.command}', + failOnError: true, + dependsOn: ['execute-task'], + }) +``` + +**Behavior:** + +- Each `VerificationSpec` in `input.verifications` produces one deterministic step +- All verification steps depend on the task step(s) completing first +- `failOnError: true` ensures the workflow fails if any verification fails +- Empty `verifications` array produces no steps + +--- + +### `emitFinalPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[]` + +Generates the workflow's closing: `.onError()` strategy, `.run()` call, and the `main()` function wrapper. + +```ts +/** + * Emit the final phase: error handling, run invocation, main() wrapper. + */ +export function emitFinalPhase(input: WorkflowGeneratorInput, opts: ResolvedOptions): string[] { + // ... 
+} +``` + +**Generated code structure:** + +```ts + .onError('fail-fast') + .run(); + + console.log('Workflow completed:', result.status); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); +``` + +**Behavior:** + +- Error strategy defaults to `'fail-fast'` +- The `.run()` call has no arguments (uses default cwd) +- If `input.outputFile` is set, adds a `console.log` with the output path +- Closes the `main()` function and adds the top-level error handler + +--- + +## Internal Helper Functions + +### `resolveOptions(options?: WorkflowGeneratorOptions): ResolvedOptions` + +```ts +interface ResolvedOptions { + indent: string; // computed indent string (e.g., ' ') + comments: boolean; + header: boolean; +} + +function resolveOptions(options?: WorkflowGeneratorOptions): ResolvedOptions { + const style = options?.indent ?? 'spaces'; + const size = options?.indentSize ?? 2; + return { + indent: style === 'tabs' ? '\t' : ' '.repeat(size), + comments: options?.comments ?? true, + header: options?.header ?? true, + }; +} +``` + +### `computeMetadata(input: WorkflowGeneratorInput): WorkflowMetadata` + +Counts steps per phase and computes estimated waves from the dependency graph. + +```ts +function computeMetadata(input: WorkflowGeneratorInput): WorkflowMetadata { + const skillSteps = input.skillPlan.installs.length; + const contextSteps = input.contextFiles.length; + const taskSteps = input.selection.pattern === 'pipeline' ? 
3 : 1; // pipeline splits into analyze/synthesize/validate + const verificationSteps = input.verifications.length; + + // Wave estimation: + // Wave 1: skill installs (parallel) + // Wave 2: context reads (parallel, depend on skills) + // Wave 3+: task steps (1 for DAG, N for pipeline) + // Wave N+1: verification steps (parallel, depend on task) + // Wave N+2: final summary + let waves = 1; // always at least the task wave + if (skillSteps > 0) waves++; + if (contextSteps > 0) waves++; + if (verificationSteps > 0) waves++; + waves++; // final + if (input.selection.pattern === 'pipeline') { + waves += taskSteps - 1; // pipeline adds sequential waves + } + + return { + name: input.workflowName, + pattern: input.selection.pattern, + preset: input.selection.preset, + agentCount: 1, + stepCount: skillSteps + contextSteps + taskSteps + verificationSteps + 1, + phases: { + bootstrap: 0, // bootstrap has no steps, just configuration + skills: skillSteps, + context: contextSteps, + task: taskSteps, + verification: verificationSteps, + final: 1, + }, + hasSkills: skillSteps > 0, + hasVerification: verificationSteps > 0, + estimatedWaves: waves, + }; +} +``` + +### `slugify(text: string): string` + +Converts a task description into a valid workflow name. + +```ts +function slugify(text: string): string { + return text + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .slice(0, 60); +} +``` + +### `escapeTemplateString(text: string): string` + +Escapes backticks and `${` sequences inside generated template literals. 
+ +```ts +function escapeTemplateString(text: string): string { + return text.replace(/`/g, '\\`').replace(/\$\{/g, '\\${'); +} +``` + +--- + +## Complete Generation Example + +Given this input: + +```ts +const input: WorkflowGeneratorInput = { + taskDescription: 'Review the authentication middleware for security vulnerabilities', + workflowName: 'review-auth-middleware', + persona: { + id: 'security-reviewer-v1', + name: 'Security Reviewer', + description: 'Reviews code for security vulnerabilities and OWASP risks', + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + }, + selection: { + persona: { + /* same as above */ + }, + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + }, + skillPlan: { installs: [] }, + contextFiles: [ + { + stepName: 'read-auth-middleware', + command: 'cat src/middleware/auth.ts', + }, + { + stepName: 'read-auth-tests', + command: 'cat src/middleware/__tests__/auth.test.ts', + }, + ], + verifications: [ + { + stepName: 'verify-no-eval', + command: "! 
grep -r 'eval(' src/middleware/auth.ts", + }, + ], + outputFile: 'reports/security-review.md', + maxConcurrency: 4, + timeout: 3_600_000, +}; +``` + +The generator produces: + +```ts +/** + * Auto-generated workflow: review-auth-middleware + * Persona: Security Reviewer (security-review) + * Pattern: dag | Preset: analyst + * Generated: 2026-04-10T12:00:00.000Z + */ +import { workflow } from '@agent-relay/sdk/workflows'; + +async function main() { + const result = await workflow('review-auth-middleware') + .description('Review the authentication middleware for security vulnerabilities') + .pattern('dag') + .channel('wf-review-auth-middleware') + .maxConcurrency(4) + .timeout(3600000) + + .agent('security-review-agent', { + cli: 'claude', + preset: 'analyst', + role: 'Reviews code for security vulnerabilities and OWASP risks', + retries: 2, + }) + + // ── Context phase ───────────────────────────────────────────────────── + .step('read-auth-middleware', { + type: 'deterministic', + command: 'cat src/middleware/auth.ts', + captureOutput: true, + }) + + .step('read-auth-tests', { + type: 'deterministic', + command: 'cat src/middleware/__tests__/auth.test.ts', + captureOutput: true, + }) + + // ── Task phase ──────────────────────────────────────────────────────── + .step('execute-task', { + agent: 'security-review-agent', + task: `Review the authentication middleware for security vulnerabilities + +Context: +{{steps.read-auth-middleware.output}} +{{steps.read-auth-tests.output}}`, + dependsOn: ['read-auth-middleware', 'read-auth-tests'], + verification: { type: 'exit_code', value: '0' }, + retries: 2, + }) + + // ── Verification phase ──────────────────────────────────────────────── + .step('verify-no-eval', { + type: 'deterministic', + command: "! 
grep -r 'eval(' src/middleware/auth.ts", + failOnError: true, + dependsOn: ['execute-task'], + }) + + .onError('fail-fast') + .run(); + + console.log('Workflow completed:', result.status); + console.log('Output:', 'reports/security-review.md'); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); +``` + +--- + +## Pipeline Pattern Example + +For a pipeline-pattern workflow (e.g., `requirements-analysis`), the task phase generates sequential steps: + +```ts + // ── Task phase (pipeline) ───────────────────────────────────────────── + .step('analyze', { + agent: 'requirements-analysis-agent', + task: `Analyze the following context and identify requirements: + +Context: +{{steps.read-spec.output}} +{{steps.read-readme.output}}`, + dependsOn: ['read-spec', 'read-readme'], + verification: { type: 'exit_code', value: '0' }, + }) + + .step('synthesize', { + agent: 'requirements-analysis-agent', + task: `Synthesize the analysis into structured requirements: + +Analysis: +{{steps.analyze.output}}`, + dependsOn: ['analyze'], + verification: { type: 'exit_code', value: '0' }, + }) + + .step('validate', { + agent: 'requirements-analysis-agent', + task: `Validate the requirements for completeness and consistency: + +Requirements: +{{steps.synthesize.output}}`, + dependsOn: ['synthesize'], + verification: { type: 'exit_code', value: '0' }, + }) +``` + +--- + +## File: `packages/sdk/src/workflows/workflow-generator.ts` + +### Structure + +``` +workflow-generator.ts + ├── Type exports (GeneratedWorkflow, WorkflowMetadata, WorkflowGeneratorOptions) + ├── Internal types (ResolvedOptions) + ├── Helper functions (resolveOptions, computeMetadata, slugify, escapeTemplateString) + ├── composeTaskPrompt() + ├── emitBootstrapPhase() + ├── emitSkillPhase() + ├── emitContextPhase() + ├── emitTaskPhase() + ├── emitVerificationPhase() + ├── emitFinalPhase() + └── generateWorkflow() ← main export +``` + +### Imports + +```ts +import type { AgentPreset, SwarmPattern } from 
'./types.js'; +import type { + WorkflowGeneratorInput, + ContextFileSpec, + VerificationSpec, + SkillMaterializationPlan, + PersonaResolution, + PersonaProfile, +} from './persona-utils.js'; +``` + +Only depends on types from Phase 1 (`persona-utils.ts`) and the existing `types.ts`. No external dependencies. + +--- + +## File: `packages/sdk/src/workflows/__tests__/workflow-generator.test.ts` + +### Test structure (vitest) + +```ts +import { describe, it, expect } from 'vitest'; +import { + generateWorkflow, + emitBootstrapPhase, + emitSkillPhase, + emitContextPhase, + emitTaskPhase, + emitVerificationPhase, + emitFinalPhase, + type GeneratedWorkflow, + type WorkflowMetadata, +} from '../workflow-generator.js'; +import type { WorkflowGeneratorInput } from '../persona-utils.js'; +``` + +### Test Fixtures + +```ts +function createMinimalInput(overrides?: Partial<WorkflowGeneratorInput>): WorkflowGeneratorInput { + return { + taskDescription: 'Test task', + workflowName: 'test-task', + persona: { + id: 'code-worker-v1', + name: 'Code Worker', + intent: 'code-gen', + preset: 'worker', + pattern: 'dag', + }, + selection: { + intent: 'code-gen', + preset: 'worker', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + }, + skillPlan: { installs: [] }, + contextFiles: [], + verifications: [], + maxConcurrency: 4, + timeout: 3_600_000, + ...overrides, + }; +} + +function createFullInput(): WorkflowGeneratorInput { + return createMinimalInput({ + taskDescription: 'Review auth middleware for security issues', + workflowName: 'review-auth-middleware', + persona: { + id: 'security-reviewer-v1', + name: 'Security Reviewer', + description: 'Reviews code for security vulnerabilities', + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + }, + selection: { + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + }, + skillPlan: { + installs: [{ skillId: 'semgrep', command: 'npm install -g semgrep' }], + }, + 
contextFiles: [ + { stepName: 'read-auth', command: 'cat src/auth.ts' }, + { stepName: 'read-tests', command: 'cat src/auth.test.ts' }, + ], + verifications: [{ stepName: 'verify-no-eval', command: "! grep -r 'eval(' src/" }], + outputFile: 'reports/security.md', + }); +} +``` + +### Test Cases + +#### `generateWorkflow` + +| Test | Input | Assertions | +| ------------------------------------------ | --------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| Minimal input produces valid TypeScript | `createMinimalInput()` | Source contains `import { workflow }`, `async function main()`, `.run()`, `main().catch` | +| Full input includes all phases | `createFullInput()` | Source contains skill install step, both context steps, task step, verification step | +| Metadata is accurate | `createFullInput()` | `metadata.stepCount === 5` (1 skill + 2 context + 1 task + 1 verification), `metadata.hasSkills === true`, `metadata.hasVerification === true` | +| Workflow name appears in source | `createMinimalInput()` | Source contains `workflow('test-task')` | +| Pattern is set from selection | `createMinimalInput({ selection: { ...base, pattern: 'pipeline' } })` | Source contains `.pattern('pipeline')` | +| Channel name is derived from workflow name | `createMinimalInput()` | Source contains `.channel('wf-test-task')` | +| Max concurrency is configurable | `createMinimalInput({ maxConcurrency: 8 })` | Source contains `.maxConcurrency(8)` | +| Timeout is configurable | `createMinimalInput({ timeout: 1_800_000 })` | Source contains `.timeout(1800000)` | +| Output file produces console.log | `createMinimalInput({ outputFile: 'out.md' })` | Source contains `'out.md'` | +| No output file omits log | `createMinimalInput()` | Source does not contain `Output:` | + +#### `emitBootstrapPhase` + +| Test | Input | Assertions | +| 
----------------------------------------- | ---------------------------------------- | ----------------------------------------------------------------------- | +| Includes import statement | Any | Output contains `import { workflow } from '@agent-relay/sdk/workflows'` | +| Includes header comment when enabled | `{ header: true }` | Output starts with `/**` | +| Omits header comment when disabled | `{ header: false }` | First line is `import` | +| Agent name follows intent convention | `{ selection.intent: 'review' }` | Output contains `'review-agent'` | +| Agent preset matches selection | `{ selection.preset: 'analyst' }` | Output contains `preset: 'analyst'` | +| Agent role uses persona description | `{ persona.description: 'Custom role' }` | Output contains `role: 'Custom role'` | +| Agent role falls back to task description | `{ persona: { id: 'x', name: 'X' } }` | Output contains `role: '{taskDescription}'` | + +#### `emitSkillPhase` + +| Test | Input | Assertions | +| ---------------------------------------- | ---------------------------------------------------------- | ---------------------------------------------------- | +| No skills produces empty array | `{ skillPlan: { installs: [] } }` | `result.length === 0` | +| Single install produces one step | `{ installs: [{ skillId: 'foo', command: 'npm i foo' }] }` | Output contains `'install-skill-foo'`, `'npm i foo'` | +| Multiple installs produce parallel steps | Two installs | Neither step has `dependsOn` referencing the other | +| Steps are deterministic | Any install | Output contains `type: 'deterministic'` | +| Fail on error is set | Any install | Output contains `failOnError: true` | + +#### `emitContextPhase` + +| Test | Input | Assertions | +| ------------------------------------------- | ---------------------- | --------------------------------------------------------- | +| No context files produces empty array | `{ contextFiles: [] }` | `result.length === 0` | +| Single context file produces one 
step | One `ContextFileSpec` | Output contains step name and command | +| Capture output is enabled | Any context file | Output contains `captureOutput: true` | +| Steps depend on skill steps when present | Skills + context | Output contains `dependsOn: ['install-skill-...']` | +| Steps have no dependencies when no skills | No skills, has context | No `dependsOn` in context steps | +| Context steps are independent of each other | Multiple context files | No context step lists another context step in `dependsOn` | + +#### `emitTaskPhase` + +| Test | Input | Assertions | +| ------------------------------------------------- | ------------------------- | ---------------------------------------------------------------------------------- | +| DAG produces single execute-task step | `{ pattern: 'dag' }` | Output contains exactly one `.step('execute-task'` | +| Pipeline produces sequential steps | `{ pattern: 'pipeline' }` | Output contains `'analyze'`, `'synthesize'`, `'validate'` with chained `dependsOn` | +| Task step depends on all context steps | 2 context files | `dependsOn` array includes both context step names | +| Task step has no dependencies when no context | No context files | `dependsOn` is empty or omitted | +| Context outputs are interpolated into task prompt | 2 context files | Output contains `{{steps.read-auth.output}}` | +| Task description appears in agent task | Any | Output contains the `taskDescription` string | +| Retries default to 2 | Any | Output contains `retries: 2` | +| Verification defaults to exit_code | Any | Output contains `type: 'exit_code'` | + +#### `emitVerificationPhase` + +| Test | Input | Assertions | +| ----------------------------------------------- | ----------------------- | ------------------------------------------------------- | +| No verifications produces empty array | `{ verifications: [] }` | `result.length === 0` | +| Single verification produces one step | One `VerificationSpec` | Output contains step name, command,
`failOnError: true` | +| Verification steps depend on task step | DAG pattern | `dependsOn` includes `'execute-task'` | +| Verification steps depend on last pipeline step | Pipeline pattern | `dependsOn` includes `'validate'` | + +#### `emitFinalPhase` + +| Test | Input | Assertions | +| --------------------------------- | ----- | --------------------------------------- | +| Includes onError | Any | Output contains `.onError('fail-fast')` | +| Includes run() call | Any | Output contains `.run()` | +| Closes main function | Any | Output contains `main().catch` | +| Includes process.exit(1) on error | Any | Output contains `process.exit(1)` | + +#### `WorkflowMetadata` + +| Test | Input | Assertions | +| --------------------------------------- | --------------------- | ------------------------------------------------------------------------------------------------- | +| Step count is sum of all phases | Full input | `stepCount === phases.skills + phases.context + phases.task + phases.verification + phases.final` | +| Agent count is 1 | Any | `agentCount === 1` | +| Estimated waves for DAG with all phases | Full input | `estimatedWaves >= 4` (skills + context + task + verification + final) | +| Estimated waves for minimal input | Minimal | `estimatedWaves === 2` (task + final) | +| Pipeline adds extra waves | Pipeline with context | `estimatedWaves > DAG equivalent` | + +#### Edge Cases + +| Test | Scenario | Assertions | +| ----------------------------------------- | ---------------------------------------- | --------------------------------------------- | +| Backticks in task description are escaped | `taskDescription: 'Use \`code\` blocks'` | Generated template literal doesn't break | +| Dollar braces in commands are escaped | `command: 'echo ${HOME}'` | Generated source is valid TypeScript | +| Very long task description is handled | 2000-char description | Source is valid, no truncation | +| Empty task description | `taskDescription: ''` | Produces valid (if 
minimal) workflow | +| Special characters in workflow name | `workflowName: 'review-auth_v2.1'` | Valid workflow name used in `workflow()` call | + +--- + +## SDK Export Changes + +Add to `packages/sdk/src/workflows/index.ts`: + +```ts +export { + generateWorkflow, + emitBootstrapPhase, + emitSkillPhase, + emitContextPhase, + emitTaskPhase, + emitVerificationPhase, + emitFinalPhase, + type GeneratedWorkflow, + type WorkflowMetadata, + type WorkflowGeneratorOptions, +} from './workflow-generator.js'; +``` + +--- + +## Implementation Notes + +1. **No external dependencies.** The module only imports types from `persona-utils.ts` (Phase 1) and `types.ts`. No npm packages, no network calls, no filesystem access. The generator is a pure function: input in, string out. + +2. **String-based code generation.** The generator builds TypeScript source as an array of string lines. This is intentional — it avoids AST manipulation complexity and keeps the module lightweight. The generated code is formatted consistently via the `ResolvedOptions` indent configuration. + +3. **Template literal safety.** All user-provided strings (task descriptions, commands, file paths) are escaped before being embedded in generated template literals. `escapeTemplateString()` handles backtick and `${` escaping. + +4. **Forward-compatible with Phase 3.** The CLI integration (Phase 3) will call `generateWorkflow()` with a `WorkflowGeneratorInput` assembled from parsed CLI flags and Phase 1 persona resolution. The generator does not assume how its input is constructed. + +5. **Emit functions are independently testable.** Each `emit*` function accepts the full `WorkflowGeneratorInput` and returns `string[]`. This allows unit tests to validate individual phases without generating the entire workflow. + +6. 
**Pattern follows existing codebase conventions.** The file uses ES module imports with `.js` extensions, `type` keyword for type-only imports, and JSDoc comments for exported functions — matching the patterns in `builder.ts` and `types.ts`. + +7. **Pipeline step names.** For pipeline patterns, the three sequential task steps use fixed names: `'analyze'`, `'synthesize'`, `'validate'`. This convention aligns with the sequential nature of analysis workflows and provides predictable step names for verification dependencies. + +--- + +## Acceptance Criteria + +- [ ] `generateWorkflow()` produces valid TypeScript source that can be parsed without syntax errors +- [ ] Generated workflows use the correct `WorkflowBuilder` API methods (`.workflow()`, `.agent()`, `.step()`, `.pattern()`, `.run()`) +- [ ] DAG pattern generates a single `execute-task` step with parallel context dependencies +- [ ] Pipeline pattern generates sequential `analyze` → `synthesize` → `validate` steps with chained `dependsOn` +- [ ] Context outputs are correctly referenced via `{{steps.X.output}}` interpolation in task prompts +- [ ] Skill install steps are generated as deterministic steps when `skillPlan.installs` is non-empty +- [ ] Verification steps are generated as deterministic steps with `failOnError: true` +- [ ] Agent preset and pattern match the `PersonaResolution` from Phase 1 +- [ ] `WorkflowMetadata` accurately reflects step counts and estimated waves +- [ ] Special characters in task descriptions and commands are properly escaped +- [ ] Empty optional fields (no skills, no context, no verifications) produce valid minimal workflows +- [ ] All emit functions are exported and independently testable +- [ ] All tests pass via `vitest` +- [ ] No new external dependencies introduced +- [ ] Types are exported and available to Phase 3 consumers diff --git a/build-plans/02-workflow-generator.ts b/build-plans/02-workflow-generator.ts new file mode 100644 index 000000000..1b261ce6f --- /dev/null 
+++ b/build-plans/02-workflow-generator.ts @@ -0,0 +1,341 @@ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { workflow } = require('@agent-relay/sdk/workflows'); + +const REPO_ROOT = '/Users/khaliqgant/Projects/AgentWorkforce/relay-workflows'; +const SPEC_PATH = 'workflows/meta-agent-flag/02-workflow-generator.spec.md'; +const SDK_WORKFLOWS_DIR = 'packages/sdk/src/workflows'; +const WORKFLOW_GENERATOR_PATH = `${SDK_WORKFLOWS_DIR}/workflow-generator.ts`; +const WORKFLOW_GENERATOR_TEST_PATH = `${SDK_WORKFLOWS_DIR}/__tests__/workflow-generator.test.ts`; +const PERSONA_UTILS_PATH = `${SDK_WORKFLOWS_DIR}/persona-utils.ts`; +const WORKFLOWS_INDEX_PATH = `${SDK_WORKFLOWS_DIR}/index.ts`; +const WORKFLOW_TYPES_PATH = `${SDK_WORKFLOWS_DIR}/types.ts`; +const WORKFLOW_BUILDER_PATH = `${SDK_WORKFLOWS_DIR}/builder.ts`; + +async function main() { + const wf = workflow('phase-2-workflow-generator') + .description('Implement SDK workflow generator utilities and tests from the Phase 2 meta-agent flag spec') + .pattern('dag') + .channel('wf-phase-2-workflow-generator') + .maxConcurrency(4) + .timeout(3_600_000) + .agent('generator-implementer', { + cli: 'codex', + preset: 'worker', + role: 'Focused TypeScript SDK implementer for pure workflow source generation utilities', + retries: 2, + }) + .agent('test-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused vitest test author for SDK workflow generator behavior and emitted source', + retries: 2, + }) + .agent('sdk-reviewer', { + cli: 'codex', + preset: 'reviewer', + role: 'Reviews SDK generator implementation for spec conformance, source safety, and regression risk', + retries: 1, + }); + + wf.step('guard-not-main', { + type: 'deterministic', + command: [ + 'branch="$(git branch --show-current)"', + 'if [ "$branch" = "main" ]; then echo "Refusing to run Phase 2 workflow on main"; exit 1; fi', + 'echo "Running on branch: ${branch:-detached}"', + ].join(' && '), + 
captureOutput: true, + failOnError: true, + }); + + wf.step('read-spec', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${SPEC_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-persona-utils', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `if test -f ${PERSONA_UTILS_PATH}; then`, + `sed -n '1,260p' ${PERSONA_UTILS_PATH};`, + `else echo "${PERSONA_UTILS_PATH} is not present yet. Use the Phase 1 spec excerpt embedded in the Phase 2 spec for required types."; fi`, + ].join(' '), + captureOutput: true, + failOnError: false, + }); + + wf.step('read-workflow-types', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `sed -n '1,220p' ${WORKFLOW_TYPES_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-builder-api', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `sed -n '1,240p' ${WORKFLOW_BUILDER_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-workflows-index', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${WORKFLOWS_INDEX_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-test-patterns', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `sed -n '1,180p' ${SDK_WORKFLOWS_DIR}/__tests__/template-resolver.test.ts`, + `sed -n '1,160p' ${SDK_WORKFLOWS_DIR}/__tests__/verification.test.ts`, + `sed -n '1,160p' packages/sdk/src/__tests__/builder-deterministic.test.ts 2>/dev/null || true`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('implement-workflow-generator', { + agent: 'generator-implementer', + dependsOn: ['read-spec', 'read-persona-utils', 'read-workflow-types', 'read-builder-api'], + task: ` +You are implementing Phase 2 of the --agent flag feature. Do not use Relaycast MCP tools or spawn sub-agents. + +Create ${WORKFLOW_GENERATOR_PATH} from the Phase 2 spec. 
+ +Spec: +{{steps.read-spec.output}} + +Current Phase 1 persona utilities, if present: +{{steps.read-persona-utils.output}} + +Relevant workflow SDK types: +{{steps.read-workflow-types.output}} + +Relevant WorkflowBuilder API: +{{steps.read-builder-api.output}} + +Requirements: +1. Export GeneratedWorkflow, WorkflowMetadata, WorkflowGeneratorOptions, generateWorkflow(), and all phase emitters: emitBootstrapPhase(), emitSkillPhase(), emitContextPhase(), emitTaskPhase(), emitVerificationPhase(), emitFinalPhase(). +2. Import AgentPreset and SwarmPattern as types from './types.js'. Import WorkflowGeneratorInput and related Phase 1 types from './persona-utils.js' using type imports. +3. Keep the module pure: no filesystem access, no process access, no network access, and no external npm dependencies. +4. Implement resolveOptions(), computeMetadata(), slugify(), escapeTemplateString(), composeTaskPrompt(), and any small local formatting helpers needed by the emitters. +5. Generated source must be a complete runnable TypeScript workflow file using import { workflow } from '@agent-relay/sdk/workflows', async main(), workflow(...), .description(), .pattern(), .channel(), .maxConcurrency(), .timeout(), .agent(), .step(), .onError('fail-fast'), .run(), and main().catch(...). +6. Use a single agent by default. The agent name must be '<intent>-agent' after slugifying the resolved intent. The cli defaults to 'claude' unless persona metadata provides a string cli override. +7. Emit skill install deterministic steps for every input.skillPlan.installs entry. Add a manifest read step when input.skillPlan.manifestPath is set. +8. Emit context deterministic steps with captureOutput: true. When skills exist, context steps depend on all skill and manifest steps; otherwise they remain independent. +9. Emit DAG task workflows as one execute-task step depending on all context steps. Emit pipeline workflows as analyze, synthesize, and validate steps with chained dependencies. +10.
Emit verification deterministic steps with failOnError: true. DAG verification depends on execute-task; pipeline verification depends on validate. +11. Escape strings safely for generated single-quoted strings, shell command strings, and template literals. Backticks and dollar-brace sequences in user text must not break generated TypeScript. +12. Metadata must match the spec, including phase counts, hasSkills, hasVerification, agentCount, stepCount, and estimatedWaves. Count the manifest read step as a skills phase step. +13. Preserve existing code style: ES module imports with .js extensions, explicit exported functions, JSDoc on public APIs, and two-space formatting by default. + +Only edit ${WORKFLOW_GENERATOR_PATH}. End your output with WORKFLOW_GENERATOR_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'WORKFLOW_GENERATOR_DONE' }, + retries: 2, + }); + + wf.step('verify-workflow-generator-file', { + type: 'deterministic', + dependsOn: ['implement-workflow-generator'], + command: [ + `test -f ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "export function generateWorkflow" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "export function emitBootstrapPhase" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "export function emitSkillPhase" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "export function emitContextPhase" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "export function emitTaskPhase" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "export function emitVerificationPhase" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "export function emitFinalPhase" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "WorkflowMetadata" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "escapeTemplateString" ${WORKFLOW_GENERATOR_PATH}`, + `grep -q "composeTaskPrompt" ${WORKFLOW_GENERATOR_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('export-workflow-generator', { + agent: 'generator-implementer', + dependsOn: ['read-spec', 'read-workflows-index', 'verify-workflow-generator-file'], + 
task: ` +Update ${WORKFLOWS_INDEX_PATH} to re-export the workflow generator public API. +Do not use Relaycast MCP tools or spawn sub-agents. + +Spec: +{{steps.read-spec.output}} + +Current index: +{{steps.read-workflows-index.output}} + +Requirements: +1. Preserve every existing export in ${WORKFLOWS_INDEX_PATH}. +2. Add the explicit workflow-generator export block from the SDK Export Changes section. +3. Export from './workflow-generator.js'. +4. Include generateWorkflow, all six emit phase functions, and the GeneratedWorkflow, WorkflowMetadata, and WorkflowGeneratorOptions types. +5. Do not edit any file except ${WORKFLOWS_INDEX_PATH}. + +End your output with WORKFLOW_GENERATOR_EXPORT_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'WORKFLOW_GENERATOR_EXPORT_DONE' }, + retries: 2, + }); + + wf.step('verify-index-export', { + type: 'deterministic', + dependsOn: ['export-workflow-generator'], + command: [ + `grep -q "workflow-generator.js" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "generateWorkflow" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "emitBootstrapPhase" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "WorkflowGeneratorOptions" ${WORKFLOWS_INDEX_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('write-workflow-generator-tests', { + agent: 'test-writer', + dependsOn: ['read-spec', 'read-test-patterns', 'verify-workflow-generator-file'], + task: ` +Create ${WORKFLOW_GENERATOR_TEST_PATH} with focused vitest coverage for the Phase 2 workflow generator. +Do not use Relaycast MCP tools or spawn sub-agents. + +Spec: +{{steps.read-spec.output}} + +Existing test style examples: +{{steps.read-test-patterns.output}} + +Requirements: +1. Import vitest describe, it, and expect. +2. Import generateWorkflow, all six emit phase functions, and exported types from '../workflow-generator.js'. +3. Import WorkflowGeneratorInput as a type from '../persona-utils.js'. +4. 
Provide createMinimalInput() and createFullInput() fixtures matching the spec. +5. Cover generateWorkflow minimal source, full source, metadata counts, workflow name, pattern, channel, maxConcurrency, timeout, and optional outputFile logging. +6. Cover emitBootstrapPhase header behavior, agent naming, preset, role from persona description, and role fallback. +7. Cover emitSkillPhase no skills, one skill, multiple independent skills, deterministic type, failOnError, and manifestPath behavior. +8. Cover emitContextPhase no context, captureOutput, skill dependencies, no dependencies without skills, and independent context steps. +9. Cover emitTaskPhase DAG execute-task output, pipeline analyze/synthesize/validate chain, context interpolation, retries, and exit_code verification. +10. Cover emitVerificationPhase no verifications, DAG dependency on execute-task, and pipeline dependency on validate. +11. Cover emitFinalPhase onError, run(), main().catch, process.exit(1), and outputFile logging. +12. Cover escaping edge cases for backticks, dollar-brace sequences, quotes, empty strings, long task descriptions, and special workflow names. +13. Keep tests deterministic and do not execute generated workflows. + +Only edit ${WORKFLOW_GENERATOR_TEST_PATH}. End your output with WORKFLOW_GENERATOR_TESTS_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'WORKFLOW_GENERATOR_TESTS_DONE' }, + retries: 2, + }); + + wf.step('verify-test-file', { + type: 'deterministic', + dependsOn: ['write-workflow-generator-tests'], + command: [ + `test -f ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "generateWorkflow" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitBootstrapPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitSkillPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitContextPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitTaskPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitVerificationPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitFinalPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "createMinimalInput" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "createFullInput" ${WORKFLOW_GENERATOR_TEST_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('run-workflow-generator-tests', { + type: 'deterministic', + dependsOn: ['verify-index-export', 'verify-test-file'], + command: `npx vitest run ${WORKFLOW_GENERATOR_TEST_PATH} --reporter=verbose`, + captureOutput: true, + failOnError: true, + }); + + wf.step('typecheck-sdk', { + type: 'deterministic', + dependsOn: ['run-workflow-generator-tests'], + command: 'cd packages/sdk && npx tsc -p tsconfig.json --noEmit', + captureOutput: true, + failOnError: true, + }); + + wf.step('review-phase-2', { + agent: 'sdk-reviewer', + dependsOn: ['read-spec', 'run-workflow-generator-tests', 'typecheck-sdk'], + task: ` +Review the Phase 2 workflow generator implementation. Do not edit files. +Do not use Relaycast MCP tools or spawn sub-agents. + +Spec: +{{steps.read-spec.output}} + +Check: +1. ${WORKFLOW_GENERATOR_PATH} is pure and has no external dependencies, filesystem access, process access, or network access. +2. The exported interfaces, helper functions, generateWorkflow(), and all six emit phase functions match the spec. +3. 
Generated workflows are complete TypeScript files that use the WorkflowBuilder API and include bootstrap, skills, context, task, verification, and final phases. +4. DAG and pipeline patterns produce the required task dependency shapes. +5. Skill manifest reads, context dependencies, verification dependencies, metadata counts, and estimated waves are correct. +6. User-provided strings are escaped safely for emitted TypeScript source. +7. ${WORKFLOWS_INDEX_PATH} re-exports the workflow generator public API. +8. ${WORKFLOW_GENERATOR_TEST_PATH} covers the required behavior without executing generated workflows. +9. Focused vitest and SDK typecheck passed: +{{steps.run-workflow-generator-tests.output}} +{{steps.typecheck-sdk.output}} + +Output REVIEW_PASS if the implementation is acceptable; otherwise output REVIEW_FAIL with concrete blockers. +`.trim(), + verification: { type: 'output_contains', value: 'REVIEW_PASS' }, + retries: 1, + }); + + wf.step('summarize-artifacts', { + type: 'deterministic', + dependsOn: ['review-phase-2'], + command: [ + `echo "Phase 2 workflow generator workflow completed."`, + `echo "Artifacts:"`, + `echo "- ${WORKFLOW_GENERATOR_PATH}"`, + `echo "- ${WORKFLOW_GENERATOR_TEST_PATH}"`, + `echo "- ${WORKFLOWS_INDEX_PATH}"`, + `git diff -- ${WORKFLOW_GENERATOR_PATH} ${WORKFLOW_GENERATOR_TEST_PATH} ${WORKFLOWS_INDEX_PATH} | sed -n '1,260p'`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + const result = await wf.onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }).run({ + cwd: REPO_ROOT, + }); + + if ('status' in result) { + console.log(`Result: ${result.status}`); + } else { + console.log('Dry run completed.'); + return; + } + + if (result.status !== 'completed') { + process.exitCode = 1; + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/build-plans/03-cli-agent-flag.spec.md b/build-plans/03-cli-agent-flag.spec.md new file mode 100644 index 000000000..7bcff5bad --- /dev/null 
+++ b/build-plans/03-cli-agent-flag.spec.md @@ -0,0 +1,939 @@ +# Phase 3 Specification: CLI `--agent` Flag Integration + +> Wire the `--agent` flag into the CLI entry point so that `relay-workflow "task description" --agent <ref>` resolves a persona, generates a workflow, and executes it — all in one command. + +**Phase:** 3 of 5 +**Dependencies:** Phase 1 (persona-utils), Phase 2 (workflow-generator) +**Target files:** + +- `packages/sdk/src/workflows/cli.ts` (modify) +- `packages/sdk/src/workflows/context-heuristics.ts` (new) +- `packages/sdk/src/workflows/__tests__/context-heuristics.test.ts` (new) + +--- + +## Goal + +Extend the existing CLI (`packages/sdk/src/workflows/cli.ts`) to support a new **agent mode** activated by the `--agent` flag. When present, the CLI bypasses the YAML-based workflow path and instead: + +1. Parses task description and all `--agent`-related flags +2. Resolves the persona via `resolvePersonaByIdOrIntent()` (Phase 1) +3. Infers default context files based on intent heuristics (if `--context` not supplied) +4. Calls `generateWorkflow()` (Phase 2) to produce a runnable workflow +5. Optionally writes the generated workflow to disk (`--output`) +6. Optionally prints a dry-run report instead of executing (`--dry-run`) +7. Executes the generated workflow via `WorkflowRunner` + +This creates a zero-config experience: `relay-workflow "Review auth for vulnerabilities" --agent security-review` is a single command that does everything. + +--- + +## New CLI Flags + +### Flag Definitions + +| Flag | Short | Type | Default | Description | +| ------------------- | ----- | ---------- | ------------ | ---------------------------------------------------------------------- | +| `--agent <ref>` | `-a` | `string` | — | Persona ID or intent string. **Required** for agent mode. | +| `--profile <id>` | `-p` | `string` | — | Disambiguation hint when multiple personas share an intent.
| +| `--tier <tier>` | `-t` | `string` | `'standard'` | Execution tier: `'standard'` or `'premium'`. Controls model selection. | +| `--dry-run` | `-d` | `boolean` | `false` | Print the generated workflow and metadata without executing. | +| `--context <path>` | `-c` | `string[]` | (heuristic) | Context files to read. Repeatable. Overrides heuristics when provided. | +| `--verify <cmd>` | `-v` | `string[]` | `[]` | Verification commands. Repeatable. Each must exit 0. | +| `--output <path>` | `-o` | `string` | — | Write generated workflow source to this file path. | +| `--concurrency <n>` | | `number` | `4` | Max concurrent steps in the generated workflow. | +| `--timeout <ms>` | | `number` | `3600000` | Workflow timeout in milliseconds (default: 1 hour). | + +### Flag Parsing Rules + +1. `--agent` is the mode switch. If present, the CLI enters agent mode. If absent, the existing YAML-based path is used unchanged. +2. In agent mode, the first positional argument is the **task description** (not a YAML path). +3. `--context` and `--verify` are repeatable: `--context src/auth.ts --context src/auth.test.ts`. +4. `--dry-run` is a boolean flag (no value). It replaces the existing `DRY_RUN` env var behavior in agent mode. +5. All existing flags (`--resume`, `--workflow`, `--start-from`, `--previous-run-id`, `--validate`) remain unchanged and are **mutually exclusive** with `--agent`. + +### Updated `FLAGS_WITH_VALUES` + +```ts +const FLAGS_WITH_VALUES = new Set([ + '--resume', + '--workflow', + '--start-from', + '--previous-run-id', + '--agent', + '-a', + '--profile', + '-p', + '--tier', + '-t', + '--context', + '-c', + '--verify', + '-v', + '--output', + '-o', + '--concurrency', + '--timeout', +]); +``` + +--- + +## Updated Usage Help + +```ts +function printUsage(): void { + console.log( + ` +Usage: relay-workflow <yaml-path> [options] + relay-workflow "<task>" --agent <ref> [agent-options] + relay-workflow --resume <run-id> + +Run a relay.yaml workflow file, or generate and run a workflow from a persona.
+ +Arguments: + <yaml-path> Path to the relay.yaml workflow file + <task> Task description (in agent mode) + +YAML Mode Options: + --workflow <name> Run a specific workflow by name (default: first) + --resume <run-id> Resume a failed or interrupted run by its run ID + --start-from <step> Start from a specific step, skipping predecessors + --previous-run-id <id> Use cached outputs from a specific prior run + --validate Validate workflow YAML without running + +Agent Mode Options: + --agent, -a <ref> Persona ID or intent (e.g., 'security-review', 'reviewer-v1') + --profile, -p <id> Disambiguation hint for shared intents + --tier, -t <tier> Execution tier: 'standard' (default) or 'premium' + --dry-run, -d Print generated workflow without executing + --context, -c <path> Context file to read (repeatable) + --verify, -v <cmd> Verification command (repeatable, must exit 0) + --output, -o <path> Write generated workflow source to file + --concurrency <n> Max concurrent steps (default: 4) + --timeout <ms> Workflow timeout in ms (default: 3600000) + +General: + --help Show this help message + +Examples: + # YAML mode + relay-workflow workflows/daytona-migration.yaml + relay-workflow workflows/feature-dev.yaml --workflow build-and-test + + # Agent mode + relay-workflow "Review auth for vulnerabilities" --agent security-review + relay-workflow "Fix flaky test in CI" --agent debugging --context tests/flaky.test.ts + relay-workflow "Write API docs" --agent documentation --dry-run + relay-workflow "Refactor auth module" -a code-gen -c src/auth.ts -o workflow.ts +`.trim() + ); +} +``` + +--- + +## CLI Parsing Implementation + +### `parseAgentFlags(args: string[]): AgentModeFlags | null` + +Returns `null` if `--agent` / `-a` is not present (fall through to YAML mode). Otherwise extracts all agent-mode flags.
+ +```ts +export interface AgentModeFlags { + taskDescription: string; + agentRef: string; + profile?: string; + tier: 'standard' | 'premium'; + dryRun: boolean; + contextPaths: string[]; + verifyCommands: string[]; + outputPath?: string; + concurrency: number; + timeout: number; +} + +export function parseAgentFlags(args: string[]): AgentModeFlags | null { + // Check for --agent or -a + const agentIdx = args.indexOf('--agent') !== -1 ? args.indexOf('--agent') : args.indexOf('-a'); + + if (agentIdx === -1) return null; + + const agentRef = args[agentIdx + 1]; + if (!agentRef || agentRef.startsWith('-')) { + throw new Error('--agent requires a persona ID or intent value'); + } + + // Extract task description (first positional arg, skipping flags) + const taskDescription = getTaskDescriptionArg(args); + if (!taskDescription) { + throw new Error('Agent mode requires a task description as the first argument'); + } + + // Collect repeatable flags + const contextPaths = collectRepeatable(args, '--context', '-c'); + const verifyCommands = collectRepeatable(args, '--verify', '-v'); + + // Single-value flags + const profile = getFlagValue(args, '--profile', '-p'); + const tier = (getFlagValue(args, '--tier', '-t') ?? 'standard') as 'standard' | 'premium'; + const outputPath = getFlagValue(args, '--output', '-o'); + const concurrency = parseInt(getFlagValue(args, '--concurrency') ?? '4', 10); + const timeout = parseInt(getFlagValue(args, '--timeout') ?? '3600000', 10); + + // Boolean flags + const dryRun = args.includes('--dry-run') || args.includes('-d'); + + // Validate tier + if (tier !== 'standard' && tier !== 'premium') { + throw new Error(`Invalid tier "${tier}". Must be "standard" or "premium".`); + } + + // Validate concurrency + if (isNaN(concurrency) || concurrency < 1 || concurrency > 32) { + throw new Error(`Invalid concurrency "${concurrency}". 
Must be 1-32.`); + } + + // Validate timeout + if (isNaN(timeout) || timeout < 1000) { + throw new Error(`Invalid timeout "${timeout}". Must be at least 1000ms.`); + } + + return { + taskDescription, + agentRef, + profile, + tier, + dryRun, + contextPaths, + verifyCommands, + outputPath, + concurrency, + timeout, + }; +} +``` + +### Helper: `getTaskDescriptionArg(args: string[]): string | undefined` + +Extracts the first positional argument that is not a flag and not a flag value. In agent mode, this is the task description (typically quoted). + +```ts +function getTaskDescriptionArg(args: string[]): string | undefined { + for (let i = 0; i < args.length; i += 1) { + const arg = args[i]; + if (arg.startsWith('-')) { + if (FLAGS_WITH_VALUES.has(arg)) i += 1; // skip value + continue; + } + return arg; + } + return undefined; +} +``` + +> **Note:** This replaces the existing `getYamlPathArg` in the shared codepath. In YAML mode, the positional arg is a file path. In agent mode, it is a task description. The existing function can be reused since the logic is identical — only the semantic interpretation differs. + +### Helper: `collectRepeatable(args: string[], long: string, short?: string): string[]` + +Collects all values for a repeatable flag. + +```ts +function collectRepeatable(args: string[], long: string, short?: string): string[] { + const values: string[] = []; + for (let i = 0; i < args.length; i += 1) { + if (args[i] === long || (short && args[i] === short)) { + const val = args[i + 1]; + if (val && !val.startsWith('-')) { + values.push(val); + i += 1; + } + } + } + return values; +} +``` + +### Helper: `getFlagValue(args: string[], long: string, short?: string): string | undefined` + +Extracts a single flag value. 
+ +```ts +function getFlagValue(args: string[], long: string, short?: string): string | undefined { + for (let i = 0; i < args.length; i += 1) { + if (args[i] === long || (short && args[i] === short)) { + const val = args[i + 1]; + if (val && !val.startsWith('-')) return val; + } + } + return undefined; +} +``` + +--- + +## Default Context File Heuristics + +### File: `packages/sdk/src/workflows/context-heuristics.ts` + +When `--context` is not provided, the CLI infers context files based on the resolved intent. This provides a zero-config experience for common use cases. + +```ts +import type { ContextFileSpec } from './persona-utils.js'; + +/** + * Heuristic context file mappings per intent. + * Each entry produces ContextFileSpec[] when the corresponding files exist on disk. + */ +export interface ContextHeuristic { + /** Intent this heuristic applies to. */ + intent: string; + /** Candidate file patterns to probe. */ + candidates: CandidateSpec[]; +} + +export interface CandidateSpec { + /** Step name for the generated context step. */ + stepName: string; + /** Glob pattern or literal path to check. */ + pattern: string; + /** Shell command to capture the file. Defaults to `cat `. */ + command?: string; + /** Priority when multiple candidates match (lower = higher priority). */ + priority: number; + /** Max files to include from this pattern (default: 1). 
*/ + maxFiles?: number; +} +``` + +### Intent-to-Context Mapping Table + +| Intent | Candidates | Rationale | +| ----------------------- | -------------------------------------------------------------------------------- | ------------------------------------------------- | +| `review` | `src/**/*.ts` (changed files via `git diff --name-only HEAD~1`), `tsconfig.json` | Review needs the changed files and project config | +| `security-review` | `src/**/*.ts` (changed files), `package.json`, `.env.example` | Security needs dependency info and env patterns | +| `architecture-plan` | `tsconfig.json`, `package.json`, `src/**/index.ts` | Architecture needs entry points and config | +| `requirements-analysis` | `README.md`, `docs/**/*.md`, `package.json` | Requirements derive from existing docs | +| `debugging` | Test output via `npm test 2>&1 \| tail -50`, `src/**/*.test.ts` (failing tests) | Debugging needs error output | +| `documentation` | `README.md`, `src/**/index.ts`, `docs/**/*.md` | Docs need existing docs and public API | +| `verification` | `.github/workflows/*.yml`, `package.json`, `tsconfig.json` | Verification checks CI and config | +| `test-strategy` | `src/**/*.test.ts`, `jest.config.*`, `vitest.config.*` | Test strategy needs existing test structure | +| `tdd-enforcement` | `src/**/*.test.ts`, `src/**/*.ts` (paired source files) | TDD needs test-source pairs | +| `flake-investigation` | CI logs via `gh run view --log-failed 2>&1 \| tail -100`, `src/**/*.test.ts` | Flake investigation needs CI output | +| `npm-provenance` | `.github/workflows/publish.yml`, `package.json`, `.npmrc` | Provenance needs publish config | +| `implement-frontend` | `src/**/*.tsx`, `src/**/*.css`, `package.json` | Frontend needs UI files | +| `code-gen` | `package.json`, `tsconfig.json`, `src/**/index.ts` | General code needs project structure | + +### `inferContextFiles(intent: string, cwd: string): Promise` + +Probes the filesystem for candidate files and returns matching 
`ContextFileSpec[]`. + +```ts +import { glob } from 'node:fs/promises'; +import { existsSync } from 'node:fs'; +import type { ContextFileSpec } from './persona-utils.js'; + +/** + * Infer context files for a given intent by probing the filesystem. + * + * @param intent - Resolved intent string from persona resolution + * @param cwd - Working directory to search from + * @returns Context file specs for files that exist on disk + */ +export async function inferContextFiles(intent: string, cwd: string): Promise<ContextFileSpec[]> { + const heuristic = CONTEXT_HEURISTICS.find((h) => h.intent === intent.toLowerCase().trim()); + + if (!heuristic) { + // Fallback: read package.json and tsconfig.json if they exist + return inferFallbackContext(cwd); + } + + const results: ContextFileSpec[] = []; + + for (const candidate of heuristic.candidates) { + const matched = await probeCandidate(candidate, cwd); + results.push(...matched); + } + + // Cap total context files at 10 to avoid overwhelming the agent + return results.slice(0, 10); +} +``` + +### `probeCandidate(candidate: CandidateSpec, cwd: string): Promise<ContextFileSpec[]>` + +```ts +async function probeCandidate(candidate: CandidateSpec, cwd: string): Promise<ContextFileSpec[]> { + const maxFiles = candidate.maxFiles ?? 1; + + // If the pattern is a shell command (starts with a known command prefix), use it directly + if ( + candidate.pattern.startsWith('git ') || + candidate.pattern.startsWith('npm ') || + candidate.pattern.startsWith('gh ') || + candidate.pattern.startsWith('cat ') + ) { + return [ + { + stepName: candidate.stepName, + command: candidate.command ?? candidate.pattern, + }, + ]; + } + + // If the pattern is a literal path, check existence + if (!candidate.pattern.includes('*') && !candidate.pattern.includes('{')) { + const fullPath = path.join(cwd, candidate.pattern); + if (existsSync(fullPath)) { + return [ + { + stepName: candidate.stepName, + command: candidate.command ??
`cat ${candidate.pattern}`, + }, + ]; + } + return []; + } + + // Glob expansion + const matches = await globFiles(candidate.pattern, cwd); + return matches.slice(0, maxFiles).map((filePath, idx) => ({ + stepName: maxFiles > 1 ? `${candidate.stepName}-${idx}` : candidate.stepName, + command: candidate.command ?? `cat ${filePath}`, + })); +} +``` + +### `inferFallbackContext(cwd: string): Promise<ContextFileSpec[]>` + +```ts +async function inferFallbackContext(cwd: string): Promise<ContextFileSpec[]> { + const fallbackFiles = ['package.json', 'tsconfig.json', 'README.md']; + const results: ContextFileSpec[] = []; + + for (const file of fallbackFiles) { + if (existsSync(path.join(cwd, file))) { + results.push({ + stepName: `read-${file.replace(/\./g, '-')}`, + command: `cat ${file}`, + }); + } + } + + return results; +} +``` + +--- + +## ResolvePersonaByIdOrIntent Integration + +### `buildWorkflowInput(flags: AgentModeFlags, cwd: string): Promise<WorkflowGeneratorInput>` + +Bridges CLI flags to the `WorkflowGeneratorInput` expected by the Phase 2 generator. + +```ts +import { + resolvePersonaByIdOrIntent, + type PersonaProfile, + type PersonaResolution, + type WorkflowGeneratorInput, + type ContextFileSpec, + type VerificationSpec, +} from './persona-utils.js'; +import { inferContextFiles } from './context-heuristics.js'; + +/** + * Build a WorkflowGeneratorInput from parsed CLI flags. + * + * 1. Resolve persona via resolvePersonaByIdOrIntent() + * 2. Determine context files (explicit --context or heuristic inference) + * 3. Map --verify commands to VerificationSpec[] + * 4. Assemble the complete input + */ +export async function buildWorkflowInput( + flags: AgentModeFlags, + cwd: string +): Promise<WorkflowGeneratorInput> { + // Step 1: Resolve persona + const profileHint: PersonaProfile | undefined = flags.profile + ?
{ id: flags.profile, name: flags.profile } + : undefined; + + const selection: PersonaResolution = resolvePersonaByIdOrIntent(flags.agentRef, profileHint); + + // Step 2: Determine context files + let contextFiles: ContextFileSpec[]; + if (flags.contextPaths.length > 0) { + // Explicit context: each --context path becomes a context file spec + contextFiles = flags.contextPaths.map((filePath, idx) => ({ + stepName: `read-context-${idx}`, + command: `cat ${filePath}`, + })); + } else { + // Heuristic inference based on intent + contextFiles = await inferContextFiles(selection.intent, cwd); + } + + // Step 3: Map --verify commands to VerificationSpec[] + const verifications: VerificationSpec[] = flags.verifyCommands.map((cmd, idx) => ({ + stepName: `verify-${idx}`, + command: cmd, + })); + + // Step 4: Assemble workflow name + const workflowName = slugify(flags.taskDescription); + + // Step 5: Build complete input + return { + taskDescription: flags.taskDescription, + workflowName, + persona: selection.persona ?? { + id: selection.intent, + name: selection.intent, + intent: selection.intent, + preset: selection.preset, + pattern: selection.pattern, + }, + selection, + skillPlan: { installs: [] }, // Skills are resolved in a future phase + contextFiles, + verifications, + outputFile: flags.outputPath, + maxConcurrency: flags.concurrency, + timeout: flags.timeout, + }; +} + +function slugify(text: string): string { + return text + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .slice(0, 60); +} +``` + +--- + +## Agent Mode Execution Flow + +### Updated `main()` in `cli.ts` + +The following shows the agent-mode branch inserted into the existing `main()` function. The YAML-mode codepath is unchanged. + +```ts +async function main(): Promise { + const args = process.argv.slice(2); + + if (args.length === 0 || args.includes('--help')) { + printUsage(); + process.exit(args.includes('--help') ? 
0 : 1); + } + + // ── Agent mode ──────────────────────────────────────────────────────────── + const agentFlags = parseAgentFlags(args); + if (agentFlags) { + await runAgentMode(agentFlags); + return; + } + + // ── Resume mode (unchanged) ─────────────────────────────────────────────── + // ... existing resume code ... + + // ── Normal / validate / dry-run mode (unchanged) ────────────────────────── + // ... existing YAML code ... +} +``` + +### `runAgentMode(flags: AgentModeFlags): Promise` + +```ts +import { generateWorkflow } from './workflow-generator.js'; +import { writeFile } from 'node:fs/promises'; + +async function runAgentMode(flags: AgentModeFlags): Promise { + const cwd = process.cwd(); + + // Step 1: Build workflow input from CLI flags + console.log(chalk.dim(`Resolving persona: ${flags.agentRef}...`)); + const input = await buildWorkflowInput(flags, cwd); + + // Step 2: Log resolution result + const resLabel = input.selection.resolved + ? chalk.green(`resolved → ${input.selection.intent}`) + : chalk.yellow(`derived → ${input.selection.intent}`); + console.log( + chalk.dim('Persona:'), + resLabel, + chalk.dim(`(preset: ${input.selection.preset}, pattern: ${input.selection.pattern})`) + ); + console.log( + chalk.dim(`Context files: ${input.contextFiles.length}`), + chalk.dim(`| Verifications: ${input.verifications.length}`) + ); + + // Step 3: Generate workflow + const generated = generateWorkflow(input); + + // Step 4: Handle --output (write generated source to file) + if (flags.outputPath) { + await writeFile(flags.outputPath, generated.source, 'utf-8'); + console.log(chalk.dim(`Generated workflow written to: ${flags.outputPath}`)); + } + + // Step 5: Handle --dry-run + if (flags.dryRun) { + printDryRunReport(generated, input); + process.exit(0); + } + + // Step 6: Execute the generated workflow + console.log(chalk.dim(`\nExecuting workflow: ${input.workflowName}...`)); + + // Write generated source to a temp file for WorkflowRunner + const tmpPath = 
path.join(cwd, '.agent-relay', `_agent-${input.workflowName}.ts`); + await writeFile(tmpPath, generated.source, 'utf-8'); + + // Use the standard WorkflowRunner execution path + const dbPath = path.join(cwd, '.agent-relay', 'workflow-runs.jsonl'); + const fileDb = new JsonFileWorkflowDb(dbPath); + const runner = new WorkflowRunner({ db: fileDb }); + + // Install shutdown handler + let shuttingDown = false; + const shutdown = async (signal: string): Promise => { + if (shuttingDown) return; + shuttingDown = true; + console.log(`\n[workflow] ${signal} received — shutting down broker...`); + await runner.relay?.shutdown().catch(() => undefined); + process.exit(130); + }; + process.on('SIGINT', () => void shutdown('SIGINT')); + process.on('SIGTERM', () => void shutdown('SIGTERM')); + + // Parse the generated file as a workflow config and execute + const config = await runner.parseYamlFile(tmpPath); + const result = await runWithListr(runner, config, undefined, undefined); + + if (result.status === 'completed') { + console.log(chalk.green('\nWorkflow completed successfully.')); + process.exit(0); + } else { + console.error(chalk.red(`\nWorkflow ${result.status}${result.error ? `: ${result.error}` : ''}`)); + process.exit(1); + } +} +``` + +### `printDryRunReport(generated: GeneratedWorkflow, input: WorkflowGeneratorInput): void` + +```ts +function printDryRunReport(generated: GeneratedWorkflow, input: WorkflowGeneratorInput): void { + const { metadata } = generated; + + console.log('\n' + chalk.bold('=== Dry Run Report ===')); + console.log(chalk.dim('Workflow: '), metadata.name); + console.log(chalk.dim('Pattern: '), metadata.pattern); + console.log(chalk.dim('Preset: '), metadata.preset); + console.log(chalk.dim('Agents: '), metadata.agentCount); + console.log(chalk.dim('Total steps: '), metadata.stepCount); + console.log(chalk.dim('Est. 
waves: '), metadata.estimatedWaves); + console.log(chalk.dim('Concurrency: '), input.maxConcurrency); + console.log(chalk.dim('Timeout: '), `${input.timeout}ms`); + + console.log('\n' + chalk.bold('Phases:')); + console.log(chalk.dim(' Skills: '), metadata.phases.skills || 'none'); + console.log(chalk.dim(' Context: '), metadata.phases.context || 'none'); + console.log(chalk.dim(' Task: '), metadata.phases.task); + console.log(chalk.dim(' Verification: '), metadata.phases.verification || 'none'); + + if (input.contextFiles.length > 0) { + console.log('\n' + chalk.bold('Context files:')); + for (const ctx of input.contextFiles) { + console.log(chalk.dim(` ${ctx.stepName}: `) + ctx.command); + } + } + + if (input.verifications.length > 0) { + console.log('\n' + chalk.bold('Verifications:')); + for (const ver of input.verifications) { + console.log(chalk.dim(` ${ver.stepName}: `) + ver.command); + } + } + + if (input.outputFile) { + console.log('\n' + chalk.dim('Output: ') + input.outputFile); + } + + console.log('\n' + chalk.bold('Generated source:')); + console.log(chalk.dim('─'.repeat(72))); + console.log(generated.source); + console.log(chalk.dim('─'.repeat(72))); +} +``` + +--- + +## Mutual Exclusivity Validation + +Agent mode flags must not be combined with YAML mode flags. Call `validateFlagExclusivity(args)` in `main()` before `parseAgentFlags(args)`; the check needs the raw `args`, which `runAgentMode()` never receives: + +```ts +function validateFlagExclusivity(args: string[]): void { + const yamlOnlyFlags = ['--resume', '--workflow', '--start-from', '--previous-run-id', '--validate']; + const agentOnlyFlags = ['--agent', '-a', '--profile', '-p', '--tier', '-t', '--verify', '-v']; + + const hasAgent = args.some((a) => a === '--agent' || a === '-a'); + if (!hasAgent) return; + + for (const flag of yamlOnlyFlags) { + if (args.includes(flag)) { + throw new Error( + `"${flag}" cannot be used with --agent.
` + `Use either YAML mode or agent mode, not both.` + ); + } + } +} +``` + +--- + +## File Structure Changes + +### `packages/sdk/src/workflows/cli.ts` — Modifications + +``` +cli.ts (modified) + ├── Updated FLAGS_WITH_VALUES (add agent-mode flags) + ├── Updated printUsage() (add agent-mode docs) + ├── New: parseAgentFlags() + ├── New: getTaskDescriptionArg() (replaces getYamlPathArg in shared logic) + ├── New: collectRepeatable() + ├── New: getFlagValue() + ├── New: validateFlagExclusivity() + ├── New: buildWorkflowInput() + ├── New: runAgentMode() + ├── New: printDryRunReport() + ├── Modified: main() (insert agent-mode branch before resume-mode) + └── New imports: persona-utils, workflow-generator, context-heuristics +``` + +### `packages/sdk/src/workflows/context-heuristics.ts` — New File + +``` +context-heuristics.ts (new) + ├── Interface definitions (ContextHeuristic, CandidateSpec) + ├── CONTEXT_HEURISTICS constant (intent-to-candidate mapping) + ├── inferContextFiles() + ├── probeCandidate() + ├── inferFallbackContext() + └── globFiles() helper +``` + +### Imports Added to `cli.ts` + +```ts +import { + resolvePersonaByIdOrIntent, + type PersonaProfile, + type PersonaResolution, + type WorkflowGeneratorInput, + type ContextFileSpec, + type VerificationSpec, +} from './persona-utils.js'; +import { generateWorkflow, type GeneratedWorkflow } from './workflow-generator.js'; +import { inferContextFiles } from './context-heuristics.js'; +import { writeFile } from 'node:fs/promises'; +``` + +--- + +## SDK Export Changes + +Add to `packages/sdk/src/workflows/index.ts`: + +```ts +export { inferContextFiles, type ContextHeuristic, type CandidateSpec } from './context-heuristics.js'; + +export { parseAgentFlags, buildWorkflowInput, type AgentModeFlags } from './cli.js'; +``` + +--- + +## File: `packages/sdk/src/workflows/__tests__/context-heuristics.test.ts` + +### Test Structure (vitest) + +```ts +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; 
+import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import { inferContextFiles } from '../context-heuristics.js'; +``` + +### Test Cases + +#### `inferContextFiles` + +| Test | Setup | Input | Assertions | +| -------------------------------------- | ------------------------------- | ------------------------------------------------ | -------------------------------------------- | +| Returns fallback when intent unknown | Create `package.json` in tmpdir | `inferContextFiles('unknown-intent', tmpdir)` | Returns spec for `package.json` | +| Returns empty when no files match | Empty tmpdir | `inferContextFiles('review', tmpdir)` | `result.length === 0` | +| Finds tsconfig for architecture-plan | Create `tsconfig.json` | `inferContextFiles('architecture-plan', tmpdir)` | Result includes `read-tsconfig-json` step | +| Finds package.json for security-review | Create `package.json` | `inferContextFiles('security-review', tmpdir)` | Result includes step with `cat package.json` | +| Finds README for documentation | Create `README.md` | `inferContextFiles('documentation', tmpdir)` | Result includes step with `cat README.md` | +| Caps at 10 context files | Create 15 matching files | `inferContextFiles('review', tmpdir)` | `result.length <= 10` | +| Case-insensitive intent matching | Create `package.json` | `inferContextFiles('SECURITY-REVIEW', tmpdir)` | Same as lowercase | +| Uses git diff for review intent | Has git repo | `inferContextFiles('review', tmpdir)` | Step command includes `git diff` | + +--- + +## End-to-End Flow Example + +### Command + +```bash +relay-workflow "Review the auth middleware for OWASP vulnerabilities" \ + --agent security-review \ + --context src/middleware/auth.ts \ + --context src/middleware/auth.test.ts \ + --verify "! grep -r 'eval(' src/middleware/" \ + --output reports/security-workflow.ts +``` + +### Execution Steps + +1. 
**Parse flags:** + + ``` + taskDescription: "Review the auth middleware for OWASP vulnerabilities" + agentRef: "security-review" + contextPaths: ["src/middleware/auth.ts", "src/middleware/auth.test.ts"] + verifyCommands: ["! grep -r 'eval(' src/middleware/"] + outputPath: "reports/security-workflow.ts" + dryRun: false, concurrency: 4, timeout: 3600000 + ``` + +2. **Resolve persona:** + + ``` + resolvePersonaByIdOrIntent("security-review") + → { resolved: true, intent: "security-review", preset: "analyst", pattern: "dag", + persona: { id: "security-reviewer-v1", name: "Security Reviewer" } } + ``` + +3. **Build context files (explicit):** + + ``` + [ + { stepName: "read-context-0", command: "cat src/middleware/auth.ts" }, + { stepName: "read-context-1", command: "cat src/middleware/auth.test.ts" }, + ] + ``` + +4. **Build verifications:** + + ``` + [ + { stepName: "verify-0", command: "! grep -r 'eval(' src/middleware/" }, + ] + ``` + +5. **Generate workflow** via `generateWorkflow(input)` + +6. **Write to `reports/security-workflow.ts`** + +7. **Execute** via `WorkflowRunner` + +### Dry-Run Example + +```bash +relay-workflow "Write API docs for the SDK" --agent documentation --dry-run +``` + +Output: + +``` +Resolving persona: documentation... +Persona: resolved → documentation (preset: worker, pattern: pipeline) +Context files: 3 | Verifications: 0 + +=== Dry Run Report === +Workflow: write-api-docs-for-the-sdk +Pattern: pipeline +Preset: worker +Agents: 1 +Total steps: 7 +Est. 
waves: 6 +Concurrency: 4 +Timeout: 3600000ms + +Phases: + Skills: none + Context: 3 + Task: 3 + Verification: none + +Context files: + read-readme: cat README.md + read-index-0: cat src/index.ts + read-docs-0: cat docs/getting-started.md + +Generated source: +──────────────────────────────────────────────────────────────────────── +/** + * Auto-generated workflow: write-api-docs-for-the-sdk + * Persona: Documentation Writer (documentation) + * Pattern: pipeline | Preset: worker + * Generated: 2026-04-10T12:00:00.000Z + */ +import { workflow } from '@agent-relay/sdk/workflows'; +... +──────────────────────────────────────────────────────────────────────── +``` + +--- + +## Implementation Notes + +1. **Backward compatible.** The `--agent` flag is entirely opt-in. Without it, the CLI behaves exactly as before. No existing flags or behaviors are changed. + +2. **Positional argument reuse.** Both YAML mode and agent mode use the first positional argument — a file path in YAML mode, a task description in agent mode. The `getYamlPathArg` helper is reused via `getTaskDescriptionArg` (identical logic, different semantics). + +3. **Context heuristics are best-effort.** `inferContextFiles()` probes the filesystem and returns only files that exist; command-based candidates (`git`, `npm`, `gh`, `cat`) are passed through without probing. If no heuristic matches the intent, it falls back to common project files (`package.json`, `tsconfig.json`, `README.md`). The 10-file cap prevents overwhelming the agent with context. + +4. **Generated workflow is ephemeral.** The workflow is written to `.agent-relay/_agent-{name}.ts` for execution and can be cleaned up later. The `--output` flag provides a way to persist the generated source at a user-chosen location. + +5. **No new npm dependencies.** All new code uses Node.js built-ins (`fs`, `path`), existing SDK types from Phase 1/2, and `chalk` (already a dependency). The `globFiles()` helper uses the Node.js 22+ built-in `fs.glob` when available and otherwise falls back to a `readdir`/`stat` walk built on Node.js built-ins; it must not depend on `globby`. + +6.
**Tier flag is forward-looking.** The `--tier` flag is parsed and validated but not consumed by the workflow generator in this phase. It will be used in a future phase to select model tiers (e.g., `claude-sonnet` for standard, `claude-opus` for premium). + +7. **Validation is fail-fast.** Invalid flag combinations, missing required values, and out-of-range numbers all throw synchronous errors before any async work begins. + +8. **Pattern follows existing CLI conventions.** The flag parsing uses the same manual `args` iteration pattern as the existing CLI code (no external argument parser). This maintains consistency and avoids adding dependencies like `yargs` or `commander`. + +--- + +## Acceptance Criteria + +- [ ] `--agent` / `-a` flag activates agent mode; without it, existing YAML mode is unchanged +- [ ] `parseAgentFlags()` correctly extracts all flags including repeatable `--context` and `--verify` +- [ ] `buildWorkflowInput()` calls `resolvePersonaByIdOrIntent()` and produces a valid `WorkflowGeneratorInput` +- [ ] Explicit `--context` paths override heuristic inference +- [ ] `inferContextFiles()` returns appropriate context for all 13 intents when files exist +- [ ] `inferContextFiles()` falls back to common files for unknown intents +- [ ] `--dry-run` prints the generated workflow source and metadata without executing +- [ ] `--output` writes the generated workflow to the specified path +- [ ] `--concurrency` and `--timeout` are forwarded to `WorkflowGeneratorInput` +- [ ] `--tier` is parsed and validated (`'standard'` | `'premium'`) +- [ ] `--profile` is passed as a disambiguation hint to `resolvePersonaByIdOrIntent()` +- [ ] Agent-mode flags and YAML-mode flags are mutually exclusive (error on mix) +- [ ] Short flags (`-a`, `-p`, `-t`, `-d`, `-c`, `-v`, `-o`) work correctly +- [ ] Invalid flag values produce clear error messages (not stack traces) +- [ ] Context file heuristics cap at 10 files +- [ ] Generated workflow executes successfully via 
`WorkflowRunner` +- [ ] All tests pass via `vitest` +- [ ] No new external npm dependencies introduced +- [ ] Updated `--help` output documents all new flags with examples diff --git a/build-plans/03-cli-agent-flag.ts b/build-plans/03-cli-agent-flag.ts new file mode 100644 index 000000000..85d9e1b05 --- /dev/null +++ b/build-plans/03-cli-agent-flag.ts @@ -0,0 +1,429 @@ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { workflow } = require('@agent-relay/sdk/workflows'); + +const REPO_ROOT = '/Users/khaliqgant/Projects/AgentWorkforce/relay-workflows'; +const SPEC_PATH = 'workflows/meta-agent-flag/03-cli-agent-flag.spec.md'; +const SDK_WORKFLOWS_DIR = 'packages/sdk/src/workflows'; +const CLI_PATH = `${SDK_WORKFLOWS_DIR}/cli.ts`; +const CONTEXT_HEURISTICS_PATH = `${SDK_WORKFLOWS_DIR}/context-heuristics.ts`; +const CONTEXT_HEURISTICS_TEST_PATH = `${SDK_WORKFLOWS_DIR}/__tests__/context-heuristics.test.ts`; +const PERSONA_UTILS_PATH = `${SDK_WORKFLOWS_DIR}/persona-utils.ts`; +const WORKFLOW_GENERATOR_PATH = `${SDK_WORKFLOWS_DIR}/workflow-generator.ts`; +const WORKFLOWS_INDEX_PATH = `${SDK_WORKFLOWS_DIR}/index.ts`; +const RUNNER_PATH = `${SDK_WORKFLOWS_DIR}/runner.ts`; +const RUN_HELPER_PATH = `${SDK_WORKFLOWS_DIR}/run.ts`; + +async function main() { + const wf = workflow('phase-3-cli-agent-flag') + .description('Implement CLI --agent mode, context heuristics, exports, and tests from the Phase 3 spec') + .pattern('dag') + .channel('wf-phase-3-cli-agent-flag') + .maxConcurrency(4) + .timeout(3_600_000) + .agent('cli-implementer', { + cli: 'codex', + preset: 'worker', + role: 'Focused TypeScript SDK CLI implementer for relay-workflow agent mode', + retries: 2, + }) + .agent('context-implementer', { + cli: 'codex', + preset: 'worker', + role: 'Focused TypeScript SDK implementer for context inference utilities', + retries: 2, + }) + .agent('test-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused vitest test 
author for SDK workflow context heuristics', + retries: 2, + }) + .agent('sdk-reviewer', { + cli: 'codex', + preset: 'reviewer', + role: 'Reviews CLI agent-mode implementation for spec conformance and regression risk', + retries: 1, + }); + + wf.step('guard-not-main', { + type: 'deterministic', + command: [ + 'branch="$(git branch --show-current)"', + 'if [ "$branch" = "main" ]; then echo "Refusing to run Phase 3 workflow on main"; exit 1; fi', + 'echo "Running on branch: ${branch:-detached}"', + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-spec', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${SPEC_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-cli', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `sed -n '1,560p' ${CLI_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-phase-1-api', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [`test -f ${PERSONA_UTILS_PATH}`, `sed -n '1,280p' ${PERSONA_UTILS_PATH}`].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-phase-2-api', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [`test -f ${WORKFLOW_GENERATOR_PATH}`, `sed -n '1,320p' ${WORKFLOW_GENERATOR_PATH}`].join( + ' && ' + ), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-workflows-index', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${WORKFLOWS_INDEX_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-runner-execution-path', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [`sed -n '1688,1748p' ${RUNNER_PATH}`, `sed -n '1,140p' ${RUN_HELPER_PATH}`].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-test-patterns', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `sed -n '1,180p' 
${SDK_WORKFLOWS_DIR}/__tests__/template-resolver.test.ts`, + `sed -n '1,180p' ${SDK_WORKFLOWS_DIR}/__tests__/verification.test.ts`, + `sed -n '1,180p' ${SDK_WORKFLOWS_DIR}/__tests__/cli-session-collector.test.ts`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('implement-context-heuristics', { + agent: 'context-implementer', + dependsOn: ['read-spec', 'read-phase-1-api'], + task: ` +You are implementing Phase 3 context inference for the --agent flag feature. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Other workflow workers may edit ${CLI_PATH}, ${WORKFLOWS_INDEX_PATH}, and tests. Own only ${CONTEXT_HEURISTICS_PATH}; do not revert or rewrite files outside that scope. + +Create ${CONTEXT_HEURISTICS_PATH} from the Phase 3 spec. + +Spec: +{{steps.read-spec.output}} + +Phase 1 persona utility API: +{{steps.read-phase-1-api.output}} + +Requirements: +1. Export ContextHeuristic, CandidateSpec, CONTEXT_HEURISTICS, and inferContextFiles(). +2. Import ContextFileSpec as a type from './persona-utils.js'. +3. Implement all 13 intent mappings from the spec: review, security-review, architecture-plan, requirements-analysis, debugging, documentation, verification, test-strategy, tdd-enforcement, flake-investigation, npm-provenance, implement-frontend, and code-gen. +4. Implement literal file probing, command passthrough for git/npm/gh/cat candidates, glob probing, fallback context for package.json/tsconfig.json/README.md, case-insensitive intent matching, deterministic priority ordering, and the 10-file cap. +5. Do not add npm dependencies. If Node type definitions do not expose fs.promises.glob cleanly, implement globFiles() with Node built-ins such as readdir/stat instead of adding globby. +6. Keep commands relative to cwd and avoid shelling out during inference except by returning command strings as ContextFileSpec entries. +7. 
Use concise JSDoc for exported APIs and preserve the SDK style with .js import extensions. + +Only edit ${CONTEXT_HEURISTICS_PATH}. End your output with CONTEXT_HEURISTICS_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'CONTEXT_HEURISTICS_DONE' }, + retries: 2, + }); + + wf.step('verify-context-heuristics-file', { + type: 'deterministic', + dependsOn: ['implement-context-heuristics'], + command: [ + `test -f ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "export interface ContextHeuristic" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "export interface CandidateSpec" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "export const CONTEXT_HEURISTICS" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "export async function inferContextFiles" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "security-review" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "architecture-plan" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "flake-investigation" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "npm-provenance" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "implement-frontend" ${CONTEXT_HEURISTICS_PATH}`, + `grep -q "code-gen" ${CONTEXT_HEURISTICS_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('implement-cli-agent-mode', { + agent: 'cli-implementer', + dependsOn: [ + 'read-spec', + 'read-cli', + 'read-phase-1-api', + 'read-phase-2-api', + 'read-runner-execution-path', + ], + task: ` +You are implementing Phase 3 CLI --agent mode. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Other workflow workers may edit ${CONTEXT_HEURISTICS_PATH}, ${WORKFLOWS_INDEX_PATH}, and tests. Own only ${CLI_PATH}; do not revert or rewrite files outside that scope. + +Update ${CLI_PATH} from the Phase 3 spec. 
+ +Spec: +{{steps.read-spec.output}} + +Current CLI: +{{steps.read-cli.output}} + +Phase 1 persona utility API: +{{steps.read-phase-1-api.output}} + +Phase 2 workflow generator API: +{{steps.read-phase-2-api.output}} + +Current runner/run execution path: +{{steps.read-runner-execution-path.output}} + +Requirements: +1. Add all Phase 3 imports, including resolvePersonaByIdOrIntent(), generateWorkflow(), inferContextFiles(), writeFile(), and mkdir() as needed. +2. Update help output exactly for YAML mode, agent mode, and examples. Preserve existing YAML/resume behavior. +3. Update FLAGS_WITH_VALUES for --agent/-a, --profile/-p, --tier/-t, --context/-c, --verify/-v, --output/-o, --concurrency, and --timeout. +4. Export AgentModeFlags, parseAgentFlags(), and buildWorkflowInput(). Add getTaskDescriptionArg(), collectRepeatable(), getFlagValue(), validateFlagExclusivity(), runAgentMode(), printDryRunReport(), and a local slugify helper if needed. +5. Parse repeatable --context and --verify flags, validate tier/concurrency/timeout, support short flags, and throw clear Error messages for invalid values. +6. Validate agent-mode/YAML-mode mutual exclusivity before doing async work. Existing --resume, --workflow, --start-from, --previous-run-id, and --validate must error when combined with --agent. +7. buildWorkflowInput() must call resolvePersonaByIdOrIntent(), pass --profile as a PersonaProfile hint, use explicit --context paths when supplied, otherwise call inferContextFiles(selection.intent, cwd), map --verify commands, and forward outputPath/concurrency/timeout. +8. runAgentMode() must generate a workflow, write --output when supplied, print the dry-run report and avoid execution in --dry-run mode, and execute generated workflows in a way that works with the current runner/generator APIs. 
If generated.source is TypeScript, do not parse it as YAML unless the runner supports that shape; execute through the existing runnable workflow path while preserving WorkflowRunner-backed execution. +9. Ensure .agent-relay is created before writing any generated temporary workflow file. +10. Make ${CLI_PATH} import-safe before ${WORKFLOWS_INDEX_PATH} exports helpers from it: main() must only run when cli.ts is executed as the entry point, not when imported by the SDK index. +11. Keep YAML-mode DRY_RUN env var behavior unchanged. +12. Do not add npm dependencies and do not edit files outside ${CLI_PATH}. + +Only edit ${CLI_PATH}. End your output with CLI_AGENT_MODE_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'CLI_AGENT_MODE_DONE' }, + retries: 2, + }); + + wf.step('verify-cli-agent-mode', { + type: 'deterministic', + dependsOn: ['implement-cli-agent-mode'], + command: [ + `grep -q -- "relay-workflow \\"\\" --agent " ${CLI_PATH}`, + `grep -q -- "parseAgentFlags" ${CLI_PATH}`, + `grep -q -- "buildWorkflowInput" ${CLI_PATH}`, + `grep -q -- "runAgentMode" ${CLI_PATH}`, + `grep -q -- "printDryRunReport" ${CLI_PATH}`, + `grep -q -- "validateFlagExclusivity" ${CLI_PATH}`, + `grep -q -- "resolvePersonaByIdOrIntent" ${CLI_PATH}`, + `grep -q -- "generateWorkflow" ${CLI_PATH}`, + `grep -q -- "inferContextFiles" ${CLI_PATH}`, + `grep -q -- "--agent, -a" ${CLI_PATH}`, + `grep -q -- "--dry-run" ${CLI_PATH}`, + `grep -q -- "fileURLToPath(import.meta.url)" ${CLI_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('export-phase-3-api', { + agent: 'cli-implementer', + dependsOn: [ + 'read-spec', + 'read-workflows-index', + 'verify-context-heuristics-file', + 'verify-cli-agent-mode', + ], + task: ` +You are exporting Phase 3 SDK workflow APIs. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Other workflow workers may edit tests. 
Own only ${WORKFLOWS_INDEX_PATH}; do not revert or rewrite files outside that scope. + +Update ${WORKFLOWS_INDEX_PATH} from the Phase 3 spec. + +Spec: +{{steps.read-spec.output}} + +Current index: +{{steps.read-workflows-index.output}} + +Requirements: +1. Preserve every existing export in ${WORKFLOWS_INDEX_PATH}. +2. Add explicit exports for inferContextFiles, ContextHeuristic, and CandidateSpec from './context-heuristics.js'. +3. Add explicit exports for parseAgentFlags, buildWorkflowInput, and AgentModeFlags from './cli.js'. +4. Do not use export-star for cli.ts. Keep the export list narrow so importing the SDK does not expose main(). +5. Do not edit files outside ${WORKFLOWS_INDEX_PATH}. + +Only edit ${WORKFLOWS_INDEX_PATH}. End your output with PHASE_3_EXPORT_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'PHASE_3_EXPORT_DONE' }, + retries: 2, + }); + + wf.step('verify-index-export', { + type: 'deterministic', + dependsOn: ['export-phase-3-api'], + command: [ + `grep -q "context-heuristics.js" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "inferContextFiles" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "ContextHeuristic" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "CandidateSpec" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "cli.js" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "parseAgentFlags" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "buildWorkflowInput" ${WORKFLOWS_INDEX_PATH}`, + `grep -q "AgentModeFlags" ${WORKFLOWS_INDEX_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('write-context-heuristics-tests', { + agent: 'test-writer', + dependsOn: ['read-spec', 'read-test-patterns', 'verify-context-heuristics-file'], + task: ` +You are writing Phase 3 context heuristic tests. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Other workflow workers may edit ${CLI_PATH}, ${CONTEXT_HEURISTICS_PATH}, and ${WORKFLOWS_INDEX_PATH}. 
Own only ${CONTEXT_HEURISTICS_TEST_PATH}; do not revert or rewrite files outside that scope. + +Create ${CONTEXT_HEURISTICS_TEST_PATH} with focused vitest coverage. + +Spec: +{{steps.read-spec.output}} + +Existing test style examples: +{{steps.read-test-patterns.output}} + +Requirements: +1. Import describe, it, expect, beforeEach, and afterEach from vitest. +2. Use mkdtempSync, writeFileSync, mkdirSync, rmSync, path, and os for isolated temp projects. +3. Import inferContextFiles from '../context-heuristics.js'. +4. Cover the spec cases: fallback for unknown intent, empty result for review with no matches, tsconfig for architecture-plan, package.json for security-review, README for documentation, cap at 10 files, case-insensitive matching, and review intent git diff command. +5. Add at least one assertion that repeat matching files produce deterministic step names and cat commands. +6. Keep tests deterministic and avoid requiring a real network, GitHub CLI, or external dependencies. + +Only edit ${CONTEXT_HEURISTICS_TEST_PATH}. End your output with CONTEXT_HEURISTICS_TESTS_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'CONTEXT_HEURISTICS_TESTS_DONE' }, + retries: 2, + }); + + wf.step('verify-context-heuristics-test-file', { + type: 'deterministic', + dependsOn: ['write-context-heuristics-tests'], + command: [ + `test -f ${CONTEXT_HEURISTICS_TEST_PATH}`, + `grep -q "inferContextFiles" ${CONTEXT_HEURISTICS_TEST_PATH}`, + `grep -q "mkdtempSync" ${CONTEXT_HEURISTICS_TEST_PATH}`, + `grep -q "architecture-plan" ${CONTEXT_HEURISTICS_TEST_PATH}`, + `grep -q "security-review" ${CONTEXT_HEURISTICS_TEST_PATH}`, + `grep -q "documentation" ${CONTEXT_HEURISTICS_TEST_PATH}`, + `grep -q "SECURITY-REVIEW" ${CONTEXT_HEURISTICS_TEST_PATH}`, + `grep -q "git diff" ${CONTEXT_HEURISTICS_TEST_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('run-context-heuristics-tests', { + type: 'deterministic', + dependsOn: ['verify-context-heuristics-test-file'], + command: `npx vitest run ${CONTEXT_HEURISTICS_TEST_PATH} --reporter=verbose`, + captureOutput: true, + failOnError: true, + }); + + wf.step('typecheck-sdk', { + type: 'deterministic', + dependsOn: ['verify-index-export', 'run-context-heuristics-tests'], + command: 'cd packages/sdk && npx tsc -p tsconfig.json --noEmit', + captureOutput: true, + failOnError: true, + }); + + wf.step('review-phase-3', { + agent: 'sdk-reviewer', + dependsOn: [ + 'read-spec', + 'verify-cli-agent-mode', + 'verify-index-export', + 'run-context-heuristics-tests', + 'typecheck-sdk', + ], + task: ` +Review the Phase 3 CLI --agent integration. Do not edit files. Do not use Relaycast MCP tools or spawn sub-agents. + +Spec: +{{steps.read-spec.output}} + +Check: +1. ${CLI_PATH} preserves YAML mode, resume mode, validate mode, and existing DRY_RUN env var behavior when --agent is absent. +2. Agent mode activates only on --agent/-a and rejects YAML-only flags with clear messages. +3. 
parseAgentFlags() handles all long and short flags, repeatable --context/--verify, defaults, and invalid tier/concurrency/timeout values. +4. buildWorkflowInput() calls resolvePersonaByIdOrIntent(), applies profile hints, uses explicit context over heuristics, maps verifications, slugifies workflow names, and forwards output/concurrency/timeout. +5. runAgentMode() writes --output, supports --dry-run without execution, creates .agent-relay before temp writes, and uses an execution path compatible with generated workflow source. +6. ${CLI_PATH} is import-safe before ${WORKFLOWS_INDEX_PATH} exports parseAgentFlags() and buildWorkflowInput(). +7. ${CONTEXT_HEURISTICS_PATH} implements every intent mapping, fallback, command candidates, literal and glob probing, case-insensitive matching, deterministic ordering, and the 10-file cap without new dependencies. +8. ${CONTEXT_HEURISTICS_TEST_PATH} covers the required behavior and the focused vitest run passed: +{{steps.run-context-heuristics-tests.output}} +9. SDK typecheck passed: +{{steps.typecheck-sdk.output}} + +Output REVIEW_PASS if the implementation is acceptable; otherwise output REVIEW_FAIL with concrete blockers. 
+`.trim(), + verification: { type: 'output_contains', value: 'REVIEW_PASS' }, + retries: 1, + }); + + wf.step('summarize-artifacts', { + type: 'deterministic', + dependsOn: ['review-phase-3'], + command: [ + `echo "Phase 3 CLI agent flag workflow completed."`, + `echo "Artifacts:"`, + `echo "- ${CLI_PATH}"`, + `echo "- ${CONTEXT_HEURISTICS_PATH}"`, + `echo "- ${CONTEXT_HEURISTICS_TEST_PATH}"`, + `echo "- ${WORKFLOWS_INDEX_PATH}"`, + `git diff -- ${CLI_PATH} ${CONTEXT_HEURISTICS_PATH} ${CONTEXT_HEURISTICS_TEST_PATH} ${WORKFLOWS_INDEX_PATH} | sed -n '1,320p'`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + const result = await wf.onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }).run({ + cwd: REPO_ROOT, + }); + + if ('status' in result) { + console.log(`Result: ${result.status}`); + } else { + console.log('Dry run completed.'); + return; + } + + if (result.status !== 'completed') { + process.exitCode = 1; + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/build-plans/04-tests.spec.md b/build-plans/04-tests.spec.md new file mode 100644 index 000000000..8ada9d785 --- /dev/null +++ b/build-plans/04-tests.spec.md @@ -0,0 +1,1916 @@ +# Phase 4 Specification: Unit and Integration Tests + +> Comprehensive test suite for all SDK functions introduced in Phases 1–3: persona resolution, workflow generation, and CLI `--agent` flag integration. + +**Phase:** 4 of 5 +**Dependencies:** Phase 3 (CLI integration — provides `parseAgentFlags`, `inferContextFiles`, and the full end-to-end path) +**Target files:** + +- `packages/sdk/src/workflows/__tests__/persona-utils.test.ts` (expand existing) +- `packages/sdk/src/workflows/__tests__/workflow-generator.test.ts` (expand existing) +- `packages/sdk/src/workflows/__tests__/workflow-generator.integration.test.ts` (new) + +--- + +## Goal + +Deliver a production-quality test suite that: + +1. 
**Exhaustively validates** `resolvePersonaByIdOrIntent()` across all 13 production persona intents and all 10 default persona IDs +2. **Covers every branch** in `derivePreset()` and `derivePattern()` — all analyst/pipeline intents plus boundary cases +3. **Tests `resolvePersonaSelection()`** as the convenience wrapper accepting `PersonaSelection` input +4. **Validates helper functions** `slugify()` and `escapeTemplateString()` for correctness and edge cases +5. **Snapshot-tests** generated workflow source against reference fixtures to catch accidental regressions +6. **Integration-tests** the full pipeline: persona resolution → workflow generation → source validation +7. **Documents** the expected behavior as executable specs that serve as living documentation + +All tests use **vitest** and run via `npx vitest run` with zero external dependencies beyond the SDK itself. + +--- + +## File 1: `packages/sdk/src/workflows/__tests__/persona-utils.test.ts` + +### Imports + +```ts +import { describe, it, expect, beforeEach } from 'vitest'; +import { + derivePreset, + derivePattern, + resolvePersonaByIdOrIntent, + resolvePersonaSelection, + isAnalystIntent, + isPipelineIntent, + resetPersonaRegistry, + initPersonaRegistry, + getPersonaIdToIntentMap, + personaRegistry, + DEFAULT_PERSONA_PROFILES, + ANALYST_INTENTS, + PIPELINE_INTENTS, + type PersonaProfile, + type PersonaSelection, + type PersonaResolution, +} from '../persona-utils.js'; +``` + +### Test Suite: `derivePreset` + +#### All 13 intents — exhaustive mapping + +```ts +describe('derivePreset', () => { + describe('analyst intents → "analyst"', () => { + it.each([ + 'review', + 'architecture-plan', + 'requirements-analysis', + 'security-review', + 'verification', + 'test-strategy', + ])('returns "analyst" for intent "%s"', (intent) => { + expect(derivePreset(intent)).toBe('analyst'); + }); + }); + + describe('worker intents → "worker"', () => { + it.each([ + 'implement-frontend', + 'debugging', + 'documentation', + 
'tdd-enforcement', + 'flake-investigation', + 'opencode-workflow-correctness', + 'npm-provenance', + ])('returns "worker" for intent "%s"', (intent) => { + expect(derivePreset(intent)).toBe('worker'); + }); + }); + + describe('case insensitivity', () => { + it.each([ + ['REVIEW', 'analyst'], + ['Security-Review', 'analyst'], + ['Architecture-Plan', 'analyst'], + ['REQUIREMENTS-ANALYSIS', 'analyst'], + [' review ', 'analyst'], + ['DEBUGGING', 'worker'], + ['Documentation', 'worker'], + ])('derivePreset("%s") → "%s"', (input, expected) => { + expect(derivePreset(input)).toBe(expected); + }); + }); + + describe('unknown / edge-case intents', () => { + it.each([ + ['unknown-intent', 'worker'], + ['', 'worker'], + ['code-gen', 'worker'], + ['refactor', 'worker'], + ['deploy', 'worker'], + ['review-extended', 'worker'], // partial match should NOT match + ['security', 'worker'], // substring should NOT match + ])('derivePreset("%s") → "%s"', (input, expected) => { + expect(derivePreset(input)).toBe(expected); + }); + }); +}); +``` + +### Test Suite: `derivePattern` + +#### All 13 intents — exhaustive mapping + +```ts +describe('derivePattern', () => { + describe('pipeline intents → "pipeline"', () => { + it.each(['requirements-analysis', 'documentation', 'tdd-enforcement'])( + 'returns "pipeline" for intent "%s"', + (intent) => { + expect(derivePattern(intent)).toBe('pipeline'); + } + ); + }); + + describe('DAG intents → "dag"', () => { + it.each([ + 'implement-frontend', + 'review', + 'architecture-plan', + 'debugging', + 'security-review', + 'verification', + 'test-strategy', + 'flake-investigation', + 'opencode-workflow-correctness', + 'npm-provenance', + ])('returns "dag" for intent "%s"', (intent) => { + expect(derivePattern(intent)).toBe('dag'); + }); + }); + + describe('case insensitivity', () => { + it.each([ + ['REQUIREMENTS-ANALYSIS', 'pipeline'], + ['Documentation', 'pipeline'], + ['TDD-Enforcement', 'pipeline'], + [' documentation ', 'pipeline'], + ['REVIEW', 
'dag'], + ])('derivePattern("%s") → "%s"', (input, expected) => { + expect(derivePattern(input)).toBe(expected); + }); + }); + + describe('unknown / edge-case intents', () => { + it.each([ + ['unknown-intent', 'dag'], + ['', 'dag'], + ['code-gen', 'dag'], + ['documentation-extended', 'dag'], // partial match should NOT match + ])('derivePattern("%s") → "%s"', (input, expected) => { + expect(derivePattern(input)).toBe(expected); + }); + }); +}); +``` + +### Test Suite: `isAnalystIntent` / `isPipelineIntent` + +```ts +describe('isAnalystIntent', () => { + it.each([...ANALYST_INTENTS])('returns true for "%s"', (intent) => { + expect(isAnalystIntent(intent)).toBe(true); + }); + + it.each(['debugging', 'documentation', 'code-gen', 'unknown'])('returns false for "%s"', (intent) => { + expect(isAnalystIntent(intent)).toBe(false); + }); +}); + +describe('isPipelineIntent', () => { + it.each([...PIPELINE_INTENTS])('returns true for "%s"', (intent) => { + expect(isPipelineIntent(intent)).toBe(true); + }); + + it.each(['review', 'debugging', 'code-gen', 'unknown'])('returns false for "%s"', (intent) => { + expect(isPipelineIntent(intent)).toBe(false); + }); +}); +``` + +### Test Suite: `resolvePersonaByIdOrIntent` + +#### Registry setup + +```ts +describe('resolvePersonaByIdOrIntent', () => { + beforeEach(() => { + resetPersonaRegistry(); + initPersonaRegistry(DEFAULT_PERSONA_PROFILES); + }); +``` + +#### Intent resolution — all 13 production intents + +The intents below split into two groups: 9 intents that have a matching persona in the default 10-profile registry (8 of the 13 production intents plus `code-gen`), and 5 production intents that fall through to derivation (no default persona registered). 
+ +```ts +describe('intent resolution (resolutionType: "intent")', () => { + it.each([ + ['review', 'reviewer-v1', 'analyst', 'dag'], + ['architecture-plan', 'architect-v1', 'analyst', 'dag'], + ['requirements-analysis', 'requirements-analyst-v1', 'analyst', 'pipeline'], + ['security-review', 'security-reviewer-v1', 'analyst', 'dag'], + ['verification', 'verifier-v1', 'analyst', 'dag'], + ['test-strategy', 'test-strategist-v1', 'analyst', 'dag'], + ['documentation', 'docs-writer-v1', 'worker', 'pipeline'], + ['tdd-enforcement', 'tdd-coach-v1', 'worker', 'pipeline'], + ['code-gen', 'code-worker-v1', 'worker', 'dag'], + ] as const)( + 'resolves intent "%s" → persona "%s", preset "%s", pattern "%s"', + (intent, expectedPersonaId, expectedPreset, expectedPattern) => { + const result = resolvePersonaByIdOrIntent(intent); + expect(result.resolved).toBe(true); + expect(result.resolutionType).toBe('intent'); + expect(result.persona?.id).toBe(expectedPersonaId); + expect(result.intent).toBe(intent); + expect(result.preset).toBe(expectedPreset); + expect(result.pattern).toBe(expectedPattern); + } + ); + + // Intents without a default persona in the 10-profile registry + // These fall through to derivation + it.each([ + ['implement-frontend', 'worker', 'dag'], + ['debugging', 'worker', 'dag'], + ['flake-investigation', 'worker', 'dag'], + ['opencode-workflow-correctness', 'worker', 'dag'], + ['npm-provenance', 'worker', 'dag'], + ] as const)( + 'derives intent "%s" → preset "%s", pattern "%s" (no persona in default registry)', + (intent, expectedPreset, expectedPattern) => { + const result = resolvePersonaByIdOrIntent(intent); + // These intents have no matching persona in the 10-entry default registry + // so they resolve via derivation + expect(result.resolved).toBe(false); + expect(result.resolutionType).toBe('derived'); + expect(result.intent).toBe(intent); + expect(result.preset).toBe(expectedPreset); + expect(result.pattern).toBe(expectedPattern); + } + ); +}); +``` + 
+#### Persona ID resolution — all 10 default profiles + +```ts +describe('persona ID resolution (resolutionType: "persona_id")', () => { + it.each([ + ['reviewer-v1', 'review', 'analyst', 'dag'], + ['reviewer-v2', 'review', 'analyst', 'dag'], + ['architect-v1', 'architecture-plan', 'analyst', 'dag'], + ['requirements-analyst-v1', 'requirements-analysis', 'analyst', 'pipeline'], + ['security-reviewer-v1', 'security-review', 'analyst', 'dag'], + ['verifier-v1', 'verification', 'analyst', 'dag'], + ['test-strategist-v1', 'test-strategy', 'analyst', 'dag'], + ['docs-writer-v1', 'documentation', 'worker', 'pipeline'], + ['tdd-coach-v1', 'tdd-enforcement', 'worker', 'pipeline'], + ['code-worker-v1', 'code-gen', 'worker', 'dag'], + ] as const)( + 'resolves persona ID "%s" → intent "%s", preset "%s", pattern "%s"', + (personaId, expectedIntent, expectedPreset, expectedPattern) => { + const result = resolvePersonaByIdOrIntent(personaId); + expect(result.resolved).toBe(true); + // May be 'intent' or 'persona_id' depending on whether the intent + // also matches — both are valid resolved states + expect(['intent', 'persona_id']).toContain(result.resolutionType); + expect(result.persona?.id).toBe(personaId); + expect(result.intent).toBe(expectedIntent); + expect(result.preset).toBe(expectedPreset); + expect(result.pattern).toBe(expectedPattern); + } + ); +}); +``` + +#### Fallback derivation + +```ts +describe('fallback derivation (resolutionType: "derived")', () => { + it('returns derived resolution for unknown ref', () => { + const result = resolvePersonaByIdOrIntent('unknown-persona'); + expect(result.resolved).toBe(false); + expect(result.resolutionType).toBe('derived'); + expect(result.persona).toBeUndefined(); + expect(result.intent).toBe('unknown-persona'); + expect(result.preset).toBe('worker'); + expect(result.pattern).toBe('dag'); + }); + + it('returns derived resolution for empty string', () => { + const result = resolvePersonaByIdOrIntent(''); + 
expect(result.resolved).toBe(false); + expect(result.resolutionType).toBe('derived'); + expect(result.preset).toBe('worker'); + expect(result.pattern).toBe('dag'); + }); + + it('does not throw for any input', () => { + const inputs = ['unknown', '', ' ', 'null', 'undefined', '123', 'a'.repeat(1000), 'review/security']; + for (const input of inputs) { + expect(() => resolvePersonaByIdOrIntent(input)).not.toThrow(); + } + }); +}); +``` + +#### Profile hint disambiguation + +```ts +describe('profile hint disambiguation', () => { + it('selects reviewer-v2 over reviewer-v1 when profile hint matches', () => { + const profile: PersonaProfile = { + id: 'reviewer-v2', + name: 'Senior Reviewer', + intent: 'review', + }; + const result = resolvePersonaByIdOrIntent('review', profile); + expect(result.resolved).toBe(true); + expect(result.persona?.id).toBe('reviewer-v2'); + }); + + it('falls back to first match when profile hint does not match', () => { + const profile: PersonaProfile = { + id: 'nonexistent-v1', + name: 'Ghost', + intent: 'review', + }; + const result = resolvePersonaByIdOrIntent('review', profile); + expect(result.resolved).toBe(true); + expect(result.persona?.id).toBe('reviewer-v1'); + }); + + it('ignores profile hint for persona ID resolution', () => { + const profile: PersonaProfile = { + id: 'reviewer-v2', + name: 'Senior Reviewer', + intent: 'review', + }; + // Direct persona ID lookup ignores the hint + const result = resolvePersonaByIdOrIntent('architect-v1', profile); + expect(result.persona?.id).toBe('architect-v1'); + }); +}); +``` + +#### Case handling + +```ts +describe('case insensitivity', () => { + it('resolves uppercase persona ID', () => { + const result = resolvePersonaByIdOrIntent('REVIEWER-V1'); + expect(result.resolved).toBe(true); + expect(result.persona?.id).toBe('reviewer-v1'); + }); + + it('resolves mixed-case intent', () => { + const result = resolvePersonaByIdOrIntent('Security-Review'); + expect(result.resolved).toBe(true); + 
expect(result.resolutionType).toBe('intent'); + expect(result.preset).toBe('analyst'); + }); + + it('resolves intent with leading/trailing whitespace', () => { + const result = resolvePersonaByIdOrIntent(' review '); + expect(result.resolved).toBe(true); + expect(result.persona?.id).toBe('reviewer-v1'); + }); +}); +``` + +#### Registry management + +```ts +describe('registry management', () => { + it('resetPersonaRegistry clears all profiles', () => { + resetPersonaRegistry(); + const result = resolvePersonaByIdOrIntent('review'); + expect(result.resolved).toBe(false); + }); + + it('initPersonaRegistry replaces all profiles', () => { + const customProfiles: PersonaProfile[] = [ + { id: 'custom-v1', name: 'Custom', intent: 'custom-intent', preset: 'worker', pattern: 'dag' }, + ]; + resetPersonaRegistry(); + initPersonaRegistry(customProfiles); + const result = resolvePersonaByIdOrIntent('custom-intent'); + expect(result.resolved).toBe(true); + expect(result.persona?.id).toBe('custom-v1'); + }); + + it('personaRegistry.register adds a new profile', () => { + personaRegistry.register({ + id: 'new-persona-v1', + name: 'New Persona', + intent: 'new-intent', + preset: 'worker', + pattern: 'dag', + }); + const result = resolvePersonaByIdOrIntent('new-intent'); + expect(result.resolved).toBe(true); + expect(result.persona?.id).toBe('new-persona-v1'); + }); + + it('getPersonaIdToIntentMap returns correct reverse mapping', () => { + const map = getPersonaIdToIntentMap(); + expect(map.get('reviewer-v1')).toBe('review'); + expect(map.get('architect-v1')).toBe('architecture-plan'); + expect(map.get('docs-writer-v1')).toBe('documentation'); + }); + + it('getPersonaIdToIntentMap contains all 10 default profiles', () => { + const map = getPersonaIdToIntentMap(); + expect(map.size).toBe(10); + }); + + it('double init does not duplicate profiles', () => { + initPersonaRegistry(DEFAULT_PERSONA_PROFILES); + initPersonaRegistry(DEFAULT_PERSONA_PROFILES); + const map = 
getPersonaIdToIntentMap(); + expect(map.size).toBe(10); + }); +}); +``` + +#### DEFAULT_PERSONA_PROFILES validation + +```ts +describe('DEFAULT_PERSONA_PROFILES', () => { + it('contains exactly 10 profiles', () => { + expect(DEFAULT_PERSONA_PROFILES).toHaveLength(10); + }); + + it('all profiles have required fields', () => { + for (const profile of DEFAULT_PERSONA_PROFILES) { + expect(profile.id).toBeDefined(); + expect(profile.name).toBeDefined(); + expect(profile.intent).toBeDefined(); + expect(profile.preset).toBeDefined(); + expect(profile.pattern).toBeDefined(); + } + }); + + it('all profile IDs are unique', () => { + const ids = DEFAULT_PERSONA_PROFILES.map((p) => p.id); + expect(new Set(ids).size).toBe(ids.length); + }); + + it('contains expected persona IDs', () => { + const ids = DEFAULT_PERSONA_PROFILES.map((p) => p.id); + const expected = [ + 'reviewer-v1', + 'reviewer-v2', + 'architect-v1', + 'requirements-analyst-v1', + 'security-reviewer-v1', + 'verifier-v1', + 'test-strategist-v1', + 'docs-writer-v1', + 'tdd-coach-v1', + 'code-worker-v1', + ]; + for (const id of expected) { + expect(ids).toContain(id); + } + }); + + it('all presets are valid AgentPreset values', () => { + const validPresets = ['lead', 'worker', 'reviewer', 'analyst']; + for (const profile of DEFAULT_PERSONA_PROFILES) { + expect(validPresets).toContain(profile.preset); + } + }); + + it('all patterns are valid SwarmPattern values', () => { + const validPatterns = ['dag', 'pipeline', 'fan-out', 'hub-spoke', 'mesh']; + for (const profile of DEFAULT_PERSONA_PROFILES) { + expect(validPatterns).toContain(profile.pattern); + } + }); +}); +``` + +### Test Suite: `resolvePersonaSelection` + +The convenience wrapper accepts a `PersonaSelection` object and delegates to `resolvePersonaByIdOrIntent`. 
+ +```ts + describe('resolvePersonaSelection', () => { + it('delegates to resolvePersonaByIdOrIntent with ref', () => { + const selection: PersonaSelection = { ref: 'review' }; + const result = resolvePersonaSelection(selection); + expect(result.resolved).toBe(true); + expect(result.intent).toBe('review'); + expect(result.preset).toBe('analyst'); + }); + + it('passes profile hint through', () => { + const selection: PersonaSelection = { + ref: 'review', + profile: { id: 'reviewer-v2', name: 'Senior Reviewer', intent: 'review' }, + }; + const result = resolvePersonaSelection(selection); + expect(result.persona?.id).toBe('reviewer-v2'); + }); + + it('handles unknown ref via derivation', () => { + const selection: PersonaSelection = { ref: 'custom-task' }; + const result = resolvePersonaSelection(selection); + expect(result.resolved).toBe(false); + expect(result.resolutionType).toBe('derived'); + expect(result.preset).toBe('worker'); + expect(result.pattern).toBe('dag'); + }); + + it('passes optional context fields through', () => { + const selection: PersonaSelection = { + ref: 'security-review', + context: { workflowType: 'audit', taskType: 'compliance' }, + }; + const result = resolvePersonaSelection(selection); + expect(result.resolved).toBe(true); + expect(result.intent).toBe('security-review'); + }); + }); +}); +``` + +--- + +## File 2: `packages/sdk/src/workflows/__tests__/workflow-generator.test.ts` + +### Imports + +```ts +import { describe, it, expect } from 'vitest'; +import { + generateWorkflow, + emitBootstrapPhase, + emitSkillPhase, + emitContextPhase, + emitTaskPhase, + emitVerificationPhase, + emitFinalPhase, + slugify, + escapeTemplateString, + type GeneratedWorkflow, + type WorkflowMetadata, +} from '../workflow-generator.js'; +import type { WorkflowGeneratorInput, PersonaResolution } from '../persona-utils.js'; +``` + +### Shared Fixtures + +```ts +function createMinimalInput(overrides?: Partial<WorkflowGeneratorInput>): WorkflowGeneratorInput { + return { + 
taskDescription: 'Test task', + workflowName: 'test-task', + persona: { + id: 'code-worker-v1', + name: 'Code Worker', + intent: 'code-gen', + preset: 'worker', + pattern: 'dag', + }, + selection: { + intent: 'code-gen', + preset: 'worker', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + } as PersonaResolution, + skillPlan: { installs: [] }, + contextFiles: [], + verifications: [], + maxConcurrency: 4, + timeout: 3_600_000, + ...overrides, + }; +} + +function createFullInput(): WorkflowGeneratorInput { + return createMinimalInput({ + taskDescription: 'Review auth middleware for security issues', + workflowName: 'review-auth-middleware', + persona: { + id: 'security-reviewer-v1', + name: 'Security Reviewer', + description: 'Reviews code for security vulnerabilities', + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + }, + selection: { + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + } as PersonaResolution, + skillPlan: { + installs: [{ skillId: 'semgrep', command: 'npm install -g semgrep' }], + }, + contextFiles: [ + { stepName: 'read-auth', command: 'cat src/auth.ts' }, + { stepName: 'read-tests', command: 'cat src/auth.test.ts' }, + ], + verifications: [{ stepName: 'verify-no-eval', command: "! 
grep -r 'eval(' src/" }], + outputFile: 'reports/security.md', + }); +} + +function createPipelineInput(): WorkflowGeneratorInput { + return createMinimalInput({ + taskDescription: 'Analyze requirements from spec documents', + workflowName: 'analyze-requirements', + persona: { + id: 'requirements-analyst-v1', + name: 'Requirements Analyst', + description: 'Analyzes requirements from source material', + intent: 'requirements-analysis', + preset: 'analyst', + pattern: 'pipeline', + }, + selection: { + intent: 'requirements-analysis', + preset: 'analyst', + pattern: 'pipeline', + resolved: true, + resolutionType: 'intent', + } as PersonaResolution, + contextFiles: [ + { stepName: 'read-spec', command: 'cat docs/spec.md' }, + { stepName: 'read-readme', command: 'cat README.md' }, + ], + }); +} +``` + +### Test Suite: `slugify` + +```ts +describe('slugify', () => { + it('converts spaces to hyphens', () => { + expect(slugify('hello world')).toBe('hello-world'); + }); + + it('lowercases all characters', () => { + expect(slugify('Hello World')).toBe('hello-world'); + }); + + it('strips non-alphanumeric characters', () => { + expect(slugify('review: auth & tokens!')).toBe('review-auth-tokens'); + }); + + it('collapses multiple non-alphanumeric chars to single hyphen', () => { + expect(slugify('review --- auth')).toBe('review-auth'); + }); + + it('trims leading and trailing hyphens', () => { + expect(slugify('--hello--')).toBe('hello'); + }); + + it('truncates to 60 characters', () => { + const long = 'a'.repeat(80); + expect(slugify(long).length).toBeLessThanOrEqual(60); + }); + + it('returns empty string for empty input', () => { + expect(slugify('')).toBe(''); + }); + + it('handles special characters', () => { + expect(slugify('fix: bug #42 (auth)')).toBe('fix-bug-42-auth'); + }); +}); +``` + +### Test Suite: `escapeTemplateString` + +```ts +describe('escapeTemplateString', () => { + it('escapes backticks', () => { + expect(escapeTemplateString('use `code` 
here')).toBe('use \\`code\\` here'); + }); + + it('escapes dollar braces', () => { + expect(escapeTemplateString('echo ${HOME}')).toBe('echo \\${HOME}'); + }); + + it('escapes both backticks and dollar braces', () => { + const input = '`run ${cmd}`'; + const escaped = escapeTemplateString(input); + expect(escaped).toBe('\\`run \\${cmd}\\`'); + }); + + it('passes through plain text unchanged', () => { + expect(escapeTemplateString('hello world')).toBe('hello world'); + }); + + it('handles empty string', () => { + expect(escapeTemplateString('')).toBe(''); + }); + + it('handles consecutive dollar braces', () => { + expect(escapeTemplateString('${a}${b}')).toBe('\\${a}\\${b}'); + }); + + it('does not double-escape already escaped sequences', () => { + // Input is raw user text, not pre-escaped + const input = '\\`already escaped\\`'; + const escaped = escapeTemplateString(input); + expect(escaped).toBe('\\\\\\`already escaped\\\\\\`'); + }); +}); +``` + +### Test Suite: `generateWorkflow` + +```ts +describe('generateWorkflow', () => { + it('produces valid TypeScript structure for minimal input', () => { + const { source } = generateWorkflow(createMinimalInput()); + expect(source).toContain("import { workflow } from '@agent-relay/sdk/workflows'"); + expect(source).toContain('async function main()'); + expect(source).toContain('.run()'); + expect(source).toContain('main().catch'); + }); + + it('includes all phases for full input', () => { + const { source } = generateWorkflow(createFullInput()); + // Skill phase + expect(source).toContain('install-skill-semgrep'); + // Context phase + expect(source).toContain('read-auth'); + expect(source).toContain('read-tests'); + // Task phase + expect(source).toContain('execute-task'); + // Verification phase + expect(source).toContain('verify-no-eval'); + }); + + it('workflow name appears in workflow() call', () => { + const { source } = generateWorkflow(createMinimalInput()); + expect(source).toContain("workflow('test-task')"); + 
}); + + it('pattern is set from selection', () => { + const { source } = generateWorkflow(createPipelineInput()); + expect(source).toContain(".pattern('pipeline')"); + }); + + it('channel name is derived from workflow name', () => { + const { source } = generateWorkflow(createMinimalInput()); + expect(source).toContain(".channel('wf-test-task')"); + }); + + it('maxConcurrency is configurable', () => { + const { source } = generateWorkflow(createMinimalInput({ maxConcurrency: 8 })); + expect(source).toContain('.maxConcurrency(8)'); + }); + + it('timeout is configurable', () => { + const { source } = generateWorkflow(createMinimalInput({ timeout: 1_800_000 })); + expect(source).toContain('.timeout(1800000)'); + }); + + it('output file produces console.log', () => { + const { source } = generateWorkflow(createMinimalInput({ outputFile: 'out.md' })); + expect(source).toContain('out.md'); + }); + + it('no output file omits output log', () => { + const { source } = generateWorkflow(createMinimalInput()); + expect(source).not.toContain('Output:'); + }); + + it('returns GeneratedWorkflow shape with source and metadata', () => { + const result = generateWorkflow(createMinimalInput()); + expect(result).toHaveProperty('source'); + expect(result).toHaveProperty('metadata'); + expect(typeof result.source).toBe('string'); + expect(typeof result.metadata).toBe('object'); + }); + + it('metadata.name matches workflowName', () => { + const { metadata } = generateWorkflow(createMinimalInput()); + expect(metadata.name).toBe('test-task'); + }); +}); +``` + +### Test Suite: `emitBootstrapPhase` + +```ts +describe('emitBootstrapPhase', () => { + const defaultOpts = { indent: ' ', comments: true, header: true }; + + it('includes import statement', () => { + const lines = emitBootstrapPhase(createMinimalInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("import { workflow } from '@agent-relay/sdk/workflows'"); + }); + + it('includes header comment when 
enabled', () => { + const lines = emitBootstrapPhase(createMinimalInput(), { ...defaultOpts, header: true }); + expect(lines[0]).toMatch(/^\/\*\*/); + }); + + it('omits header comment when disabled', () => { + const lines = emitBootstrapPhase(createMinimalInput(), { ...defaultOpts, header: false }); + expect(lines[0]).not.toMatch(/^\/\*\*/); + }); + + it('agent name follows intent convention', () => { + const input = createMinimalInput({ + selection: { + intent: 'review', + preset: 'analyst', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + } as PersonaResolution, + }); + const lines = emitBootstrapPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'review-agent'"); + }); + + it('agent preset matches selection', () => { + const input = createMinimalInput({ + selection: { + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + } as PersonaResolution, + }); + const lines = emitBootstrapPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("preset: 'analyst'"); + }); + + it('agent role uses persona description when available', () => { + const input = createMinimalInput({ + persona: { + id: 'x', + name: 'X', + description: 'Custom role description', + intent: 'code-gen', + }, + }); + const lines = emitBootstrapPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("role: 'Custom role description'"); + }); + + it('agent role falls back to task description', () => { + const input = createMinimalInput({ + persona: { id: 'x', name: 'X', intent: 'code-gen' }, + }); + const lines = emitBootstrapPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("role: 'Test task'"); + }); + + it('respects indent option', () => { + const lines = emitBootstrapPhase(createMinimalInput(), { ...defaultOpts, indent: '    ' }); + const output = lines.join('\n'); + // 4-space indent 
should be present in the output body + expect(output).toMatch(/\n {4}\S/); + }); +}); +``` + +### Test Suite: `emitSkillPhase` + +```ts +describe('emitSkillPhase', () => { + const defaultOpts = { indent: ' ', comments: true, header: true }; + + it('returns empty array when no skills', () => { + const lines = emitSkillPhase(createMinimalInput(), defaultOpts); + expect(lines).toHaveLength(0); + }); + + it('generates step for single skill install', () => { + const input = createMinimalInput({ + skillPlan: { + installs: [{ skillId: 'eslint', command: 'npm install -g eslint' }], + }, + }); + const lines = emitSkillPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'install-skill-eslint'"); + expect(output).toContain("'npm install -g eslint'"); + }); + + it('generates parallel steps for multiple skill installs', () => { + const input = createMinimalInput({ + skillPlan: { + installs: [ + { skillId: 'eslint', command: 'npm install -g eslint' }, + { skillId: 'prettier', command: 'npm install -g prettier' }, + ], + }, + }); + const lines = emitSkillPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'install-skill-eslint'"); + expect(output).toContain("'install-skill-prettier'"); + // Steps should not depend on each other + expect(output).not.toContain("dependsOn: ['install-skill-eslint']"); + }); + + it('marks steps as deterministic', () => { + const input = createMinimalInput({ + skillPlan: { + installs: [{ skillId: 'foo', command: 'npm i foo' }], + }, + }); + const lines = emitSkillPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("type: 'deterministic'"); + }); + + it('sets failOnError: true', () => { + const input = createMinimalInput({ + skillPlan: { + installs: [{ skillId: 'foo', command: 'npm i foo' }], + }, + }); + const lines = emitSkillPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('failOnError: true'); + }); 
+}); +``` + +### Test Suite: `emitContextPhase` + +```ts +describe('emitContextPhase', () => { + const defaultOpts = { indent: ' ', comments: true, header: true }; + + it('returns empty array when no context files', () => { + const lines = emitContextPhase(createMinimalInput(), defaultOpts); + expect(lines).toHaveLength(0); + }); + + it('generates step for single context file', () => { + const input = createMinimalInput({ + contextFiles: [{ stepName: 'read-config', command: 'cat config.json' }], + }); + const lines = emitContextPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'read-config'"); + expect(output).toContain("'cat config.json'"); + }); + + it('sets captureOutput: true', () => { + const input = createMinimalInput({ + contextFiles: [{ stepName: 'read-it', command: 'cat file.ts' }], + }); + const lines = emitContextPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('captureOutput: true'); + }); + + it('context steps depend on skill steps when present', () => { + const input = createMinimalInput({ + skillPlan: { + installs: [{ skillId: 'tool', command: 'npm i tool' }], + }, + contextFiles: [{ stepName: 'read-it', command: 'cat file.ts' }], + }); + const lines = emitContextPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('install-skill-tool'); + }); + + it('context steps have no dependencies when no skills', () => { + const input = createMinimalInput({ + contextFiles: [{ stepName: 'read-it', command: 'cat file.ts' }], + }); + const lines = emitContextPhase(input, defaultOpts); + const output = lines.join('\n'); + // Should not contain a dependsOn referencing skill steps + expect(output).not.toContain('install-skill'); + }); + + it('context steps are independent of each other', () => { + const input = createMinimalInput({ + contextFiles: [ + { stepName: 'read-a', command: 'cat a.ts' }, + { stepName: 'read-b', command: 'cat b.ts' }, + ], + }); 
+ const lines = emitContextPhase(input, defaultOpts); + const output = lines.join('\n'); + // Neither context step should depend on the other + expect(output).not.toContain("dependsOn: ['read-a']"); + expect(output).not.toContain("dependsOn: ['read-b']"); + }); +}); +``` + +### Test Suite: `emitTaskPhase` + +```ts +describe('emitTaskPhase', () => { + const defaultOpts = { indent: ' ', comments: true, header: true }; + + it('DAG pattern produces single execute-task step', () => { + const input = createMinimalInput({ + contextFiles: [{ stepName: 'read-it', command: 'cat f.ts' }], + }); + const lines = emitTaskPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'execute-task'"); + // Should contain exactly one step declaration + const stepMatches = output.match(/\.step\(/g); + expect(stepMatches).toHaveLength(1); + }); + + it('pipeline pattern produces sequential steps', () => { + const lines = emitTaskPhase(createPipelineInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'analyze'"); + expect(output).toContain("'synthesize'"); + expect(output).toContain("'validate'"); + // Chained dependencies + expect(output).toContain("dependsOn: ['analyze']"); + expect(output).toContain("dependsOn: ['synthesize']"); + }); + + it('task step depends on all context steps', () => { + const input = createMinimalInput({ + contextFiles: [ + { stepName: 'read-auth', command: 'cat auth.ts' }, + { stepName: 'read-tests', command: 'cat auth.test.ts' }, + ], + }); + const lines = emitTaskPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('read-auth'); + expect(output).toContain('read-tests'); + }); + + it('context outputs are interpolated into task prompt', () => { + const input = createMinimalInput({ + contextFiles: [{ stepName: 'read-auth', command: 'cat auth.ts' }], + }); + const lines = emitTaskPhase(input, defaultOpts); + const output = lines.join('\n'); + 
expect(output).toContain('{{steps.read-auth.output}}'); + }); + + it('task description appears in agent task', () => { + const input = createMinimalInput({ taskDescription: 'Fix the login bug' }); + const lines = emitTaskPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('Fix the login bug'); + }); + + it('retries defaults to 2 for primary task', () => { + const lines = emitTaskPhase(createMinimalInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('retries: 2'); + }); + + it('verification defaults to exit_code', () => { + const lines = emitTaskPhase(createMinimalInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("type: 'exit_code'"); + }); + + it('pipeline first step (analyze) has context deps but not task deps', () => { + const input = createPipelineInput(); + const lines = emitTaskPhase(input, defaultOpts); + const output = lines.join('\n'); + // analyze should depend on context steps, not on other pipeline steps + expect(output).toContain('read-spec'); + expect(output).toContain('read-readme'); + }); +}); +``` + +### Test Suite: `emitVerificationPhase` + +```ts +describe('emitVerificationPhase', () => { + const defaultOpts = { indent: ' ', comments: true, header: true }; + + it('returns empty array when no verifications', () => { + const lines = emitVerificationPhase(createMinimalInput(), defaultOpts); + expect(lines).toHaveLength(0); + }); + + it('generates step for single verification', () => { + const input = createMinimalInput({ + verifications: [{ stepName: 'verify-lint', command: 'npm run lint' }], + }); + const lines = emitVerificationPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'verify-lint'"); + expect(output).toContain("'npm run lint'"); + expect(output).toContain('failOnError: true'); + }); + + it('verification steps depend on task step (DAG)', () => { + const input = createMinimalInput({ + 
verifications: [{ stepName: 'verify-lint', command: 'npm run lint' }], + }); + const lines = emitVerificationPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'execute-task'"); + }); + + it('verification steps depend on last pipeline step', () => { + const input = createPipelineInput(); + (input as any).verifications = [{ stepName: 'verify-out', command: 'test -f out.md' }]; + const lines = emitVerificationPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'validate'"); + }); + + it('multiple verification steps are parallel', () => { + const input = createMinimalInput({ + verifications: [ + { stepName: 'verify-lint', command: 'npm run lint' }, + { stepName: 'verify-types', command: 'npx tsc --noEmit' }, + ], + }); + const lines = emitVerificationPhase(input, defaultOpts); + const output = lines.join('\n'); + expect(output).toContain("'verify-lint'"); + expect(output).toContain("'verify-types'"); + // Neither verification step depends on the other + expect(output).not.toContain("dependsOn: ['verify-lint']"); + expect(output).not.toContain("dependsOn: ['verify-types']"); + }); +}); +``` + +### Test Suite: `emitFinalPhase` + +```ts +describe('emitFinalPhase', () => { + const defaultOpts = { indent: ' ', comments: true, header: true }; + + it('includes onError strategy', () => { + const lines = emitFinalPhase(createMinimalInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain(".onError('fail-fast')"); + }); + + it('includes run() call', () => { + const lines = emitFinalPhase(createMinimalInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('.run()'); + }); + + it('closes main function with catch handler', () => { + const lines = emitFinalPhase(createMinimalInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('main().catch'); + }); + + it('includes process.exit(1) on error', () => { + const lines = 
emitFinalPhase(createMinimalInput(), defaultOpts); + const output = lines.join('\n'); + expect(output).toContain('process.exit(1)'); + }); +}); +``` + +### Test Suite: `WorkflowMetadata` + +```ts +describe('WorkflowMetadata', () => { + it('step count is sum of all phases', () => { + const { metadata } = generateWorkflow(createFullInput()); + const expected = metadata.phases.bootstrap + + metadata.phases.skills + + metadata.phases.context + + metadata.phases.task + + metadata.phases.verification + + metadata.phases.final; + expect(metadata.stepCount).toBe(expected); + }); + + it('agent count is 1', () => { + const { metadata } = generateWorkflow(createMinimalInput()); + expect(metadata.agentCount).toBe(1); + }); + + it('estimated waves for full DAG input', () => { + const { metadata } = generateWorkflow(createFullInput()); + // skills(1) + context(1) + task(1) + verification(1) + final(1) = 5 + expect(metadata.estimatedWaves).toBeGreaterThanOrEqual(4); + }); + + it('estimated waves for minimal input', () => { + const { metadata } = generateWorkflow(createMinimalInput()); + // task(1) + final(1) = 2 + expect(metadata.estimatedWaves).toBe(2); + }); + + it('pipeline adds extra waves vs DAG equivalent', () => { + const dagInput = createMinimalInput({ + contextFiles: [{ stepName: 'ctx', command: 'cat f.ts' }], + }); + const pipelineInput = createPipelineInput(); + const dagWaves = generateWorkflow(dagInput).metadata.estimatedWaves; + const pipelineWaves = generateWorkflow(pipelineInput).metadata.estimatedWaves; + expect(pipelineWaves).toBeGreaterThan(dagWaves); + }); + + it('hasSkills reflects skill installs', () => { + expect(generateWorkflow(createMinimalInput()).metadata.hasSkills).toBe(false); + expect(generateWorkflow(createFullInput()).metadata.hasSkills).toBe(true); + }); + + it('hasVerification reflects verification steps', () => { + expect(generateWorkflow(createMinimalInput()).metadata.hasVerification).toBe(false); + 
expect(generateWorkflow(createFullInput()).metadata.hasVerification).toBe(true); + }); + + it('pattern matches the selection pattern', () => { + const dagMeta = generateWorkflow(createMinimalInput()).metadata; + expect(dagMeta.pattern).toBe('dag'); + + const pipeMeta = generateWorkflow(createPipelineInput()).metadata; + expect(pipeMeta.pattern).toBe('pipeline'); + }); + + it('preset matches the selection preset', () => { + const workerMeta = generateWorkflow(createMinimalInput()).metadata; + expect(workerMeta.preset).toBe('worker'); + + const analystMeta = generateWorkflow(createFullInput()).metadata; + expect(analystMeta.preset).toBe('analyst'); + }); +}); +``` + +### Test Suite: Edge Cases + +```ts +describe('edge cases', () => { + it('backticks in task description are escaped', () => { + const input = createMinimalInput({ + taskDescription: 'Use `code` blocks and `template` strings', + }); + const { source } = generateWorkflow(input); + // The generated source should be valid — no unescaped backticks breaking template literals + expect(source).toContain('Use'); + expect(source).not.toMatch(/`code`/); // backticks should be escaped + }); + + it('dollar braces in commands are escaped', () => { + const input = createMinimalInput({ + contextFiles: [{ stepName: 'read-env', command: 'echo ${HOME}' }], + }); + const { source } = generateWorkflow(input); + // Should not produce a raw ${HOME} inside a template literal + expect(source).toContain('echo'); + }); + + it('very long task description is handled', () => { + const longDesc = 'A'.repeat(2000); + const input = createMinimalInput({ taskDescription: longDesc }); + const { source } = generateWorkflow(input); + expect(source).toContain(longDesc); + }); + + it('empty task description produces valid workflow', () => { + const input = createMinimalInput({ taskDescription: '' }); + const { source } = generateWorkflow(input); + expect(source).toContain('.run()'); + expect(source).toContain('main().catch'); + }); + + 
it('special characters in workflow name', () => { + const input = createMinimalInput({ workflowName: 'review-auth_v2.1' }); + const { source } = generateWorkflow(input); + expect(source).toContain("workflow('review-auth_v2.1')"); + }); + + it('newlines in task description are preserved in prompt', () => { + const input = createMinimalInput({ + taskDescription: 'Line one\nLine two\nLine three', + }); + const { source } = generateWorkflow(input); + expect(source).toContain('Line one'); + expect(source).toContain('Line two'); + }); + + it('single quotes in persona description are escaped', () => { + const input = createMinimalInput({ + persona: { + id: 'x', + name: 'X', + description: "Don't break the string", + intent: 'code-gen', + }, + }); + const { source } = generateWorkflow(input); + // Must not produce unmatched single quotes in generated source + expect(source).toContain('break the string'); + }); +}); +``` + +--- + +## File 3: `packages/sdk/src/workflows/__tests__/workflow-generator.integration.test.ts` + +### Purpose + +End-to-end integration tests that exercise the full pipeline: persona resolution → workflow generation → source validity. These tests verify that all three phases produce coherent output when composed together. 
+ +### Imports + +```ts +import { describe, it, expect, beforeEach } from 'vitest'; +import { + resolvePersonaByIdOrIntent, + resetPersonaRegistry, + initPersonaRegistry, + DEFAULT_PERSONA_PROFILES, + type PersonaResolution, + type WorkflowGeneratorInput, +} from '../persona-utils.js'; +import { generateWorkflow, type GeneratedWorkflow } from '../workflow-generator.js'; +``` + +### Helper: Build input from resolution + +```ts +function buildInput( + resolution: PersonaResolution, + taskDescription: string, + overrides?: Partial<WorkflowGeneratorInput> +): WorkflowGeneratorInput { + const workflowName = taskDescription + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .slice(0, 60) // cut first so the trim below also drops a hyphen left at the cut + .replace(/^-|-$/g, ''); + + return { + taskDescription, + workflowName, + persona: resolution.persona ?? { id: 'unknown', name: 'Unknown', intent: resolution.intent }, + selection: resolution, + skillPlan: { installs: [] }, + contextFiles: [], + verifications: [], + maxConcurrency: 4, + timeout: 3_600_000, + ...overrides, + }; +} +``` + +### Integration Test Suite: Resolve → Generate round-trip + +```ts +describe('persona resolution → workflow generation (integration)', () => { + beforeEach(() => { + resetPersonaRegistry(); + initPersonaRegistry(DEFAULT_PERSONA_PROFILES); + }); + + describe('all registered intents produce valid workflows', () => { + it.each([ + ['review', 'Review the auth module'], + ['architecture-plan', 'Plan the migration architecture'], + ['requirements-analysis', 'Analyze requirements from spec'], + ['security-review', 'Audit auth middleware for vulnerabilities'], + ['verification', 'Verify deployment configuration'], + ['test-strategy', 'Design test strategy for payment module'], + ['documentation', 'Write API documentation'], + ['tdd-enforcement', 'Enforce TDD on the parser module'], + ['code-gen', 'Implement user profile service'], + ] as const)('intent "%s" with task "%s" produces runnable workflow', (intent, task) => { + const resolution = resolvePersonaByIdOrIntent(intent); + const 
input = buildInput(resolution, task); + const { source, metadata } = generateWorkflow(input); + + // Structure checks + expect(source).toContain('import { workflow }'); + expect(source).toContain('async function main()'); + expect(source).toContain('.run()'); + expect(source).toContain('main().catch'); + + // Persona-derived values + expect(source).toContain(`.pattern('${resolution.pattern}')`); + expect(source).toContain(`preset: '${resolution.preset}'`); + + // Metadata consistency + expect(metadata.pattern).toBe(resolution.pattern); + expect(metadata.preset).toBe(resolution.preset); + expect(metadata.agentCount).toBe(1); + expect(metadata.stepCount).toBeGreaterThan(0); + }); + }); + + describe('unregistered intents derive valid workflows', () => { + it.each([ + ['implement-frontend', 'Build new dashboard UI'], + ['debugging', 'Fix flaky WebSocket reconnect'], + ['flake-investigation', 'Investigate CI test flakes'], + ['opencode-workflow-correctness', 'Validate open-source workflow config'], + ['npm-provenance', 'Set up npm provenance attestation'], + ] as const)('derived intent "%s" with task "%s" produces valid workflow', (intent, task) => { + const resolution = resolvePersonaByIdOrIntent(intent); + expect(resolution.resolved).toBe(false); + + const input = buildInput(resolution, task); + const { source, metadata } = generateWorkflow(input); + + // Still produces valid structure + expect(source).toContain('import { workflow }'); + expect(source).toContain('.run()'); + + // Uses derived values + expect(metadata.preset).toBe('worker'); + expect(metadata.pattern).toBe('dag'); + }); + }); + + describe('all 10 persona IDs produce valid workflows', () => { + it.each([ + 'reviewer-v1', + 'reviewer-v2', + 'architect-v1', + 'requirements-analyst-v1', + 'security-reviewer-v1', + 'verifier-v1', + 'test-strategist-v1', + 'docs-writer-v1', + 'tdd-coach-v1', + 'code-worker-v1', + ])('persona ID "%s" resolves and generates a valid workflow', (personaId) => { + const 
resolution = resolvePersonaByIdOrIntent(personaId); + expect(resolution.resolved).toBe(true); + + const input = buildInput(resolution, `Task for ${personaId}`); + const { source, metadata } = generateWorkflow(input); + + expect(source).toContain('import { workflow }'); + expect(source).toContain('.run()'); + expect(metadata.agentCount).toBe(1); + }); + }); + + describe('cross-pattern consistency', () => { + it('DAG intents all use "dag" pattern in generated source', () => { + const dagIntents = [ + 'review', + 'architecture-plan', + 'security-review', + 'verification', + 'test-strategy', + 'code-gen', + ]; + for (const intent of dagIntents) { + const resolution = resolvePersonaByIdOrIntent(intent); + const input = buildInput(resolution, `Task for ${intent}`); + const { source } = generateWorkflow(input); + expect(source).toContain(".pattern('dag')"); + } + }); + + it('pipeline intents all use "pipeline" pattern in generated source', () => { + const pipelineIntents = ['requirements-analysis', 'documentation', 'tdd-enforcement']; + for (const intent of pipelineIntents) { + const resolution = resolvePersonaByIdOrIntent(intent); + const input = buildInput(resolution, `Task for ${intent}`); + const { source } = generateWorkflow(input); + expect(source).toContain(".pattern('pipeline')"); + } + }); + }); +}); +``` + +### Snapshot Tests: Reference Workflow Comparison + +```ts +describe('snapshot tests: generated workflow source', () => { + beforeEach(() => { + resetPersonaRegistry(); + initPersonaRegistry(DEFAULT_PERSONA_PROFILES); + }); + + it('minimal DAG workflow matches snapshot', () => { + const resolution = resolvePersonaByIdOrIntent('code-gen'); + const input = buildInput(resolution, 'Implement user service', { + contextFiles: [{ stepName: 'read-schema', command: 'cat src/schema.ts' }], + }); + const { source } = generateWorkflow(input); + expect(source).toMatchSnapshot(); + }); + + it('full security review workflow matches snapshot', () => { + const resolution = 
resolvePersonaByIdOrIntent('security-review'); + const input = buildInput(resolution, 'Audit auth module', { + contextFiles: [ + { stepName: 'read-auth', command: 'cat src/auth.ts' }, + { stepName: 'read-config', command: 'cat src/config.ts' }, + ], + verifications: [ + { stepName: 'verify-no-eval', command: "! grep -r 'eval(' src/" }, + { stepName: 'verify-no-exec', command: "! grep -r 'exec(' src/" }, + ], + skillPlan: { + installs: [{ skillId: 'semgrep', command: 'npx semgrep --install' }], + }, + outputFile: 'reports/security-audit.md', + }); + const { source } = generateWorkflow(input); + expect(source).toMatchSnapshot(); + }); + + it('pipeline requirements analysis workflow matches snapshot', () => { + const resolution = resolvePersonaByIdOrIntent('requirements-analysis'); + const input = buildInput(resolution, 'Analyze product requirements', { + contextFiles: [ + { stepName: 'read-spec', command: 'cat docs/spec.md' }, + { stepName: 'read-readme', command: 'cat README.md' }, + { stepName: 'read-package', command: 'cat package.json' }, + ], + }); + const { source } = generateWorkflow(input); + expect(source).toMatchSnapshot(); + }); + + it('documentation pipeline workflow matches snapshot', () => { + const resolution = resolvePersonaByIdOrIntent('documentation'); + const input = buildInput(resolution, 'Write API reference docs', { + contextFiles: [ + { stepName: 'read-api', command: 'cat src/api/index.ts' }, + { stepName: 'read-types', command: 'cat src/types.ts' }, + ], + outputFile: 'docs/api-reference.md', + }); + const { source } = generateWorkflow(input); + expect(source).toMatchSnapshot(); + }); + + it('TDD enforcement pipeline workflow matches snapshot', () => { + const resolution = resolvePersonaByIdOrIntent('tdd-enforcement'); + const input = buildInput(resolution, 'Enforce TDD on parser module', { + contextFiles: [ + { stepName: 'read-parser', command: 'cat src/parser.ts' }, + { stepName: 'read-parser-tests', command: 'cat src/parser.test.ts' }, + ], 
+ }); + const { source } = generateWorkflow(input); + expect(source).toMatchSnapshot(); + }); + + it('minimal workflow with no context, skills, or verification matches snapshot', () => { + const resolution = resolvePersonaByIdOrIntent('code-gen'); + const input = buildInput(resolution, 'Scaffold a new module'); + const { source } = generateWorkflow(input); + expect(source).toMatchSnapshot(); + }); +}); +``` + +### Metadata Consistency Tests + +```ts +describe('metadata consistency across patterns', () => { + beforeEach(() => { + resetPersonaRegistry(); + initPersonaRegistry(DEFAULT_PERSONA_PROFILES); + }); + + it('DAG metadata phases sum to stepCount', () => { + const resolution = resolvePersonaByIdOrIntent('security-review'); + const input = buildInput(resolution, 'Review code', { + contextFiles: [{ stepName: 'read-src', command: 'cat src/index.ts' }], + verifications: [{ stepName: 'verify-lint', command: 'npm run lint' }], + skillPlan: { + installs: [{ skillId: 'tool', command: 'npm i tool' }], + }, + }); + const { metadata } = generateWorkflow(input); + const sum = + metadata.phases.bootstrap + + metadata.phases.skills + + metadata.phases.context + + metadata.phases.task + + metadata.phases.verification + + metadata.phases.final; + expect(metadata.stepCount).toBe(sum); + }); + + it('pipeline metadata phases sum to stepCount', () => { + const resolution = resolvePersonaByIdOrIntent('requirements-analysis'); + const input = buildInput(resolution, 'Analyze reqs', { + contextFiles: [{ stepName: 'read-doc', command: 'cat doc.md' }], + }); + const { metadata } = generateWorkflow(input); + const sum = + metadata.phases.bootstrap + + metadata.phases.skills + + metadata.phases.context + + metadata.phases.task + + metadata.phases.verification + + metadata.phases.final; + expect(metadata.stepCount).toBe(sum); + }); + + it('pipeline task phase has 3 steps (analyze, synthesize, validate)', () => { + const resolution = resolvePersonaByIdOrIntent('documentation'); + const 
input = buildInput(resolution, 'Write docs'); + const { metadata } = generateWorkflow(input); + expect(metadata.phases.task).toBe(3); + }); + + it('DAG task phase has 1 step (execute-task)', () => { + const resolution = resolvePersonaByIdOrIntent('review'); + const input = buildInput(resolution, 'Review code'); + const { metadata } = generateWorkflow(input); + expect(metadata.phases.task).toBe(1); + }); + + it('skills phase count matches installs length', () => { + const resolution = resolvePersonaByIdOrIntent('code-gen'); + const input = buildInput(resolution, 'Build something', { + skillPlan: { + installs: [ + { skillId: 'a', command: 'npm i a' }, + { skillId: 'b', command: 'npm i b' }, + { skillId: 'c', command: 'npm i c' }, + ], + }, + }); + const { metadata } = generateWorkflow(input); + expect(metadata.phases.skills).toBe(3); + }); + + it('context phase count matches contextFiles length', () => { + const resolution = resolvePersonaByIdOrIntent('review'); + const input = buildInput(resolution, 'Review code', { + contextFiles: [ + { stepName: 'read-a', command: 'cat a.ts' }, + { stepName: 'read-b', command: 'cat b.ts' }, + ], + }); + const { metadata } = generateWorkflow(input); + expect(metadata.phases.context).toBe(2); + }); + + it('verification phase count matches verifications length', () => { + const resolution = resolvePersonaByIdOrIntent('code-gen'); + const input = buildInput(resolution, 'Build code', { + verifications: [ + { stepName: 'verify-lint', command: 'npm run lint' }, + { stepName: 'verify-types', command: 'npx tsc --noEmit' }, + ], + }); + const { metadata } = generateWorkflow(input); + expect(metadata.phases.verification).toBe(2); + }); +}); +``` + +### Source Validity Tests + +```ts +describe('generated source validity', () => { + beforeEach(() => { + resetPersonaRegistry(); + initPersonaRegistry(DEFAULT_PERSONA_PROFILES); + }); + + it('generated source has balanced braces', () => { + const resolution = 
resolvePersonaByIdOrIntent('security-review'); + const input = buildInput(resolution, 'Review auth', { + contextFiles: [{ stepName: 'read-src', command: 'cat src/auth.ts' }], + verifications: [{ stepName: 'verify-lint', command: 'npm run lint' }], + skillPlan: { installs: [{ skillId: 'semgrep', command: 'npx semgrep' }] }, + }); + const { source } = generateWorkflow(input); + + const openBraces = (source.match(/\{/g) || []).length; + const closeBraces = (source.match(/\}/g) || []).length; + expect(openBraces).toBe(closeBraces); + }); + + it('generated source has balanced parentheses', () => { + const resolution = resolvePersonaByIdOrIntent('code-gen'); + const input = buildInput(resolution, 'Implement feature'); + const { source } = generateWorkflow(input); + + const openParens = (source.match(/\(/g) || []).length; + const closeParens = (source.match(/\)/g) || []).length; + expect(openParens).toBe(closeParens); + }); + + it('generated source does not contain undefined or null literals in unexpected places', () => { + const resolution = resolvePersonaByIdOrIntent('review'); + const input = buildInput(resolution, 'Review things'); + const { source } = generateWorkflow(input); + + // Should not have stray undefined/null from unfilled template variables + expect(source).not.toMatch(/: undefined[,\n]/); + expect(source).not.toMatch(/: null[,\n]/); + }); + + it('all step names are valid identifiers (lowercase, hyphens, digits)', () => { + const resolution = resolvePersonaByIdOrIntent('security-review'); + const input = buildInput(resolution, 'Review code', { + contextFiles: [ + { stepName: 'read-auth-module', command: 'cat auth.ts' }, + { stepName: 'read-config-v2', command: 'cat config.ts' }, + ], + verifications: [{ stepName: 'verify-no-eval-calls', command: '! 
grep eval src/' }], + skillPlan: { + installs: [{ skillId: 'semgrep-v1', command: 'npm i semgrep' }], + }, + }); + const { source } = generateWorkflow(input); + + // Extract step names from .step('...') calls + const stepNames = [...source.matchAll(/\.step\('([^']+)'/g)].map((m) => m[1]); + for (const name of stepNames) { + expect(name).toMatch(/^[a-z0-9][a-z0-9-]*$/); + } + }); +}); +``` + +--- + +## Test Runner Configuration + +All test files use the project's existing vitest configuration. No changes to `vitest.config.ts` are required. Tests are run via: + +```bash +# Run all Phase 4 tests +npx vitest run packages/sdk/src/workflows/__tests__/persona-utils.test.ts +npx vitest run packages/sdk/src/workflows/__tests__/workflow-generator.test.ts +npx vitest run packages/sdk/src/workflows/__tests__/workflow-generator.integration.test.ts + +# Run all three together +npx vitest run packages/sdk/src/workflows/__tests__/ + +# Update snapshots after intentional changes +npx vitest run --update packages/sdk/src/workflows/__tests__/workflow-generator.integration.test.ts +``` + +--- + +## Snapshot Management + +### Initial snapshot creation + +On the first run, vitest automatically creates snapshot files in `__tests__/__snapshots__/`. These serve as the reference workflows. + +### When to update snapshots + +Snapshots must be updated (`--update`) whenever: + +- The generated code format changes (indentation, comments, structure) +- New phases are added to the generator +- The WorkflowBuilder API methods change +- Default values (retries, error strategy) are modified + +### Snapshot review process + +After updating, always review the `.snap` file diff to verify the changes are intentional. Generated workflow snapshots are the single source of truth for "what the generator produces." 
+ +--- + +## Test Coverage Matrix + +### `resolvePersonaByIdOrIntent` — Complete Coverage + +| Input Type | Values Tested | Count | +| -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | +| Registered intents | `review`, `architecture-plan`, `requirements-analysis`, `security-review`, `verification`, `test-strategy`, `documentation`, `tdd-enforcement`, `code-gen` | 9 | +| Unregistered intents | `implement-frontend`, `debugging`, `flake-investigation`, `opencode-workflow-correctness`, `npm-provenance` | 5 | +| Persona IDs | `reviewer-v1`, `reviewer-v2`, `architect-v1`, `requirements-analyst-v1`, `security-reviewer-v1`, `verifier-v1`, `test-strategist-v1`, `docs-writer-v1`, `tdd-coach-v1`, `code-worker-v1` | 10 | +| Unknown refs | `unknown-persona`, `''`, `' '`, `'null'`, `'a'.repeat(1000)` | 5 | +| Case variants | `REVIEWER-V1`, `Security-Review`, `' review '` | 3 | +| Profile hints | Match, no-match, ignore-on-ID-lookup | 3 | +| **Total** | | **35** | + +### `resolvePersonaSelection` — Convenience Wrapper + +| Input Type | Values Tested | Count | +| ------------------------ | ----------------------------------- | ----- | +| Basic ref delegation | Intent ref | 1 | +| Profile hint passthrough | reviewer-v2 hint | 1 | +| Unknown ref derivation | Custom unregistered ref | 1 | +| Context fields | workflowType + taskType passthrough | 1 | +| **Total** | | **4** | + +### `derivePreset` — Complete Coverage + +| Input Type | Values Tested | Count | +| --------------- | ----------------------------------------------------------------------------------------------------------------------- | ------ | +| Analyst intents | All 6 from `ANALYST_INTENTS` | 6 | +| Worker intents | All 7 remaining production intents | 7 | +| Case variants | `REVIEW`, `Security-Review`, `Architecture-Plan`, `REQUIREMENTS-ANALYSIS`, 
`DEBUGGING`, `Documentation`, `' review '` | 7 | +| Unknown/edge | `unknown-intent`, `''`, `code-gen`, `refactor`, `deploy`, `review-extended`, `security` | 7 | +| **Total** | | **27** | + +### `derivePattern` — Complete Coverage + +| Input Type | Values Tested | Count | +| ---------------- | -------------------------------------------------------------------------------------------- | ------ | +| Pipeline intents | All 3 from `PIPELINE_INTENTS` | 3 | +| DAG intents | All 10 remaining production intents | 10 | +| Case variants | `REQUIREMENTS-ANALYSIS`, `Documentation`, `TDD-Enforcement`, `' documentation '`, `REVIEW` | 5 | +| Unknown/edge | `unknown-intent`, `''`, `code-gen`, `documentation-extended` | 4 | +| **Total** | | **22** | + +### `slugify` — Helper Coverage + +| Input Type | Values Tested | Count | +| ---------------- | ------------------------------------------------------------------- | ----- | +| Basic conversion | Spaces, case, special chars, collapse, trim, truncate, empty, mixed | 8 | +| **Total** | | **8** | + +### `escapeTemplateString` — Helper Coverage + +| Input Type | Values Tested | Count | +| ---------- | ---------------------------------------------------------------------- | ----- | +| Escaping | Backticks, dollar braces, both, plain, empty, consecutive, pre-escaped | 7 | +| **Total** | | **7** | + +### `generateWorkflow` — Complete Coverage + +| Category | Test Cases | Count | +| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Structure validation | Import, main function, .run(), .catch, shape, name | 6 | +| Phase inclusion | Skill, context, task, verification steps present | 4 | +| Configuration | Workflow name, pattern, channel, concurrency, timeout, output | 6 | +| Bootstrap phase | Import, header on/off, agent naming, preset, role desc, role fallback, indent | 8 | +| Skill phase | Empty, single, multiple, 
deterministic, failOnError | 5 | +| Context phase | Empty, single, captureOutput, skill deps, no deps, independence | 6 | +| Task phase | DAG single step, pipeline sequential, context deps, interpolation, description, retries, verification, pipeline first step deps | 8 | +| Verification phase | Empty, single step, DAG deps, pipeline deps, multiple parallel | 5 | +| Final phase | onError, run(), catch, process.exit | 4 | +| Metadata | Step sum, agent count, waves full/minimal, pipeline waves, hasSkills, hasVerification, pattern, preset | 9 | +| Edge cases | Backticks, dollar braces, long description, empty description, special chars, newlines, single quotes | 7 | +| Integration round-trips | 9 registered + 5 unregistered + 10 persona IDs + 2 cross-pattern | 26 | +| Snapshots | 6 reference workflows | 6 | +| Metadata consistency | Phase sums (DAG/pipeline), task phase counts, skills/context/verification counts | 7 | +| Source validity | Balanced braces, balanced parens, no stray undefined/null, valid step names | 4 | +| **Total** | | **111** | + +### Grand Total by File + +| File | Test Count | +| ---------------------------------------- | ---------- | +| `persona-utils.test.ts` | 45 | +| `workflow-generator.test.ts` | 63 | +| `workflow-generator.integration.test.ts` | 53 | +| **Grand Total** | **~161** | + +> Note: counts include individual parameterized test cases from `it.each`. 
+ +--- + +## Acceptance Criteria + +- [ ] All 35 `resolvePersonaByIdOrIntent` test cases pass, covering all 13 intents, all 10 persona IDs, fallback derivation, profile hints, and case handling +- [ ] All 4 `resolvePersonaSelection` wrapper tests pass, verifying delegation and profile passthrough +- [ ] All 27 `derivePreset` test cases pass, covering all 6 analyst intents, all 7 worker intents, case variants, and edge cases +- [ ] All 22 `derivePattern` test cases pass, covering all 3 pipeline intents, all 10 DAG intents, case variants, and edge cases +- [ ] All 8 `slugify` tests pass (spaces, case, special chars, truncation, edge cases) +- [ ] All 7 `escapeTemplateString` tests pass (backticks, dollar braces, combined, edge cases) +- [ ] All `generateWorkflow` unit tests pass for each emit function and the orchestrator +- [ ] All 6 snapshot tests generate stable, reproducible workflow source +- [ ] All 26 integration round-trip tests (resolve → generate) produce valid workflow structure +- [ ] All 7 metadata consistency tests pass (phase sums, task phase counts, phase-specific counts) +- [ ] All 4 source validity tests pass (balanced braces/parens, no stray values, valid step names) +- [ ] Pipeline task phase has exactly 3 steps; DAG task phase has exactly 1 step +- [ ] No new external dependencies introduced +- [ ] All tests run via `npx vitest run` with zero configuration changes +- [ ] Snapshot files are committed and reviewed as part of the PR diff --git a/build-plans/04-tests.ts b/build-plans/04-tests.ts new file mode 100644 index 000000000..5e09b09c0 --- /dev/null +++ b/build-plans/04-tests.ts @@ -0,0 +1,410 @@ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { workflow } = require('@agent-relay/sdk/workflows'); + +const REPO_ROOT = '/Users/khaliqgant/Projects/AgentWorkforce/relay-workflows'; +const SPEC_PATH = 'workflows/meta-agent-flag/04-tests.spec.md'; +const SDK_WORKFLOWS_DIR = 
'packages/sdk/src/workflows'; +const PERSONA_UTILS_PATH = `${SDK_WORKFLOWS_DIR}/persona-utils.ts`; +const WORKFLOW_GENERATOR_PATH = `${SDK_WORKFLOWS_DIR}/workflow-generator.ts`; +const CLI_PATH = `${SDK_WORKFLOWS_DIR}/cli.ts`; +const CONTEXT_HEURISTICS_PATH = `${SDK_WORKFLOWS_DIR}/context-heuristics.ts`; +const PERSONA_TEST_PATH = `${SDK_WORKFLOWS_DIR}/__tests__/persona-utils.test.ts`; +const WORKFLOW_GENERATOR_TEST_PATH = `${SDK_WORKFLOWS_DIR}/__tests__/workflow-generator.test.ts`; +const INTEGRATION_TEST_PATH = `${SDK_WORKFLOWS_DIR}/__tests__/workflow-generator.integration.test.ts`; +const SNAPSHOT_DIR = `${SDK_WORKFLOWS_DIR}/__tests__/__snapshots__`; + +async function main() { + const wf = workflow('phase-4-meta-agent-flag-tests') + .description( + 'Create comprehensive unit and integration tests for Phase 1-3 --agent workflow SDK functionality' + ) + .pattern('dag') + .channel('wf-phase-4-meta-agent-flag-tests') + .maxConcurrency(4) + .timeout(3_600_000) + .agent('persona-test-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused vitest test author for persona resolution utilities', + retries: 2, + }) + .agent('generator-test-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused vitest test author for workflow generator units and emitted source behavior', + retries: 2, + }) + .agent('integration-test-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused vitest integration test author for persona resolution to workflow generation', + retries: 2, + }) + .agent('sdk-reviewer', { + cli: 'codex', + preset: 'reviewer', + role: 'Reviews Phase 4 tests for spec coverage, determinism, and regression value', + retries: 1, + }); + + wf.step('guard-not-main', { + type: 'deterministic', + command: [ + 'branch="$(git branch --show-current)"', + 'if [ "$branch" = "main" ]; then echo "Refusing to run Phase 4 workflow on main"; exit 1; fi', + 'echo "Running on branch: ${branch:-detached}"', + ].join(' && '), + captureOutput: true, + failOnError: 
true, + }); + + wf.step('read-spec', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${SPEC_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-phase-1-api', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [`test -f ${PERSONA_UTILS_PATH}`, `sed -n '1,360p' ${PERSONA_UTILS_PATH}`].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-phase-2-api', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [`test -f ${WORKFLOW_GENERATOR_PATH}`, `sed -n '1,520p' ${WORKFLOW_GENERATOR_PATH}`].join( + ' && ' + ), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-phase-3-api', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `test -f ${CLI_PATH}`, + `test -f ${CONTEXT_HEURISTICS_PATH}`, + `(grep -n "export .*parseAgentFlags\\|export .*buildWorkflowInput\\|function parseAgentFlags\\|function buildWorkflowInput" ${CLI_PATH} || true)`, /* subshell: keeps `|| true` from masking a failed `test -f` earlier in the && chain */ + `(grep -n "export .*inferContextFiles\\|function inferContextFiles" ${CONTEXT_HEURISTICS_PATH} || true)`, /* a grep with no matches is acceptable; a missing file is not */ + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-existing-tests', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `if test -f ${PERSONA_TEST_PATH}; then sed -n '1,260p' ${PERSONA_TEST_PATH}; else echo "No existing ${PERSONA_TEST_PATH}"; fi`, + `if test -f ${WORKFLOW_GENERATOR_TEST_PATH}; then sed -n '1,320p' ${WORKFLOW_GENERATOR_TEST_PATH}; else echo "No existing ${WORKFLOW_GENERATOR_TEST_PATH}"; fi`, + `if test -f ${INTEGRATION_TEST_PATH}; then sed -n '1,260p' ${INTEGRATION_TEST_PATH}; else echo "No existing ${INTEGRATION_TEST_PATH}"; fi`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-test-patterns', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `sed -n '1,200p' ${SDK_WORKFLOWS_DIR}/__tests__/template-resolver.test.ts`, + `sed -n '1,180p' 
${SDK_WORKFLOWS_DIR}/__tests__/verification.test.ts`, + `sed -n '1,180p' ${SDK_WORKFLOWS_DIR}/__tests__/cli-session-collector.test.ts`, + `find ${SDK_WORKFLOWS_DIR}/__tests__ -maxdepth 2 -type f -name "*.snap" -print | sed -n '1,80p'`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('write-persona-utils-tests', { + agent: 'persona-test-writer', + dependsOn: ['read-spec', 'read-phase-1-api', 'read-existing-tests', 'read-test-patterns'], + task: ` +You are writing Phase 4 persona utility tests. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Own only ${PERSONA_TEST_PATH}; do not revert or rewrite files outside that scope. + +Spec: +{{steps.read-spec.output}} + +Current persona utility API: +{{steps.read-phase-1-api.output}} + +Existing tests and style examples: +{{steps.read-existing-tests.output}} +{{steps.read-test-patterns.output}} + +Requirements: +1. Create or replace ${PERSONA_TEST_PATH} with the complete spec coverage for derivePreset(), derivePattern(), isAnalystIntent(), isPipelineIntent(), resolvePersonaByIdOrIntent(), DEFAULT_PERSONA_PROFILES, registry management, and resolvePersonaSelection(). +2. Import exactly from '../persona-utils.js' with vitest describe/it/expect/beforeEach, and use type-only imports where appropriate. +3. Cover all production intents, all 10 default persona IDs, unregistered derived intents, profile hints, case/whitespace handling, edge inputs, and registry cache behavior. +4. Reset and initialize DEFAULT_PERSONA_PROFILES in each resolution-focused beforeEach so tests are order-independent. +5. Keep tests deterministic and avoid filesystem, network, snapshots, or external dependencies in this file. + +Only edit ${PERSONA_TEST_PATH}. End your output with PERSONA_UTILS_PHASE4_TESTS_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'PERSONA_UTILS_PHASE4_TESTS_DONE' }, + retries: 2, + }); + + wf.step('verify-persona-utils-tests-file', { + type: 'deterministic', + dependsOn: ['write-persona-utils-tests'], + command: [ + `test -f ${PERSONA_TEST_PATH}`, + `grep -q "derivePreset" ${PERSONA_TEST_PATH}`, + `grep -q "derivePattern" ${PERSONA_TEST_PATH}`, + `grep -q "resolvePersonaByIdOrIntent" ${PERSONA_TEST_PATH}`, + `grep -q "resolvePersonaSelection" ${PERSONA_TEST_PATH}`, + `grep -q "DEFAULT_PERSONA_PROFILES" ${PERSONA_TEST_PATH}`, + `grep -q "ANALYST_INTENTS" ${PERSONA_TEST_PATH}`, + `grep -q "PIPELINE_INTENTS" ${PERSONA_TEST_PATH}`, + `grep -q "reviewer-v2" ${PERSONA_TEST_PATH}`, + `grep -q "opencode-workflow-correctness" ${PERSONA_TEST_PATH}`, + `grep -q "npm-provenance" ${PERSONA_TEST_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('write-workflow-generator-tests', { + agent: 'generator-test-writer', + dependsOn: [ + 'read-spec', + 'read-phase-1-api', + 'read-phase-2-api', + 'read-existing-tests', + 'read-test-patterns', + ], + task: ` +You are writing Phase 4 workflow generator unit tests. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Own only ${WORKFLOW_GENERATOR_TEST_PATH}; do not revert or rewrite files outside that scope. + +Spec: +{{steps.read-spec.output}} + +Current generator and persona APIs: +{{steps.read-phase-2-api.output}} +{{steps.read-phase-1-api.output}} + +Existing tests and style examples: +{{steps.read-existing-tests.output}} +{{steps.read-test-patterns.output}} + +Requirements: +1. Create or replace ${WORKFLOW_GENERATOR_TEST_PATH} with complete unit tests for slugify(), escapeTemplateString(), generateWorkflow(), all six emit phase functions, WorkflowMetadata, and edge cases from the spec. +2. Define createMinimalInput(), createFullInput(), and createPipelineInput() fixtures matching the spec and current exported types. +3. 
Assert emitted TypeScript structure without executing generated workflows. +4. Cover DAG and pipeline task shapes, skill/context/verification dependencies, output logging, escaping, metadata phase counts, estimated waves, and special/long/empty strings. +5. Keep tests deterministic, import from '../workflow-generator.js' and '../persona-utils.js', and avoid filesystem, network, snapshots, or external dependencies in this unit file. + +Only edit ${WORKFLOW_GENERATOR_TEST_PATH}. End your output with WORKFLOW_GENERATOR_PHASE4_TESTS_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'WORKFLOW_GENERATOR_PHASE4_TESTS_DONE' }, + retries: 2, + }); + + wf.step('verify-workflow-generator-tests-file', { + type: 'deterministic', + dependsOn: ['write-workflow-generator-tests'], + command: [ + `test -f ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "generateWorkflow" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitBootstrapPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitSkillPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitContextPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitTaskPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitVerificationPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "emitFinalPhase" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "slugify" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "escapeTemplateString" ${WORKFLOW_GENERATOR_TEST_PATH}`, + `grep -q "createPipelineInput" ${WORKFLOW_GENERATOR_TEST_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('write-integration-tests', { + agent: 'integration-test-writer', + dependsOn: [ + 'read-spec', + 'read-phase-1-api', + 'read-phase-2-api', + 'read-phase-3-api', + 'read-existing-tests', + 'read-test-patterns', + ], + task: ` +You are writing Phase 4 workflow generator integration tests. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. 
Own only ${INTEGRATION_TEST_PATH} and generated Vitest snapshots under ${SNAPSHOT_DIR}; do not revert or rewrite files outside that scope. + +Spec: +{{steps.read-spec.output}} + +Current Phase 1-3 APIs: +{{steps.read-phase-1-api.output}} +{{steps.read-phase-2-api.output}} +{{steps.read-phase-3-api.output}} + +Existing tests and snapshot style examples: +{{steps.read-existing-tests.output}} +{{steps.read-test-patterns.output}} + +Requirements: +1. Create ${INTEGRATION_TEST_PATH} with the full persona resolution -> workflow generation integration coverage from the spec. +2. Include buildInput() helper, beforeEach registry reset/init, registered intent round-trips, unregistered derived intent round-trips, all 10 persona ID round-trips, and cross-pattern consistency. +3. Add the six generated workflow source snapshot tests from the spec using toMatchSnapshot(). +4. Add metadata consistency tests and source validity tests for balanced braces/parentheses, no stray undefined/null values, and valid step names. +5. Keep tests deterministic; generated workflows are inspected as strings and must not be executed. + +Only edit ${INTEGRATION_TEST_PATH} and Vitest snapshot files generated for it. End your output with WORKFLOW_GENERATOR_INTEGRATION_TESTS_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'WORKFLOW_GENERATOR_INTEGRATION_TESTS_DONE' }, + retries: 2, + }); + + wf.step('verify-integration-tests-file', { + type: 'deterministic', + dependsOn: ['write-integration-tests'], + command: [ + `test -f ${INTEGRATION_TEST_PATH}`, + `grep -q "resolvePersonaByIdOrIntent" ${INTEGRATION_TEST_PATH}`, + `grep -q "generateWorkflow" ${INTEGRATION_TEST_PATH}`, + `grep -q "buildInput" ${INTEGRATION_TEST_PATH}`, + `grep -q "toMatchSnapshot" ${INTEGRATION_TEST_PATH}`, + `grep -q "requirements-analysis" ${INTEGRATION_TEST_PATH}`, + `grep -q "tdd-enforcement" ${INTEGRATION_TEST_PATH}`, + `grep -q "opencode-workflow-correctness" ${INTEGRATION_TEST_PATH}`, + `grep -q "metadata consistency" ${INTEGRATION_TEST_PATH}`, + `grep -q "generated source validity" ${INTEGRATION_TEST_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('run-persona-utils-tests', { + type: 'deterministic', + dependsOn: ['verify-persona-utils-tests-file'], + command: `npx vitest run ${PERSONA_TEST_PATH} --reporter=verbose`, + captureOutput: true, + failOnError: true, + }); + + wf.step('run-workflow-generator-tests', { + type: 'deterministic', + dependsOn: ['verify-workflow-generator-tests-file'], + command: `npx vitest run ${WORKFLOW_GENERATOR_TEST_PATH} --reporter=verbose`, + captureOutput: true, + failOnError: true, + }); + + wf.step('run-integration-tests-update-snapshots', { + type: 'deterministic', + dependsOn: ['verify-integration-tests-file'], + command: `npx vitest run --update ${INTEGRATION_TEST_PATH} --reporter=verbose`, + captureOutput: true, + failOnError: true, + }); + + wf.step('verify-snapshots', { + type: 'deterministic', + dependsOn: ['run-integration-tests-update-snapshots'], + command: [ + `test -d ${SNAPSHOT_DIR}`, + `find ${SNAPSHOT_DIR} -type f -name "workflow-generator.integration.test.ts.snap" -print -quit | grep -q .`, + `grep -R "minimal DAG workflow" 
${SNAPSHOT_DIR}/workflow-generator.integration.test.ts.snap`, + `grep -R "full security review workflow" ${SNAPSHOT_DIR}/workflow-generator.integration.test.ts.snap`, + `grep -R "pipeline requirements analysis workflow" ${SNAPSHOT_DIR}/workflow-generator.integration.test.ts.snap`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('run-phase-4-tests', { + type: 'deterministic', + dependsOn: ['run-persona-utils-tests', 'run-workflow-generator-tests', 'verify-snapshots'], + command: [ + `npx vitest run ${PERSONA_TEST_PATH} ${WORKFLOW_GENERATOR_TEST_PATH} ${INTEGRATION_TEST_PATH} --reporter=verbose`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('typecheck-sdk', { + type: 'deterministic', + dependsOn: ['run-phase-4-tests'], + command: 'cd packages/sdk && npx tsc -p tsconfig.json --noEmit', + captureOutput: true, + failOnError: true, + }); + + wf.step('review-phase-4-tests', { + agent: 'sdk-reviewer', + dependsOn: ['read-spec', 'run-phase-4-tests', 'typecheck-sdk'], + task: ` +Review the Phase 4 test implementation. Do not edit files. Do not use Relaycast MCP tools or spawn sub-agents. + +Spec: +{{steps.read-spec.output}} + +Verify: +1. ${PERSONA_TEST_PATH} exhaustively covers persona derivation, resolution, registry behavior, defaults, wrapper behavior, and edge cases from the spec. +2. ${WORKFLOW_GENERATOR_TEST_PATH} covers slugify(), escapeTemplateString(), generateWorkflow(), all six emitters, metadata, DAG/pipeline branches, dependencies, output logging, and escaping edge cases. +3. ${INTEGRATION_TEST_PATH} covers resolution -> generation round-trips, all registered and derived intents, all 10 persona IDs, cross-pattern consistency, six snapshots, metadata consistency, and source validity. +4. Snapshot files were generated and reviewed as stable reference workflow output. +5. 
Tests are deterministic, do not execute generated workflows, introduce no dependencies, and pass along with SDK typecheck: +{{steps.run-phase-4-tests.output}} +{{steps.typecheck-sdk.output}} + +Output REVIEW_PASS if acceptable; otherwise output REVIEW_FAIL with concrete blockers. +`.trim(), + verification: { type: 'output_contains', value: 'REVIEW_PASS' }, + retries: 1, + }); + + wf.step('summarize-artifacts', { + type: 'deterministic', + dependsOn: ['review-phase-4-tests'], + command: [ + `echo "Phase 4 tests workflow completed."`, + `echo "Artifacts:"`, + `echo "- ${PERSONA_TEST_PATH}"`, + `echo "- ${WORKFLOW_GENERATOR_TEST_PATH}"`, + `echo "- ${INTEGRATION_TEST_PATH}"`, + `echo "- ${SNAPSHOT_DIR}/workflow-generator.integration.test.ts.snap"`, + `git diff -- ${PERSONA_TEST_PATH} ${WORKFLOW_GENERATOR_TEST_PATH} ${INTEGRATION_TEST_PATH} ${SNAPSHOT_DIR} | sed -n '1,420p'`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + const result = await wf.onError('retry', { maxRetries: 2, retryDelayMs: 10_000 }).run({ + cwd: REPO_ROOT, + }); + + if ('status' in result) { + console.log(`Result: ${result.status}`); + } else { + console.log('Dry run completed.'); + return; + } + + if (result.status !== 'completed') { + process.exitCode = 1; + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/build-plans/05-documentation.spec.md b/build-plans/05-documentation.spec.md new file mode 100644 index 000000000..ff14401e6 --- /dev/null +++ b/build-plans/05-documentation.spec.md @@ -0,0 +1,706 @@ +# Phase 5 Specification: Documentation for the `--agent` Flag Feature + +> Document the `--agent` flag feature across README, CLI help text, and SDK API reference so that users and consumers can discover and use persona-driven workflow generation. 
+ +**Phase:** 5 of 5 +**Dependencies:** Phase 4 (tests — all SDK functions must be tested and stable before documenting) +**Target files:** + +- `README.md` (modify — add `--agent` flag section) +- `packages/sdk/src/workflows/cli.ts` (modify — update `printUsage()` help text) +- `docs/reference-sdk.md` (modify — add persona-utils and workflow-generator API docs) +- `web/content/docs/reference-sdk.mdx` (modify — MDX mirror of SDK API docs) +- `docs/agent-flag.md` (new — dedicated guide for the `--agent` flag) +- `web/content/docs/agent-flag.mdx` (new — MDX mirror of the agent flag guide) + +--- + +## Goal + +Provide complete, discoverable documentation for the `--agent` flag feature introduced in Phases 1–4. Documentation must cover three audiences: + +1. **CLI users** — how to use `--agent` from the command line with zero prior setup +2. **SDK consumers** — how to import and use persona resolution and workflow generation programmatically +3. **Contributors** — how the persona registry, derivation rules, and workflow generator internals work + +All documentation follows the docs-sync rule: `.mdx` files in `web/content/docs/` are mirrored as `.md` files in `docs/` with MDX components converted to plain markdown. + +--- + +## File 1: `README.md` — Add `--agent` Flag Section + +### Location + +Add a new section after the existing "Quick Start" or "Usage" section. The section should be titled **"Agent Mode"** and provide a concise overview with three key examples. + +### Content to Add + +````markdown +## Agent Mode + +The `--agent` flag enables persona-driven workflow generation. Instead of writing a YAML workflow file, describe your task in plain text and specify an agent persona — the CLI resolves the persona, generates a typed workflow, and executes it in one command. 
+ +### Basic Usage + +```bash +agent-relay run "Review the auth module for security vulnerabilities" --agent security-review +``` + + +This resolves the `security-review` persona, generates a DAG workflow with context-gathering and verification steps, and executes it immediately. + +### With Explicit Context Files + +```bash +agent-relay run "Refactor the payment service" --agent code-gen \ + --context src/services/payment.ts \ + --context src/services/payment.test.ts +``` + +Use `--context` (repeatable) to specify which files the agent should read. When omitted, the CLI infers context files based on the persona's intent. + +### Dry Run + +```bash +agent-relay run "Write API documentation" --agent documentation --dry-run +``` + +The `--dry-run` flag prints the generated workflow source and metadata without executing. Use this to inspect what would run before committing to execution. + +### All Agent Mode Flags + +| Flag | Short | Type | Default | Description | +| ------------------- | ----- | ---------- | ----------- | ---------------------------------------------------------- | +| `--agent ` | `-a` | `string` | — | Persona ID or intent string (required for agent mode) | +| `--profile ` | `-p` | `string` | — | Disambiguation hint when multiple personas share an intent | +| `--tier ` | `-t` | `string` | `standard` | Execution tier: `standard` or `premium` | +| `--dry-run` | `-d` | `boolean` | `false` | Print generated workflow without executing | +| `--context ` | `-c` | `string[]` | (heuristic) | Context file to read (repeatable) | +| `--verify ` | `-v` | `string[]` | `[]` | Verification command, must exit 0 (repeatable) | +| `--output ` | `-o` | `string` | — | Write generated workflow source to file | +| `--concurrency ` | — | `number` | `4` | Max concurrent steps | +| `--timeout ` | — | `number` | `3600000` | Workflow timeout in milliseconds | + +### Available Personas + +| Intent | Preset | Pattern | Description | +| ----------------------- | ------- | 
-------- | ----------------------------------- | +| `review` | analyst | dag | Code review and quality analysis | +| `security-review` | analyst | dag | Security vulnerability analysis | +| `architecture-plan` | analyst | dag | Architecture planning and design | +| `requirements-analysis` | analyst | pipeline | Requirements gathering and analysis | +| `verification` | analyst | dag | Evidence-based verification | +| `test-strategy` | analyst | dag | Test strategy planning | +| `documentation` | worker | pipeline | Documentation generation | +| `tdd-enforcement` | worker | pipeline | TDD coaching and enforcement | +| `debugging` | worker | dag | Bug investigation and fixing | +| `code-gen` | worker | dag | General code generation | +| `implement-frontend` | worker | dag | Frontend implementation | +| `flake-investigation` | worker | dag | Flaky test investigation | +| `npm-provenance` | worker | dag | NPM provenance configuration | + +```` + +--- + +## File 2: `packages/sdk/src/workflows/cli.ts` — Update `printUsage()` + +### Current State + +The `printUsage()` function in `cli.ts` needs to reflect the agent mode flags as specified in Phase 3. This may have been partially implemented during Phase 3; this phase ensures the help text is complete, accurate, and matches the final API. + +### Updated `printUsage()` Content + +```ts +function printUsage(): void { + console.log( + ` +Usage: relay-workflow [options] + relay-workflow "" --agent [agent-options] + relay-workflow --resume + +Run a relay.yaml workflow file, or generate and run a workflow from a persona. 
+ +Arguments: + Path to the relay.yaml workflow file + Task description (in agent mode) + +YAML Mode Options: + --workflow Run a specific workflow by name (default: first) + --resume Resume a failed or interrupted run by its run ID + --start-from Start from a specific step, skipping predecessors + --previous-run-id Use cached outputs from a specific prior run + --validate Validate workflow YAML without running + +Agent Mode Options: + --agent, -a Persona ID or intent (e.g., 'security-review', 'reviewer-v1') + --profile, -p Disambiguation hint for shared intents + --tier, -t Execution tier: 'standard' (default) or 'premium' + --dry-run, -d Print generated workflow without executing + --context, -c Context file to read (repeatable) + --verify, -v Verification command (repeatable, must exit 0) + --output, -o Write generated workflow source to file + --concurrency Max concurrent steps (default: 4) + --timeout Workflow timeout in ms (default: 3600000) + +General: + --help Show this help message + +Examples: + # YAML mode + relay-workflow workflows/daytona-migration.yaml + relay-workflow workflows/feature-dev.yaml --workflow build-and-test + + # Agent mode — basic + relay-workflow "Review auth for vulnerabilities" --agent security-review + + # Agent mode — with context files + relay-workflow "Fix flaky test in CI" --agent debugging --context tests/flaky.test.ts + + # Agent mode — dry run + relay-workflow "Write API docs" --agent documentation --dry-run + + # Agent mode — with output and verification + relay-workflow "Refactor auth module" -a code-gen -c src/auth.ts -o workflow.ts -v "npm test" +`.trim() + ); +} +```` + +### Validation + +- Help text must display when `--help` is passed or when no arguments are provided +- All flag names, short aliases, and defaults must match the `parseAgentFlags()` implementation from Phase 3 +- Examples must be runnable commands (valid flag combinations) + +--- + +## File 3: `docs/agent-flag.md` — Dedicated Agent Flag Guide + +### 
Purpose + +A standalone guide for the `--agent` flag feature, providing comprehensive usage documentation with examples, persona reference, and troubleshooting. + +### Content Structure + +````markdown +# Agent Mode (`--agent` Flag) + +The `--agent` flag transforms `relay-workflow` from a YAML executor into a persona-driven workflow generator. Describe your task, pick a persona, and the CLI handles the rest. + +## How It Works + +1. **Parse** — the CLI extracts the task description and `--agent` reference +2. **Resolve** — the persona is resolved via `resolvePersonaByIdOrIntent()`, mapping the reference to an intent, preset (`worker` or `analyst`), and swarm pattern (`dag` or `pipeline`) +3. **Infer** — if no `--context` files are provided, the CLI infers context files from intent heuristics +4. **Generate** — `generateWorkflow()` produces a complete TypeScript workflow using the `WorkflowBuilder` API +5. **Execute** — the generated workflow runs immediately (unless `--dry-run` is set) + +## Usage Examples + +### 1. Basic — Single Persona + +```bash +agent-relay run "Review the auth module for security vulnerabilities" --agent security-review +``` + +Resolves `security-review` to: + +- **Preset:** `analyst` (read-only analysis, no code modification) +- **Pattern:** `dag` (parallel context reads, convergent analysis) +- **Agent:** `security-reviewer-v1` from the default persona registry + +### 2. With Explicit Context Files + +```bash +agent-relay run "Refactor the payment service" --agent code-gen \ + --context src/services/payment.ts \ + --context src/services/payment.test.ts \ + --context src/types/payment.d.ts +``` + +The `--context` flag is repeatable. Each path becomes a deterministic step that captures the file content for the agent's task prompt via `{{steps.X.output}}` interpolation. + +When `--context` is provided, intent-based heuristics are skipped entirely. + +### 3.
Dry Run — Inspect Before Executing + +```bash +agent-relay run "Write API documentation for the SDK" --agent documentation --dry-run +``` + +Output includes: + +- Generated TypeScript workflow source +- Workflow metadata (step count, estimated waves, pattern, preset) +- Resolved persona details +- Inferred context files (if any) + +### 4. With Verification Commands + +```bash +agent-relay run "Fix the broken login flow" --agent debugging \ + --context src/auth/login.ts \ + --verify "npm test -- --grep 'login'" \ + --verify "npx tsc --noEmit" +``` + +Verification commands run after the agent completes its task. Each must exit 0 for the workflow to succeed. Failed verifications cause the workflow to fail with a clear error. + +### 5. Save Generated Workflow to Disk + +```bash +agent-relay run "Implement pagination for the users API" --agent code-gen \ + --output workflows/generated/users-pagination.ts +``` + +The `--output` flag writes the generated TypeScript workflow to disk before executing. This is useful for: + +- Reviewing and customizing the generated workflow +- Re-running the same workflow later without regeneration +- Version-controlling generated workflows + +### 6. Profile Disambiguation + +```bash +agent-relay run "Review the database migration" --agent review --profile reviewer-v2 +``` + +When multiple personas share the same intent (e.g., `reviewer-v1` and `reviewer-v2` both serve `review`), use `--profile` to select a specific persona by ID. + +### 7. Premium Tier Execution + +```bash +agent-relay run "Architect a new microservice" --agent architecture-plan --tier premium +``` + +The `--tier` flag controls model selection. `premium` uses higher-capability models at increased cost. + +### 8. 
Custom Concurrency and Timeout + +```bash +agent-relay run "Run security audit" --agent security-review \ + --concurrency 8 \ + --timeout 7200000 +``` + +- `--concurrency` controls the max number of parallel steps (1–32, default: 4) +- `--timeout` sets the workflow timeout in milliseconds (minimum: 1000, default: 3600000 = 1 hour) + +## Persona Reference + +### Preset Types + +| Preset | Behavior | Use When | +| --------- | -------------------------------------------------------- | -------------------------------------------------------- | +| `worker` | Can modify files, create code, write documentation | The task produces artifacts or changes code | +| `analyst` | Read-only analysis, produces reports and recommendations | The task is investigation or review with no code changes | + +### Pattern Types + +| Pattern | Behavior | Use When | +| ---------- | --------------------------------------------- | ------------------------------------------------------------------ | +| `dag` | Parallel context reads + convergent execution | Context steps are independent and can run simultaneously | +| `pipeline` | Sequential step-by-step processing | Task has inherent ordering (e.g., analyze → synthesize → validate) | + +### Default Persona Registry + +| ID | Name | Intent | Preset | Pattern | +| ------------------------- | ----------------------- | ----------------------- | ------- | -------- | +| `reviewer-v1` | Code Reviewer | `review` | analyst | dag | +| `reviewer-v2` | Senior Reviewer | `review` | analyst | dag | +| `architect-v1` | Architecture Planner | `architecture-plan` | analyst | dag | +| `requirements-analyst-v1` | Requirements Analyst | `requirements-analysis` | analyst | pipeline | +| `security-reviewer-v1` | Security Reviewer | `security-review` | analyst | dag | +| `verifier-v1` | Verification Specialist | `verification` | analyst | dag | +| `test-strategist-v1` | Test Strategist | `test-strategy` | analyst | dag | +| `docs-writer-v1` | Documentation Writer | 
`documentation` | worker | pipeline | +| `tdd-coach-v1` | TDD Coach | `tdd-enforcement` | worker | pipeline | +| `code-worker-v1` | Code Worker | `code-gen` | worker | dag | + +### Custom Personas + +The persona registry is extensible. To add custom personas programmatically: + +```ts +import { personaRegistry } from '@agent-relay/sdk/workflows'; + +personaRegistry.register({ + id: 'my-custom-reviewer-v1', + name: 'Custom Reviewer', + intent: 'review', + preset: 'analyst', + pattern: 'dag', + description: 'Reviews code with custom org standards', +}); +``` + +## Context Heuristics + +When `--context` is omitted, the CLI infers context files based on the resolved intent: + +| Intent | Auto-detected Context | +| ------------------- | --------------------------------------------- | +| `review` | Changed files (`git diff`), `tsconfig.json` | +| `security-review` | Changed files, `package.json`, `.env.example` | +| `architecture-plan` | `tsconfig.json`, `package.json`, entry points | +| `debugging` | Test output (`npm test`), failing test files | +| `documentation` | `README.md`, entry points, existing docs | +| `verification` | CI workflows, `package.json`, `tsconfig.json` | +| `npm-provenance` | Publish workflow, `package.json`, `.npmrc` | + +Context files are capped at 10 per workflow to avoid overwhelming the agent. + +## Troubleshooting + +### "Unknown persona" warning + +If `--agent` receives a ref that doesn't match any registered persona ID or intent, the CLI falls back to derivation: + +- Unknown refs default to `preset: 'worker'` and `pattern: 'dag'` +- The workflow still generates and runs — just without persona-specific configuration +- Use `--dry-run` to verify resolution before executing + +### No context files inferred + +If intent heuristics find no matching files on disk, the workflow runs with an empty context phase. Use `--context` to explicitly provide files. 
+ +### Mutual exclusivity + +Agent mode flags (`--agent`, `--context`, `--verify`, `--dry-run`, `--profile`, `--tier`, `--output`) are mutually exclusive with YAML mode flags (`--resume`, `--workflow`, `--start-from`, `--previous-run-id`, `--validate`). Mixing them produces an error. + +```` + +--- + +## File 4: `web/content/docs/agent-flag.mdx` — MDX Mirror + +The MDX version includes the same content as `docs/agent-flag.md` with: +- YAML frontmatter (`title`, `description`) +- `<CodeGroup>` wrapping for multi-example code blocks +- `<Note>` and `<Warning>` components where appropriate + +### Frontmatter + +```yaml +--- +title: "Agent Mode (--agent Flag)" +description: "Generate and execute workflows from persona-driven task descriptions using the --agent CLI flag." +--- +```` + +### MDX-specific conversions + +| Plain markdown (`.md`) | MDX (`.mdx`) | +| ---------------------- | ------------------------ | +| `> **Note:**` | `<Note>` | +| `> **Warning:**` | `<Warning>` | +| Adjacent code blocks | Wrap in `<CodeGroup>` | +| No frontmatter | Include YAML frontmatter | + +--- + +## File 5: `docs/reference-sdk.md` — SDK API Reference Updates + +### Location + +Add a new section to the existing SDK reference for the persona-utils and workflow-generator exports. + +### Content to Add + +````markdown +## Persona Resolution API + +### `resolvePersonaByIdOrIntent(ref, profile?)` + +Resolves a persona reference (ID or intent string) to a complete `PersonaResolution` containing the intent, preset, pattern, and optional persona profile.
+ +```ts +import { resolvePersonaByIdOrIntent } from '@agent-relay/sdk/workflows'; + +// Resolve by intent +const byIntent = resolvePersonaByIdOrIntent('security-review'); +// → { resolved: true, intent: 'security-review', preset: 'analyst', pattern: 'dag', persona: {...} } + +// Resolve by persona ID +const byId = resolvePersonaByIdOrIntent('reviewer-v2'); +// → { resolved: true, intent: 'review', preset: 'analyst', pattern: 'dag', persona: {...} } + +// Unknown ref — falls back to derivation +const fallback = resolvePersonaByIdOrIntent('custom-task'); +// → { resolved: false, intent: 'custom-task', preset: 'worker', pattern: 'dag' } +``` + +**Parameters:** + +- `ref` (`string`) — Persona ID or intent string +- `profile` (`PersonaProfile`, optional) — Disambiguation hint when multiple personas share an intent + +**Returns:** `PersonaResolution` + +### `derivePreset(intent)` + +Pure function that maps an intent string to an `AgentPreset` (`'worker'` or `'analyst'`). + +```ts +import { derivePreset } from '@agent-relay/sdk/workflows'; + +derivePreset('review'); // → 'analyst' +derivePreset('security-review'); // → 'analyst' +derivePreset('code-gen'); // → 'worker' +derivePreset('documentation'); // → 'worker' +``` + +### `derivePattern(intent)` + +Pure function that maps an intent string to a `SwarmPattern` (`'dag'` or `'pipeline'`). + +```ts +import { derivePattern } from '@agent-relay/sdk/workflows'; + +derivePattern('review'); // → 'dag' +derivePattern('requirements-analysis'); // → 'pipeline' +derivePattern('documentation'); // → 'pipeline' +derivePattern('code-gen'); // → 'dag' +``` + +### `personaRegistry` + +Module-level singleton for managing persona profiles. Initialized with 10 default profiles on import.
+ +```ts +import { personaRegistry } from '@agent-relay/sdk/workflows'; + +// Lookup by ID +const persona = personaRegistry.getById('reviewer-v1'); + +// Lookup by intent +const ids = personaRegistry.getByIntent('review'); +// → ['reviewer-v1', 'reviewer-v2'] + +// Register a custom persona +personaRegistry.register({ + id: 'my-persona', + name: 'My Persona', + intent: 'custom-task', + preset: 'worker', + pattern: 'dag', +}); +``` + +### Constants + +```ts +import { ANALYST_INTENTS, PIPELINE_INTENTS } from '@agent-relay/sdk/workflows'; + +// ANALYST_INTENTS: 'review' | 'architecture-plan' | 'requirements-analysis' +// | 'security-review' | 'verification' | 'test-strategy' + +// PIPELINE_INTENTS: 'requirements-analysis' | 'documentation' | 'tdd-enforcement' +``` + +### Types + +```ts +import type { + PersonaProfile, + PersonaSelection, + PersonaResolution, + PersonaRegistry, + WorkflowGeneratorInput, + ContextFileSpec, + VerificationSpec, + SkillMaterializationPlan, +} from '@agent-relay/sdk/workflows'; +``` + +## Workflow Generator API + +### `generateWorkflow(input, options?)` + +Generates a complete, runnable TypeScript workflow file from a `WorkflowGeneratorInput`.
+ +```ts +import { generateWorkflow } from '@agent-relay/sdk/workflows'; +import type { WorkflowGeneratorInput } from '@agent-relay/sdk/workflows'; + +const input: WorkflowGeneratorInput = { + taskDescription: 'Review auth middleware', + workflowName: 'review-auth', + persona: { + id: 'security-reviewer-v1', + name: 'Security Reviewer', + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + }, + selection: { + intent: 'security-review', + preset: 'analyst', + pattern: 'dag', + resolved: true, + resolutionType: 'intent', + }, + skillPlan: { installs: [] }, + contextFiles: [{ stepName: 'read-auth', command: 'cat src/auth.ts' }], + verifications: [{ stepName: 'verify-types', command: 'npx tsc --noEmit' }], + maxConcurrency: 4, + timeout: 3_600_000, +}; + +const { source, metadata } = generateWorkflow(input); + +// source: complete TypeScript workflow file as a string +// metadata: { name, pattern, preset, stepCount, agentCount, estimatedWaves, ... } +``` + +**Parameters:** + +- `input` (`WorkflowGeneratorInput`) — Resolved persona, task, context files, and verifications +- `options` (`WorkflowGeneratorOptions`, optional) — Code generation options + +**Returns:** `GeneratedWorkflow` — `{ source: string, metadata: WorkflowMetadata }` + +### `WorkflowGeneratorOptions` + +```ts +interface WorkflowGeneratorOptions { + indent?: 'spaces' | 'tabs'; // default: 'spaces' + indentSize?: number; // default: 2 + comments?: boolean; // default: true + header?: boolean; // default: true +} +``` + +### `WorkflowMetadata` + +```ts +interface WorkflowMetadata { + name: string; + pattern: SwarmPattern; + preset: AgentPreset; + agentCount: number; + stepCount: number; + phases: { + bootstrap: number; + skills: number; + context: number; + task: number; + verification: number; + final: number; + }; + hasSkills: boolean; + hasVerification: boolean; + estimatedWaves: number; +} +``` + +```` + +--- + +## File 6: `web/content/docs/reference-sdk.mdx` — MDX Mirror of SDK Reference
+ +Apply the same additions as `docs/reference-sdk.md` with MDX components. Follow the docs-sync rule for component conversion. + +--- + +## SDK Exports Summary + +The following public exports were introduced across Phases 1–3 and must be documented: + +### From `persona-utils.ts` (Phase 1) + +| Export | Kind | Description | +|---|---|---| +| `resolvePersonaByIdOrIntent` | function | Resolve a persona ref to intent/preset/pattern | +| `resolvePersonaSelection` | function | Convenience wrapper accepting `PersonaSelection` | +| `derivePreset` | function | Map intent → `AgentPreset` | +| `derivePattern` | function | Map intent → `SwarmPattern` | +| `isAnalystIntent` | function | Check if intent maps to analyst preset | +| `isPipelineIntent` | function | Check if intent maps to pipeline pattern | +| `personaRegistry` | object | Singleton persona registry | +| `initPersonaRegistry` | function | Initialize registry with profiles | +| `resetPersonaRegistry` | function | Clear registry (testing) | +| `getPersonaIdToIntentMap` | function | Get reverse lookup map | +| `DEFAULT_PERSONA_PROFILES` | constant | Array of 10 default profiles | +| `ANALYST_INTENTS` | constant | Tuple of analyst intent strings | +| `PIPELINE_INTENTS` | constant | Tuple of pipeline intent strings | +| `PersonaProfile` | type | Persona profile shape | +| `PersonaSelection` | type | Input for persona resolution | +| `PersonaResolution` | type | Result of persona resolution | +| `PersonaRegistry` | type | Registry interface | +| `WorkflowGeneratorInput` | type | Input for workflow generation | +| `ContextFileSpec` | type | Context file step spec | +| `VerificationSpec` | type | Verification step spec | +| `SkillMaterializationPlan` | type | Skill install plan | +| `AnalystIntent` | type | Union of analyst intent strings | +| `PipelineIntent` | type | Union of pipeline intent strings | + +### From `workflow-generator.ts` (Phase 2) + +| Export | Kind | Description | +|---|---|---| +| `generateWorkflow` | 
function | Generate workflow source from input | +| `emitBootstrapPhase` | function | Generate bootstrap phase lines | +| `emitSkillPhase` | function | Generate skill install phase lines | +| `emitContextPhase` | function | Generate context-gathering phase lines | +| `emitTaskPhase` | function | Generate task execution phase lines | +| `emitVerificationPhase` | function | Generate verification phase lines | +| `emitFinalPhase` | function | Generate final phase lines | +| `GeneratedWorkflow` | type | Output of `generateWorkflow` | +| `WorkflowMetadata` | type | Metadata about generated workflow | +| `WorkflowGeneratorOptions` | type | Code generation options | + +### From `context-heuristics.ts` (Phase 3) + +| Export | Kind | Description | +|---|---|---| +| `inferContextFiles` | function | Infer context files from intent + filesystem | +| `ContextHeuristic` | type | Heuristic definition shape | +| `CandidateSpec` | type | Candidate file spec | + +### From `cli.ts` (Phase 3) + +| Export | Kind | Description | +|---|---|---| +| `parseAgentFlags` | function | Parse `--agent` mode CLI flags | +| `AgentModeFlags` | type | Parsed agent mode flag values | + +--- + +## Implementation Notes + +1. **Docs-sync rule applies.** Every `.mdx` file change must be mirrored in the corresponding `.md` file, and vice versa. MDX components (`<CodeGroup>`, `<Note>`, `<Warning>`) are converted to plain markdown equivalents. + +2. **README changes are minimal.** The README gets a concise "Agent Mode" section with the three canonical examples, a flag reference table, and the persona reference table. Detailed usage goes in the dedicated `docs/agent-flag.md` guide. + +3. **CLI help text is the source of truth.** The `printUsage()` function in `cli.ts` is the canonical flag reference. README and docs should match it exactly. + +4. **SDK reference uses runnable examples.** All code examples in the SDK reference section must be valid TypeScript that a consumer can copy-paste into their project. + +5.
**No new dependencies.** This phase only modifies documentation files and the `printUsage()` function. No npm packages, no new source modules. + +6. **Phase 3 help text may need updating.** Phase 3 defined the initial `printUsage()` content. This phase ensures it is complete and includes all examples. If Phase 3 already implemented the full help text, this phase validates it matches the final API. + +--- + +## Acceptance Criteria + +- [ ] `README.md` includes an "Agent Mode" section with basic, context, and dry-run examples +- [ ] `README.md` flag reference table matches all flags from `parseAgentFlags()` in Phase 3 +- [ ] `README.md` persona reference table lists all 13 production intents with correct preset/pattern +- [ ] `printUsage()` in `cli.ts` displays complete help text covering both YAML and agent modes +- [ ] `printUsage()` examples are valid, runnable commands +- [ ] `docs/agent-flag.md` exists with comprehensive usage guide (8+ examples) +- [ ] `web/content/docs/agent-flag.mdx` exists as the MDX mirror with correct frontmatter +- [ ] `docs/reference-sdk.md` includes persona-utils and workflow-generator API documentation +- [ ] `web/content/docs/reference-sdk.mdx` mirrors the SDK reference additions +- [ ] All code examples in docs are valid TypeScript with correct import paths (`@agent-relay/sdk/workflows`) +- [ ] SDK export tables list all public functions, constants, and types from Phases 1–3 +- [ ] Docs-sync rule is satisfied: every `.mdx` change has a corresponding `.md` mirror +- [ ] No broken markdown formatting (tables render correctly, code blocks have language tags) +- [ ] `--help` output matches documented flag descriptions +``` diff --git a/build-plans/05-documentation.ts b/build-plans/05-documentation.ts new file mode 100644 index 000000000..94293fc4e --- /dev/null +++ b/build-plans/05-documentation.ts @@ -0,0 +1,470 @@ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const { workflow } = 
require('@agent-relay/sdk/workflows'); + +const REPO_ROOT = '/Users/khaliqgant/Projects/AgentWorkforce/relay-workflows'; +const SPEC_PATH = 'workflows/meta-agent-flag/05-documentation.spec.md'; +const README_PATH = 'README.md'; +const SDK_WORKFLOWS_DIR = 'packages/sdk/src/workflows'; +const CLI_PATH = `${SDK_WORKFLOWS_DIR}/cli.ts`; +const PERSONA_UTILS_PATH = `${SDK_WORKFLOWS_DIR}/persona-utils.ts`; +const WORKFLOW_GENERATOR_PATH = `${SDK_WORKFLOWS_DIR}/workflow-generator.ts`; +const CONTEXT_HEURISTICS_PATH = `${SDK_WORKFLOWS_DIR}/context-heuristics.ts`; +const WORKFLOWS_INDEX_PATH = `${SDK_WORKFLOWS_DIR}/index.ts`; +const AGENT_FLAG_DOC_PATH = 'docs/agent-flag.md'; +const AGENT_FLAG_MDX_PATH = 'web/content/docs/agent-flag.mdx'; +const SDK_REFERENCE_DOC_PATH = 'docs/reference-sdk.md'; +const SDK_REFERENCE_MDX_PATH = 'web/content/docs/reference-sdk.mdx'; +const REFERENCE_WORKFLOWS_MDX_PATH = 'web/content/docs/reference-workflows.mdx'; + +async function main() { + const wf = workflow('phase-5-documentation') + .description( + 'Document the --agent flag feature in README, CLI help, SDK reference, and docs site mirrors' + ) + .pattern('dag') + .channel('wf-phase-5-documentation') + .maxConcurrency(4) + .timeout(3_600_000) + .agent('readme-doc-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused documentation writer for README CLI feature docs', + retries: 2, + }) + .agent('cli-help-updater', { + cli: 'codex', + preset: 'worker', + role: 'Focused TypeScript CLI help text updater', + retries: 2, + }) + .agent('agent-guide-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused docs writer for markdown and MDX agent-mode guides', + retries: 2, + }) + .agent('sdk-reference-writer', { + cli: 'codex', + preset: 'worker', + role: 'Focused API reference writer for SDK markdown and MDX mirrors', + retries: 2, + }) + .agent('docs-reviewer', { + cli: 'codex', + preset: 'reviewer', + role: 'Reviews Phase 5 documentation for spec conformance, docs sync, and 
runnable examples', + retries: 1, + }); + + wf.step('guard-not-main', { + type: 'deterministic', + command: [ + 'branch="$(git branch --show-current)"', + 'if [ "$branch" = "main" ]; then echo "Refusing to run Phase 5 workflow on main"; exit 1; fi', + 'echo "Running on branch: ${branch:-detached}"', + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-spec', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `cat ${SPEC_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-readme', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `sed -n '1,260p' ${README_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-cli', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: `sed -n '1,520p' ${CLI_PATH}`, + captureOutput: true, + failOnError: true, + }); + + wf.step('read-phase-api-surface', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `if test -f ${PERSONA_UTILS_PATH}; then sed -n '1,380p' ${PERSONA_UTILS_PATH}; else echo "Missing ${PERSONA_UTILS_PATH}; Phase 1 may not have run yet."; fi`, + `if test -f ${WORKFLOW_GENERATOR_PATH}; then sed -n '1,420p' ${WORKFLOW_GENERATOR_PATH}; else echo "Missing ${WORKFLOW_GENERATOR_PATH}; Phase 2 may not have run yet."; fi`, + `if test -f ${CONTEXT_HEURISTICS_PATH}; then sed -n '1,300p' ${CONTEXT_HEURISTICS_PATH}; else echo "Missing ${CONTEXT_HEURISTICS_PATH}; Phase 3 may not have run yet."; fi`, + `if test -f ${WORKFLOWS_INDEX_PATH}; then sed -n '1,240p' ${WORKFLOWS_INDEX_PATH}; else echo "Missing ${WORKFLOWS_INDEX_PATH}."; fi`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-existing-docs', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `if test -f ${AGENT_FLAG_DOC_PATH}; then sed -n '1,360p' ${AGENT_FLAG_DOC_PATH}; else echo "No existing ${AGENT_FLAG_DOC_PATH}"; fi`, + `if test -f ${AGENT_FLAG_MDX_PATH}; 
then sed -n '1,360p' ${AGENT_FLAG_MDX_PATH}; else echo "No existing ${AGENT_FLAG_MDX_PATH}"; fi`, + `if test -f ${SDK_REFERENCE_DOC_PATH}; then sed -n '1,420p' ${SDK_REFERENCE_DOC_PATH}; else echo "No existing ${SDK_REFERENCE_DOC_PATH}"; fi`, + `if test -f ${SDK_REFERENCE_MDX_PATH}; then sed -n '1,420p' ${SDK_REFERENCE_MDX_PATH}; else echo "No existing ${SDK_REFERENCE_MDX_PATH}"; fi`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('read-doc-style', { + type: 'deterministic', + dependsOn: ['guard-not-main'], + command: [ + `sed -n '1,180p' docs/introduction.md`, + `sed -n '1,220p' web/content/docs/cli-workflows.mdx`, + `if test -f ${REFERENCE_WORKFLOWS_MDX_PATH}; then sed -n '1,260p' ${REFERENCE_WORKFLOWS_MDX_PATH}; else echo "No ${REFERENCE_WORKFLOWS_MDX_PATH} style reference."; fi`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('update-cli-help-text', { + agent: 'cli-help-updater', + dependsOn: ['read-spec', 'read-cli', 'read-phase-api-surface'], + task: ` +You are implementing Phase 5 CLI help documentation. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Own only ${CLI_PATH}; do not revert or rewrite files outside that scope. + +Spec: +{{steps.read-spec.output}} + +Current CLI: +{{steps.read-cli.output}} + +Current Phase 1-3 API surface: +{{steps.read-phase-api-surface.output}} + +Requirements: +1. Update printUsage() in ${CLI_PATH} to match the "Updated printUsage() Content" section of the spec. +2. Preserve existing YAML mode, resume mode, validation mode, agent mode parsing, exports, imports, and runtime behavior. +3. Keep flag names, short aliases, defaults, and examples aligned with parseAgentFlags(). +4. Include YAML Mode Options, Agent Mode Options, General, and all examples from the spec. +5. Do not add dependencies and do not edit any file except ${CLI_PATH}. + +Only edit ${CLI_PATH}. End your output with CLI_HELP_DOCS_DONE. 
+`.trim(), + verification: { type: 'output_contains', value: 'CLI_HELP_DOCS_DONE' }, + retries: 2, + }); + + wf.step('verify-cli-help-text', { + type: 'deterministic', + dependsOn: ['update-cli-help-text'], + command: [ + `grep -q -- ".*--agent " ${CLI_PATH}`, + `grep -q -- "Run a relay.yaml workflow file, or generate and run a workflow from a persona." ${CLI_PATH}`, + `grep -q -- "YAML Mode Options:" ${CLI_PATH}`, + `grep -q -- "Agent Mode Options:" ${CLI_PATH}`, + `grep -q -- "--agent, -a " ${CLI_PATH}`, + `grep -q -- "--profile, -p " ${CLI_PATH}`, + `grep -q -- "--tier, -t " ${CLI_PATH}`, + `grep -q -- "--dry-run, -d" ${CLI_PATH}`, + `grep -q -- "--context, -c " ${CLI_PATH}`, + `grep -q -- "--verify, -v " ${CLI_PATH}`, + `grep -q -- "--output, -o " ${CLI_PATH}`, + `grep -q -- "--concurrency " ${CLI_PATH}`, + `grep -q -- "--timeout " ${CLI_PATH}`, + `grep -q -- "Review auth for vulnerabilities" ${CLI_PATH}`, + `grep -q -- "Refactor auth module" ${CLI_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('update-readme-agent-mode', { + agent: 'readme-doc-writer', + dependsOn: ['read-spec', 'read-readme', 'read-cli'], + task: ` +You are implementing the Phase 5 README documentation. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. Own only ${README_PATH}; do not revert or rewrite files outside that scope. + +Spec: +{{steps.read-spec.output}} + +Current README: +{{steps.read-readme.output}} + +Current CLI help context: +{{steps.read-cli.output}} + +Requirements: +1. Add a new "## Agent Mode" section after the existing Quick Start or Usage section. +2. Include the Basic Usage, With Explicit Context Files, and Dry Run examples from the spec. +3. Include the All Agent Mode Flags table with every flag, alias, type, default, and description from the spec. +4. Include the Available Personas table with all 13 production intents and the correct preset/pattern values. +5. 
Keep this README section concise; leave the comprehensive guide details for ${AGENT_FLAG_DOC_PATH}. +6. Preserve all unrelated README content and formatting. +7. Do not edit any file except ${README_PATH}. + +Only edit ${README_PATH}. End your output with README_AGENT_MODE_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'README_AGENT_MODE_DONE' }, + retries: 2, + }); + + wf.step('verify-readme-agent-mode', { + type: 'deterministic', + dependsOn: ['update-readme-agent-mode'], + command: [ + `grep -q -- "^## Agent Mode" ${README_PATH}`, + `grep -q -- "persona-driven workflow generation" ${README_PATH}`, + `grep -q -- "agent-relay run \\"Review the auth module for security vulnerabilities\\" --agent security-review" ${README_PATH}`, + `grep -q -- "agent-relay run \\"Refactor the payment service\\" --agent code-gen" ${README_PATH}`, + `grep -q -- "agent-relay run \\"Write API documentation\\" --agent documentation --dry-run" ${README_PATH}`, + `grep -q -- "--agent " ${README_PATH}`, + `grep -q -- "--profile " ${README_PATH}`, + `grep -q -- "--tier " ${README_PATH}`, + `grep -q -- "--dry-run" ${README_PATH}`, + `grep -q -- "--context " ${README_PATH}`, + `grep -q -- "--verify " ${README_PATH}`, + `grep -q -- "--output " ${README_PATH}`, + `grep -q -- "--concurrency " ${README_PATH}`, + `grep -q -- "--timeout " ${README_PATH}`, + `grep -q -- "security-review" ${README_PATH}`, + `grep -q -- "requirements-analysis" ${README_PATH}`, + `grep -q -- "implement-frontend" ${README_PATH}`, + `grep -q -- "npm-provenance" ${README_PATH}`, + ].join(' && '), + captureOutput: true, + failOnError: true, + }); + + wf.step('write-agent-flag-guides', { + agent: 'agent-guide-writer', + dependsOn: ['read-spec', 'read-existing-docs', 'read-doc-style', 'read-phase-api-surface'], + task: ` +You are implementing the dedicated Phase 5 agent-mode documentation guide. Do not use Relaycast MCP tools or spawn sub-agents. + +You are not alone in the codebase. 
Own only ${AGENT_FLAG_DOC_PATH} and ${AGENT_FLAG_MDX_PATH}; do not revert or rewrite files outside that scope. + +Spec: +{{steps.read-spec.output}} + +Existing docs, if present: +{{steps.read-existing-docs.output}} + +Docs style references: +{{steps.read-doc-style.output}} + +Current Phase 1-3 API surface: +{{steps.read-phase-api-surface.output}} + +Requirements: +1. Create or update ${AGENT_FLAG_DOC_PATH} with the complete "Agent Mode (--agent Flag)" guide from the spec. +2. Include How It Works, all 8 usage examples, Persona Reference, Custom Personas, Context Heuristics, and Troubleshooting. +3. Include runnable bash examples and valid TypeScript examples using @agent-relay/sdk/workflows. +4. Create or update ${AGENT_FLAG_MDX_PATH} as the MDX mirror with the required frontmatter. +5. Follow the docs-sync rule: the MDX content must mirror the markdown content, converting plain note/warning blocks to <Note> and <Warning> where appropriate. +6. Use <CodeGroup> only when it helps group adjacent examples; keep MDX valid. +7. Do not add dependencies and do not edit any files except ${AGENT_FLAG_DOC_PATH} and ${AGENT_FLAG_MDX_PATH}. + +Only edit ${AGENT_FLAG_DOC_PATH} and ${AGENT_FLAG_MDX_PATH}. End your output with AGENT_FLAG_GUIDES_DONE. +`.trim(), + verification: { type: 'output_contains', value: 'AGENT_FLAG_GUIDES_DONE' }, + retries: 2, + }); + + wf.step('verify-agent-flag-guides', { + type: 'deterministic', + dependsOn: ['write-agent-flag-guides'], + command: [ + `test -f ${AGENT_FLAG_DOC_PATH}`, + `test -f ${AGENT_FLAG_MDX_PATH}`, + `grep -q -- "^# Agent Mode" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "Agent Mode (--agent Flag)" ${AGENT_FLAG_MDX_PATH}`, + `grep -q -- "Generate and execute workflows from persona-driven task descriptions using the --agent CLI flag." ${AGENT_FLAG_MDX_PATH}`, + `grep -q -- "## How It Works" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "### 1. Basic" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "### 8.
Custom Concurrency and Timeout" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "## Persona Reference" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "## Context Heuristics" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "## Troubleshooting" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "resolvePersonaByIdOrIntent" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "generateWorkflow" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "reviewer-v2" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- "npm-provenance" ${AGENT_FLAG_DOC_PATH}`, + `grep -q -- " { + console.error(err); + process.exit(1); +}); From a83badb5c55ccb8bf473cd10f26b5046482c38f1 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 10 Apr 2026 21:18:59 +0200 Subject: [PATCH 2/2] fix: add proper exit code and DRY_RUN handling to coordinator Address Devin review comment: missing workflow failure exit code and DRY_RUN env var handling that was present in all other workflow files. --- build-plans/00-meta-workflow-coordinator.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/build-plans/00-meta-workflow-coordinator.ts b/build-plans/00-meta-workflow-coordinator.ts index 75e80eb84..9d06be120 100644 --- a/build-plans/00-meta-workflow-coordinator.ts +++ b/build-plans/00-meta-workflow-coordinator.ts @@ -239,7 +239,16 @@ This should be a complete agent-relay workflow file that: .onError('fail-fast') .run({ cwd: '/Users/khaliqgant/Projects/AgentWorkforce/relay-workflows' }); - console.log('Result:', result.status); + if ('status' in result) { + console.log('Result:', result.status); + } else { + console.log('Dry run completed.'); + return; + } + + if (result.status !== 'completed') { + process.exitCode = 1; + } } main().catch((err) => {