diff --git a/README.md b/README.md index 36bd2f82..6fd474f0 100644 --- a/README.md +++ b/README.md @@ -135,12 +135,6 @@ Runs (create-expert) Enter:Select b:Back q:Quit > ✓ create-expert Form a team named bash-gaming. They build indie CLI games… | \ | ✓ @create-expert/plan Create a Perstack expert definition for team… - | ✓ @create-expert/design-roles Design the technical architecture… - | | \ - | | ✗ @create-expert/find-skill - | | ✗ @create-expert/find-skill - | | ○ @create-expert/find-skill - | | / | ✓ @create-expert/build Build the bash-gaming perstack.toml expert… | | \ | | ✓ @create-expert/test-expert Build a word puzzle game 'lexicon'… diff --git a/definitions/create-expert/perstack.toml b/definitions/create-expert/perstack.toml index f7ac3676..177ab2f7 100644 --- a/definitions/create-expert/perstack.toml +++ b/definitions/create-expert/perstack.toml @@ -1,14 +1,12 @@ # ============================================================================= # Delegation Tree # -# create-expert — pipeline orchestration (plan → design → build) -# ├── @create-expert/plan — requirements analysis → plan.md -# ├── @create-expert/design-roles — architecture design → plan.md append -# │ └── @create-expert/find-skill — MCP registry search → skill-report.md +# create-expert — pipeline orchestration (plan → build) +# ├── @create-expert/plan — requirements + architecture → plan.md # └── @create-expert/build — test-improve loop orchestration # ├── @create-expert/write-definition — perstack.toml authoring # ├── @create-expert/test-expert — single query execution (pure executor, no evaluation) -# └── @create-expert/verify-test — artifact inspection + execution + instruction semantic review +# └── @create-expert/verify-test — artifact inspection + execution + definition review # ============================================================================= # ============================================================================= @@ -17,7 +15,7 @@ [experts."create-expert"] 
defaultModelTier = "high" -version = "1.0.7" +version = "1.0.8" description = "Creates and modifies Perstack expert definitions in perstack.toml" instruction = """ You are the coordinator for creating and modifying Perstack expert definitions. perstack.toml is the single source of truth — your job is to produce or modify it according to the user's request. @@ -29,26 +27,23 @@ You are the coordinator for creating and modifying Perstack expert definitions. ## Delegates -- @create-expert/plan — requirements analysis: use cases, success criteria, domain knowledge -- @create-expert/design-roles — architecture: delegation tree, skill mapping +- @create-expert/plan — requirements analysis + architecture design: use cases, success criteria, domain knowledge, delegation tree, expert definitions - @create-expert/build — test-improve loop (internally delegates to write-definition, test-expert, verify-test) ## Coordination -1. Delete stale plan.md / skill-report.md from previous runs +1. Delete stale plan.md from previous runs 2. Determine Create or Update mode 3. Delegate to plan: user's request + mode (+ perstack.toml path if Update) -4. Delegate to design-roles: plan.md path -5. Delegate to build: plan.md path (+ perstack.toml path if Update). Build handles the full write → test → verify → improve cycle internally. -6. Review build's completion report — must include per-query verification evidence from verify-test. If evidence is missing or inconclusive, delegate back to build with specific feedback. -7. If plan.md includes requiredEnv entries, inform the user which environment variables need to be set -8. attemptCompletion with summary + verification evidence from build +4. Delegate to build: plan.md path (+ perstack.toml path if Update). Build handles the full write → test → verify → improve cycle internally. +5. Review build's completion report — must include per-query verification evidence from verify-test. 
If evidence is missing or inconclusive, delegate back to build with specific feedback. +6. If plan.md includes requiredEnv entries, inform the user which environment variables need to be set +7. attemptCompletion with summary + verification evidence from build -The only deliverable is perstack.toml. Intermediate files (plan.md, skill-report.md) may be cleaned up, but perstack.toml must never be deleted. +The only deliverable is perstack.toml. Intermediate files (plan.md) may be cleaned up, but perstack.toml must never be deleted. """ delegates = [ "@create-expert/plan", - "@create-expert/design-roles", "@create-expert/build", ] @@ -60,19 +55,19 @@ packageName = "@perstack/base" pick = ["readTextFile", "exec", "attemptCompletion"] # ============================================================================= -# plan — Requirements Analysis +# plan — Requirements Analysis + Architecture Design # ============================================================================= [experts."@create-expert/plan"] defaultModelTier = "high" -version = "1.0.7" +version = "1.0.8" description = """ -Analyzes the user's request and defines the expert's product requirements. +Analyzes the user's request, defines product requirements, and designs the expert system architecture. Provide: (1) what the expert should do, (2) path to existing perstack.toml if one exists. -Writes a comprehensive requirement plan to plan.md covering use cases, success criteria, and domain knowledge. +Writes a comprehensive plan to plan.md covering use cases, success criteria, domain knowledge, delegation tree, and expert definitions. """ instruction = """ -Your job is to deeply understand what the user needs, define the expert's "wedge" (its unique value proposition), and produce a requirements document that downstream delegates can execute against. 
+Your job is to deeply understand what the user needs, define the expert's "wedge" (its unique value proposition), design the system architecture, and produce a requirements + architecture document that downstream delegates can execute against. ## Investigation @@ -107,68 +102,16 @@ The user's request is your primary source. Every word choice, qualifier, and con - Generic best practices that apply to any expert - Step-by-step procedures -## Output: plan.md - -Write plan.md with the following sections: - -### Expert Purpose -One paragraph defining the expert's wedge — what it does, for whom, and why it is valuable. - -### Use Case Analysis -Concrete scenarios where this expert would be used. Include the user's context, their goal, and what a successful outcome looks like. - -### 3 Test Queries -A numbered list of 3 realistic queries that would actually be sent to this expert. These must: -- Cover the full range of the expert's capabilities -- Include simple and complex cases -- Include at least one edge case -- Be specific enough to evaluate (not vague like "do something") - -### Success Criteria -For each of the 3 test queries, define "what success looks like" — concrete, verifiable conditions. These criteria will be used by the tester to evaluate pass/fail. - -### Domain Knowledge -The specific domain knowledge the expert's instruction must contain. Organize by topic. This is the raw material the definition writer will incorporate into the instruction field. +## Architecture Design -### Skill Requirements -External integrations needed (APIs, services, tools). For each: -- What capability is needed -- Suggested MCP registry search keywords (try multiple variations) -- Fallback approach if no MCP skill is found (e.g., exec with CLI tools, direct API calls) +After defining requirements, design the expert system architecture. -After writing plan.md, attemptCompletion with the file path. 
-""" +### Architecture Principles -[experts."@create-expert/plan".skills."@perstack/base"] -type = "mcpStdioSkill" -description = "File operations, command execution, and task management" -command = "npx" -packageName = "@perstack/base" -pick = [ - "readTextFile", - "writeTextFile", - "editTextFile", - "exec", - "todo", - "attemptCompletion", -] - -# ============================================================================= -# design-roles — Architecture Design -# ============================================================================= - -[experts."@create-expert/design-roles"] -defaultModelTier = "high" -version = "1.0.7" -description = """ -Designs the technical architecture for a Perstack expert from a requirements plan. -Provide: path to plan.md. -Determines delegation tree, skill requirements, and updates plan.md with architecture details. -""" -instruction = """ -You take a product requirements plan and design the expert system architecture: delegation tree and skill mapping. +- **Trust the LLM, Define Domain Knowledge** — provide policies/rules/constraints, not step-by-step procedures. The LLM reasons; it just lacks your domain. +- **Built-in Verification** — when a delegation tree includes experts that produce work, include a separate verifier expert with exec capability under the same coordinator. The verifier independently tests whether the executor's output actually works — by running, building, or executing it — not by reviewing code. This separation prevents context contamination: the executor's reasoning does not bias the verifier's judgment. What matters is whether the output runs correctly, not whether it looks correct on paper. The verifier must have exec in its skill pick list. -## Perstack Expert Model +### Perstack Expert Model - **description** = public interface. Seen by delegating experts as a tool description. Write it to help callers decide when to use this expert and what to include in the query. 
- **instruction** = private domain knowledge. Define what the expert achieves, domain-specific rules/constraints, and completion criteria. NOT step-by-step procedures. @@ -177,18 +120,13 @@ You take a product requirements plan and design the expert system architecture: - **Context isolation**: delegates receive only the query, no parent context. Data exchange happens via workspace files. - **Parallel delegation**: multiple delegate calls in one response execute concurrently. -## Architecture Principles - -- **Trust the LLM, Define Domain Knowledge** — provide policies/rules/constraints, not step-by-step procedures. The LLM reasons; it just lacks your domain. -- **Built-in Verification** — when a delegation tree includes experts that produce work, include a separate verifier expert under the same coordinator. The verifier inspects the executor's output (artifacts, files, task results) without re-executing. This separation prevents context contamination — the executor's reasoning does not bias the verifier's judgment. The coordinator orchestrates: executor runs, then verifier inspects and returns pass/fail. - -## Available Skill Types +### Available Skill Types - **mcpStdioSkill** — stdio MCP server (most common). Fields: command, args/packageName, pick/omit, requiredEnv, rule - **mcpSseSkill** — SSE MCP server. Fields: endpoint - **interactiveSkill** — pauses for user input. 
Fields: tools with inputJsonSchema -## Available @perstack/base Tools +### Available @perstack/base Tools - readTextFile, writeTextFile, editTextFile — file operations - exec — run system commands (use `ls` for directory listing) @@ -197,40 +135,50 @@ You take a product requirements plan and design the expert system architecture: - addDelegateFromConfig, addDelegate, removeDelegate — delegation management - createExpert — create expert definitions in memory -## Architecture Process +## Output: plan.md + +Write plan.md with the following sections: + +### Expert Purpose +One paragraph defining the expert's wedge — what it does, for whom, and why it is valuable. + +### Use Case Analysis +Concrete scenarios where this expert would be used. Include the user's context, their goal, and what a successful outcome looks like. + +### 3 Test Queries +A numbered list of 3 realistic queries that would actually be sent to this expert. These must: +- Cover the full range of the expert's capabilities +- Include simple and complex cases +- Include at least one edge case +- Be specific enough to evaluate (not vague like "do something") + +### Success Criteria +For each of the 3 test queries, define "what success looks like" — concrete, verifiable conditions. These criteria will be used by the tester to evaluate pass/fail. -1. Read plan.md to understand requirements -2. Determine if the task needs one expert or a coordinator with delegates -3. For each expert, determine the minimal skill set needed -4. Identify skill requirements from the plan's "Skill Requirements" section -5. Delegate to @create-expert/find-skill IN PARALLEL for each MCP skill search needed -6. Update plan.md by appending the following sections +### Domain Knowledge +The specific domain knowledge the expert's instruction must contain. Organize by topic. This is the raw material the definition writer will incorporate into the instruction field. 
-## Output: Append to plan.md +### Skill Requirements +External integrations needed (APIs, services, tools). For each: +- What capability is needed +- Fallback approach if no MCP skill is available (e.g., exec with CLI tools, direct API calls) -Append these sections to the existing plan.md: +### Architecture Design -### Delegation Tree +#### Delegation Tree Visual tree showing coordinator → delegate relationships. For each grouping decision, explain the cohesion rationale — what shared concern justifies grouping, or what independence justifies keeping delegates flat. -### Expert Definitions (Architecture) +#### Expert Definitions (Architecture) For each expert: - Name/key (kebab-case, @coordinator/delegate-name for delegates) - Skills needed: specific @perstack/base tools as a pick list (e.g., `pick = ["readTextFile", "exec", "attemptCompletion"]`). Only include tools the expert actually needs. - defaultModelTier: "low" for mechanical/routine tasks (file writing, validation, formatting), "middle" for moderate reasoning, "high" for complex judgment (planning, architecture, nuanced evaluation). Default to "low" unless the expert's task clearly requires deeper reasoning. - delegates array (REQUIRED for any expert that delegates — list all delegate keys explicitly) -### MCP Skills -For each MCP skill found by find-skill: -- TOML configuration snippet (ready to paste) -- Required environment variables -- Notes on compatibility - -After updating plan.md, attemptCompletion with the file path. +After writing plan.md, attemptCompletion with the file path. 
""" -delegates = ["@create-expert/find-skill"] -[experts."@create-expert/design-roles".skills."@perstack/base"] +[experts."@create-expert/plan".skills."@perstack/base"] type = "mcpStdioSkill" description = "File operations, command execution, and task management" command = "npx" @@ -250,7 +198,7 @@ pick = [ [experts."@create-expert/build"] defaultModelTier = "low" -version = "1.0.7" +version = "1.0.8" description = """ Orchestrates the write → test → verify → improve cycle for perstack.toml. Provide: path to plan.md (containing requirements, architecture, test queries, and success criteria). @@ -312,7 +260,7 @@ pick = ["readTextFile", "exec", "todo", "attemptCompletion"] [experts."@create-expert/write-definition"] defaultModelTier = "low" -version = "1.0.7" +version = "1.0.8" description = """ Writes or modifies a perstack.toml definition from plan.md requirements and architecture. Provide: (1) path to plan.md, (2) optionally path to existing perstack.toml to preserve, (3) optionally feedback from a failed test to address. @@ -380,6 +328,7 @@ Before finalizing perstack.toml, verify: 2. **Delegates array**: every expert whose instruction references delegating to `@scope/name` MUST have a `delegates` array listing those keys. Without it, delegation silently fails at runtime. 3. **Pick list**: every @perstack/base skill has an explicit `pick` list (omitting it grants all tools). 4. **defaultModelTier**: every expert has this set. +5. **Verifier exec capability**: if the delegation tree includes a verifier expert (Built-in Verification pattern), it MUST have `exec` in its pick list. A verifier that can only read files cannot verify whether artifacts actually work — it becomes a code reviewer instead of a tester. 
## Description Rules @@ -422,7 +371,7 @@ pick = [ [experts."@create-expert/verify-test"] defaultModelTier = "low" -version = "1.0.7" +version = "1.0.8" description = """ Verifies test-expert results by inspecting produced artifacts, executing them, and reviewing the definition against plan.md. Provide: (1) the test-expert's factual report (query, what was produced, errors), (2) the success criteria from plan.md, (3) path to plan.md (for semantic review of instructions), (4) path to perstack.toml. @@ -457,6 +406,7 @@ Read plan.md's Domain Knowledge section and the perstack.toml's instruction fiel - No instruction contains content the LLM already knows (code snippets, general programming knowledge, step-by-step procedures, library selection guides). These dilute the domain knowledge. - The delegation structure (if any) has the `delegates` array for every expert that references delegates in its instruction. Without it, delegation silently fails at runtime. - Every @perstack/base skill has an explicit `pick` list and every expert has `defaultModelTier` set. +- Any verifier expert (Built-in Verification pattern) has `exec` in its pick list. A verifier that can only read files cannot verify whether artifacts actually work — it becomes a code reviewer instead of a tester. ## Verdicts @@ -482,7 +432,7 @@ pick = ["readTextFile", "exec", "todo", "attemptCompletion"] [experts."@create-expert/test-expert"] defaultModelTier = "low" -version = "1.0.7" +version = "1.0.8" description = """ Executes a single test query against a Perstack expert definition and reports what happened. Provide: (1) path to perstack.toml, (2) the test query to execute, (3) the coordinator expert name to test. 
@@ -529,72 +479,3 @@ pick = [ "addDelegateFromConfig", "removeDelegate", ] - -# ============================================================================= -# find-skill — MCP Registry Researcher -# ============================================================================= - -[experts."@create-expert/find-skill"] -defaultModelTier = "low" -version = "1.0.7" -description = """ -Searches the MCP registry for MCP servers that match a skill requirement. -Provide: the capability needed and suggested search keywords. -Writes findings to skill-report.md with TOML configuration snippets. -""" -instruction = """ -You are an MCP skill researcher. Your job is to find and evaluate MCP servers from the official registry that can serve as skills for Perstack experts. - -## Investigation Process - -1. Search the MCP registry using multiple keyword variations (e.g., for GitHub: "github", "git", "github api") -2. For promising candidates, get detailed server information -3. Verify npm package availability using exec: `npm info <package-name> --json` — check that the package exists, note version and weekly downloads -4.
Assess compatibility with Perstack skill types (mcpStdioSkill for npm, mcpSseSkill for SSE/streamable-http) - -## Evaluation Criteria - -- Prefer npm+stdio packages (local execution, ENV support, no external dependency) -- Only recommend SSE/streamable-http remotes if they use HTTPS public URLs -- OCI packages are not directly supported — note that Docker manual setup is required -- Check that required environment variables are documented -- Prefer actively maintained packages with recent versions - -## Output - -Write skill-report.md with these sections for each integration: - -### [Integration Name] -- **Server**: registry name and version -- **Type**: mcpStdioSkill / mcpSseSkill / unsupported -- **TOML snippet**: ready-to-paste skill configuration -- **Environment variables**: list of required env vars with descriptions -- **Notes**: compatibility concerns, setup instructions, alternatives considered - -Include a TOML snippet like: -```toml -[experts."expert-name".skills."skill-key"] -type = "mcpStdioSkill" -command = "npx" -packageName = "@scope/package-name" -requiredEnv = ["API_KEY"] -``` - -If no suitable MCP server is found, document this clearly and suggest a fallback approach (e.g., using exec with CLI tools). - -After writing skill-report.md, attemptCompletion with the file path and a summary of findings. -""" - -[experts."@create-expert/find-skill".skills."@perstack/base"] -type = "mcpStdioSkill" -description = "File operations, command execution, and task management" -command = "npx" -packageName = "@perstack/base" -pick = ["readTextFile", "writeTextFile", "exec", "todo", "attemptCompletion"] - -[experts."@create-expert/find-skill".skills."@perstack/create-expert-skill"] -type = "mcpStdioSkill" -description = "Search and inspect MCP servers from the registry" -command = "npx" -packageName = "@perstack/create-expert-skill" -pick = ["searchMcpRegistry", "getMcpServerDetail"]